tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

unorm2.h (26278B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2009-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  unorm2.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2009dec15
     16 *   created by: Markus W. Scherer
     17 */
     18 
     19 #ifndef __UNORM2_H__
     20 #define __UNORM2_H__
     21 
     22 /**
     23 * \file
     24 * \brief C API: New API for Unicode Normalization.
     25 *
     26 * Unicode normalization functionality for standard Unicode normalization or
     27 * for using custom mapping tables.
     28 * All instances of UNormalizer2 are unmodifiable/immutable.
     29 * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
     30 * For more details see the Normalizer2 C++ class.
     31 */
     32 
     33 #include "unicode/utypes.h"
     34 #include "unicode/stringoptions.h"
     35 #include "unicode/uset.h"
     36 
     37 #if U_SHOW_CPLUSPLUS_API
     38 #include "unicode/localpointer.h"
     39 #endif   // U_SHOW_CPLUSPLUS_API
     40 
     41 /**
     42 * Constants for normalization modes.
     43 * For details about standard Unicode normalization forms
     44 * and about the algorithms which are also used with custom mapping tables
     45 * see http://www.unicode.org/reports/tr15/
     46 * @stable ICU 4.4
     47 */
     48 typedef enum {
     49    /**
     50     * Decomposition followed by composition.
     51     * Same as standard NFC when using an "nfc" instance.
     52     * Same as standard NFKC when using an "nfkc" instance.
     53     * For details about standard Unicode normalization forms
     54     * see http://www.unicode.org/reports/tr15/
     55     * @stable ICU 4.4
     56     */
     57    UNORM2_COMPOSE,
     58    /**
     59     * Map, and reorder canonically.
     60     * Same as standard NFD when using an "nfc" instance.
     61     * Same as standard NFKD when using an "nfkc" instance.
     62     * For details about standard Unicode normalization forms
     63     * see http://www.unicode.org/reports/tr15/
     64     * @stable ICU 4.4
     65     */
     66    UNORM2_DECOMPOSE,
     67    /**
     68     * "Fast C or D" form.
     69     * If a string is in this form, then further decomposition <i>without reordering</i>
     70     * would yield the same form as DECOMPOSE.
     71     * Text in "Fast C or D" form can be processed efficiently with data tables
     72     * that are "canonically closed", that is, that provide equivalent data for
     73     * equivalent text, without having to be fully normalized.
     74     * Not a standard Unicode normalization form.
     75     * Not a unique form: Different FCD strings can be canonically equivalent.
     76     * For details see http://www.unicode.org/notes/tn5/#FCD
     77     * @stable ICU 4.4
     78     */
     79    UNORM2_FCD,
     80    /**
     81     * Compose only contiguously.
     82     * Also known as "FCC" or "Fast C Contiguous".
     83     * The result will often but not always be in NFC.
     84     * The result will conform to FCD which is useful for processing.
     85     * Not a standard Unicode normalization form.
     86     * For details see http://www.unicode.org/notes/tn5/#FCC
     87     * @stable ICU 4.4
     88     */
     89    UNORM2_COMPOSE_CONTIGUOUS
     90 } UNormalization2Mode;
     91 
     92 /**
     93 * Result values for normalization quick check functions such as unorm2_quickCheck().
     94 * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
     95 * @stable ICU 2.0
     96 */
     97 typedef enum UNormalizationCheckResult {
     98  /**
     99   * The input string is not in the normalization form.
    100   * @stable ICU 2.0
    101   */
    102  UNORM_NO,
    103  /**
    104   * The input string is in the normalization form.
    105   * @stable ICU 2.0
    106   */
    107  UNORM_YES,
    108  /**
    109   * The input string may or may not be in the normalization form.
    110   * This value is only returned for composition forms like NFC and FCC,
    111   * when a backward-combining character is found for which the surrounding text
    112   * would have to be analyzed further.
    113   * @stable ICU 2.0
    114   */
    115  UNORM_MAYBE
    116 } UNormalizationCheckResult;
    117 
    118 /**
    119 * Opaque C service object type for the new normalization API; all instances are immutable.
    120 * @stable ICU 4.4
    121 */
    122 struct UNormalizer2;
    123 typedef struct UNormalizer2 UNormalizer2;  /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
    124 
    125 #if !UCONFIG_NO_NORMALIZATION
    126 
    127 /**
    128 * Returns a UNormalizer2 instance for Unicode NFC normalization.
    129 * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
    130 * Returns an unmodifiable singleton instance. Do not delete or close it.
    131 * @param pErrorCode Standard ICU error code. Its input value must
    132 *                  pass the U_SUCCESS() test, or else the function returns
    133 *                  immediately. Check for U_FAILURE() on output or use with
    134 *                  function chaining. (See User Guide for details.)
    135 * @return the requested UNormalizer2, if successful
    136 * @stable ICU 49
    137 */
    138 U_CAPI const UNormalizer2 * U_EXPORT2
    139 unorm2_getNFCInstance(UErrorCode *pErrorCode);
    140 
    141 /**
    142 * Returns a UNormalizer2 instance for Unicode NFD normalization.
    143 * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
    144 * Returns an unmodifiable singleton instance. Do not delete or close it.
    145 * @param pErrorCode Standard ICU error code. Its input value must
    146 *                  pass the U_SUCCESS() test, or else the function returns
    147 *                  immediately. Check for U_FAILURE() on output or use with
    148 *                  function chaining. (See User Guide for details.)
    149 * @return the requested UNormalizer2, if successful
    150 * @stable ICU 49
    151 */
    152 U_CAPI const UNormalizer2 * U_EXPORT2
    153 unorm2_getNFDInstance(UErrorCode *pErrorCode);
    154 
    155 /**
    156 * Returns a UNormalizer2 instance for Unicode NFKC normalization.
    157 * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
    158 * Returns an unmodifiable singleton instance. Do not delete or close it.
    159 * @param pErrorCode Standard ICU error code. Its input value must
    160 *                  pass the U_SUCCESS() test, or else the function returns
    161 *                  immediately. Check for U_FAILURE() on output or use with
    162 *                  function chaining. (See User Guide for details.)
    163 * @return the requested UNormalizer2, if successful
    164 * @stable ICU 49
    165 */
    166 U_CAPI const UNormalizer2 * U_EXPORT2
    167 unorm2_getNFKCInstance(UErrorCode *pErrorCode);
    168 
    169 /**
    170 * Returns a UNormalizer2 instance for Unicode NFKD normalization.
    171 * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
    172 * Returns an unmodifiable singleton instance. Do not delete or close it.
    173 * @param pErrorCode Standard ICU error code. Its input value must
    174 *                  pass the U_SUCCESS() test, or else the function returns
    175 *                  immediately. Check for U_FAILURE() on output or use with
    176 *                  function chaining. (See User Guide for details.)
    177 * @return the requested UNormalizer2, if successful
    178 * @stable ICU 49
    179 */
    180 U_CAPI const UNormalizer2 * U_EXPORT2
    181 unorm2_getNFKDInstance(UErrorCode *pErrorCode);
    182 
    183 /**
    184 * Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization
    185 * which is equivalent to applying the NFKC_Casefold mappings and then NFC.
    186 * See https://www.unicode.org/reports/tr44/#NFKC_Casefold
    187 *
    188 * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
    189 * Returns an unmodifiable singleton instance. Do not delete or close it.
    190 * @param pErrorCode Standard ICU error code. Its input value must
    191 *                  pass the U_SUCCESS() test, or else the function returns
    192 *                  immediately. Check for U_FAILURE() on output or use with
    193 *                  function chaining. (See User Guide for details.)
    194 * @return the requested UNormalizer2, if successful
    195 * @stable ICU 49
    196 */
    197 U_CAPI const UNormalizer2 * U_EXPORT2
    198 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
    199 
    200 /**
    201 * Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
    202 * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
    203 * See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
    204 *
    205 * Same as unorm2_getInstance(NULL, "nfkc_scf", UNORM2_COMPOSE, pErrorCode).
    206 * Returns an unmodifiable singleton instance. Do not delete or close it.
    207 * @param pErrorCode Standard ICU error code. Its input value must
    208 *                  pass the U_SUCCESS() test, or else the function returns
    209 *                  immediately. Check for U_FAILURE() on output or use with
    210 *                  function chaining. (See User Guide for details.)
    211 * @return the requested UNormalizer2, if successful
    212 * @stable ICU 74
    213 */
    214 U_CAPI const UNormalizer2 * U_EXPORT2
    215 unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode);
    216 
    217 /**
    218 * Returns a UNormalizer2 instance which uses the specified data file
    219 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
    220 * and which composes or decomposes text according to the specified mode.
    221 * Returns an unmodifiable singleton instance. Do not delete or close it.
    222 *
    223 * Use packageName=NULL for data files that are part of ICU's own data.
    224 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
    225 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
    226 * Use name="nfkc_cf" or "nfkc_scf" and UNORM2_COMPOSE for NFKC_Casefold or NFKC_Simple_Casefold.
    227 *
    228 * @param packageName NULL for ICU built-in data, otherwise application data package name
    229 * @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
    230 * @param mode normalization mode (compose or decompose etc.)
    231 * @param pErrorCode Standard ICU error code. Its input value must
    232 *                  pass the U_SUCCESS() test, or else the function returns
    233 *                  immediately. Check for U_FAILURE() on output or use with
    234 *                  function chaining. (See User Guide for details.)
    235 * @return the requested UNormalizer2, if successful
    236 * @stable ICU 4.4
    237 */
    238 U_CAPI const UNormalizer2 * U_EXPORT2
    239 unorm2_getInstance(const char *packageName,
    240                   const char *name,
    241                   UNormalization2Mode mode,
    242                   UErrorCode *pErrorCode);
    243 
    244 /**
    245 * Constructs a filtered normalizer wrapping any UNormalizer2 instance
    246 * and a filter set.
    247 * Both are aliased and must not be modified or deleted while this object
    248 * is used.
    249 * The filter set should be frozen; otherwise the performance will suffer greatly.
    250 * @param norm2 wrapped UNormalizer2 instance
    251 * @param filterSet USet which determines the characters to be normalized
    252 * @param pErrorCode Standard ICU error code. Its input value must
    253 *                   pass the U_SUCCESS() test, or else the function returns
    254 *                   immediately. Check for U_FAILURE() on output or use with
    255 *                   function chaining. (See User Guide for details.)
    256 * @return the requested UNormalizer2, if successful; close it with unorm2_close()
    257 * @stable ICU 4.4
    258 */
    259 U_CAPI UNormalizer2 * U_EXPORT2
    260 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
    261 
    262 /**
    263 * Closes a UNormalizer2 instance from unorm2_openFiltered().
    264 * Do not close singleton instances from unorm2_getInstance() or the unorm2_get...Instance() functions!
    265 * @param norm2 UNormalizer2 instance to be closed
    266 * @stable ICU 4.4
    267 */
    268 U_CAPI void U_EXPORT2
    269 unorm2_close(UNormalizer2 *norm2);
    270 
    271 #if U_SHOW_CPLUSPLUS_API
    272 
    273 U_NAMESPACE_BEGIN
    274 
    275 /**
    276 * \class LocalUNormalizer2Pointer
    277 * "Smart pointer" class, closes a UNormalizer2 via unorm2_close().
    278 * For most methods see the LocalPointerBase base class.
    279 *
    280 * @see LocalPointerBase
    281 * @see LocalPointer
    282 * @stable ICU 4.4
    283 */
    284 U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
    285 
    286 U_NAMESPACE_END
    287 
    288 #endif   // U_SHOW_CPLUSPLUS_API
    289 
    290 /**
    291 * Writes the normalized form of the source string to the destination string
    292 * (replacing its contents) and returns the length of the destination string.
    293 * The source and destination strings must be different buffers.
    294 * @param norm2 UNormalizer2 instance
    295 * @param src source string
    296 * @param length length of the source string, or -1 if NUL-terminated
    297 * @param dest destination string; its contents is replaced with normalized src
    298 * @param capacity number of UChars that can be written to dest
    299 * @param pErrorCode Standard ICU error code. Its input value must
    300 *                   pass the U_SUCCESS() test, or else the function returns
    301 *                   immediately. Check for U_FAILURE() on output or use with
    302 *                   function chaining. (See User Guide for details.)
    303 * @return the length of the destination string
    304 * @stable ICU 4.4
    305 */
    306 U_CAPI int32_t U_EXPORT2
    307 unorm2_normalize(const UNormalizer2 *norm2,
    308                 const UChar *src, int32_t length,
    309                 UChar *dest, int32_t capacity,
    310                 UErrorCode *pErrorCode);
    311 /**
    312 * Appends the normalized form of the second string to the first string
    313 * (merging them at the boundary) and returns the length of the first string.
    314 * The result is normalized if the first string was normalized.
    315 * The first and second strings must be different buffers.
    316 * @param norm2 UNormalizer2 instance
    317 * @param first string, should be normalized
    318 * @param firstLength length of the first string, or -1 if NUL-terminated
    319 * @param firstCapacity number of UChars that can be written to first
    320 * @param second string, will be normalized
    321 * @param secondLength length of the second string, or -1 if NUL-terminated
    322 * @param pErrorCode Standard ICU error code. Its input value must
    323 *                   pass the U_SUCCESS() test, or else the function returns
    324 *                   immediately. Check for U_FAILURE() on output or use with
    325 *                   function chaining. (See User Guide for details.)
    326 * @return the length of the first string
    327 * @stable ICU 4.4
    328 */
    329 U_CAPI int32_t U_EXPORT2
    330 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
    331                                UChar *first, int32_t firstLength, int32_t firstCapacity,
    332                                const UChar *second, int32_t secondLength,
    333                                UErrorCode *pErrorCode);
    334 /**
    335 * Appends the second string to the first string
    336 * (merging them at the boundary) and returns the length of the first string.
    337 * The result is normalized if both the strings were normalized.
    338 * The first and second strings must be different buffers.
    339 * @param norm2 UNormalizer2 instance
    340 * @param first string, should be normalized
    341 * @param firstLength length of the first string, or -1 if NUL-terminated
    342 * @param firstCapacity number of UChars that can be written to first
    343 * @param second string, should be normalized
    344 * @param secondLength length of the second string, or -1 if NUL-terminated
    345 * @param pErrorCode Standard ICU error code. Its input value must
    346 *                   pass the U_SUCCESS() test, or else the function returns
    347 *                   immediately. Check for U_FAILURE() on output or use with
    348 *                   function chaining. (See User Guide for details.)
    349 * @return the length of the first string
    350 * @stable ICU 4.4
    351 */
    352 U_CAPI int32_t U_EXPORT2
    353 unorm2_append(const UNormalizer2 *norm2,
    354              UChar *first, int32_t firstLength, int32_t firstCapacity,
    355              const UChar *second, int32_t secondLength,
    356              UErrorCode *pErrorCode);
    357 
    358 /**
    359 * Gets the decomposition mapping of c.
    360 * Roughly equivalent to normalizing the string form of c
    361 * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
    362 * returns a negative value and does not write a string
    363 * if c does not have a decomposition mapping in this instance's data.
    364 * This function is independent of the mode of the UNormalizer2.
    365 * @param norm2 UNormalizer2 instance
    366 * @param c code point
    367 * @param decomposition string buffer which will be set to c's
    368 *                      decomposition mapping, if there is one.
    369 * @param capacity number of UChars that can be written to decomposition
    370 * @param pErrorCode Standard ICU error code. Its input value must
    371 *                   pass the U_SUCCESS() test, or else the function returns
    372 *                   immediately. Check for U_FAILURE() on output or use with
    373 *                   function chaining. (See User Guide for details.)
    374 * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
    375 * @stable ICU 4.6
    376 */
    377 U_CAPI int32_t U_EXPORT2
    378 unorm2_getDecomposition(const UNormalizer2 *norm2,
    379                        UChar32 c, UChar *decomposition, int32_t capacity,
    380                        UErrorCode *pErrorCode);
    381 
    382 /**
    383 * Gets the raw decomposition mapping of c.
    384 *
    385 * This is similar to the unorm2_getDecomposition() function but returns the
    386 * raw decomposition mapping as specified in UnicodeData.txt or
    387 * (for custom data) in the mapping files processed by the gennorm2 tool.
    388 * By contrast, unorm2_getDecomposition() returns the processed,
    389 * recursively-decomposed version of this mapping.
    390 *
    391 * When used on a standard NFKC UNormalizer2 instance,
    392 * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
    393 *
    394 * When used on a standard NFC UNormalizer2 instance,
    395 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
    396 * in this case, the result contains either one or two code points (=1..4 UChars).
    397 *
    398 * This function is independent of the mode of the UNormalizer2.
    399 * @param norm2 UNormalizer2 instance
    400 * @param c code point
    401 * @param decomposition string buffer which will be set to c's
    402 *                      raw decomposition mapping, if there is one.
    403 * @param capacity number of UChars that can be written to decomposition
    404 * @param pErrorCode Standard ICU error code. Its input value must
    405 *                   pass the U_SUCCESS() test, or else the function returns
    406 *                   immediately. Check for U_FAILURE() on output or use with
    407 *                   function chaining. (See User Guide for details.)
    408 * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
    409 * @stable ICU 49
    410 */
    411 U_CAPI int32_t U_EXPORT2
    412 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
    413                           UChar32 c, UChar *decomposition, int32_t capacity,
    414                           UErrorCode *pErrorCode);
    415 
    416 /**
    417 * Performs pairwise composition of a and b and returns the composite if there is one.
    418 *
    419 * Returns a composite code point c only if c has a two-way mapping to a+b.
    420 * In standard Unicode normalization, this means that
    421 * c has a canonical decomposition to a+b
    422 * and c does not have the Full_Composition_Exclusion property.
    423 *
    424 * This function is independent of the mode of the UNormalizer2.
    425 * @param norm2 UNormalizer2 instance
    426 * @param a A (normalization starter) code point.
    427 * @param b Another code point.
    428 * @return The non-negative composite code point if there is one; otherwise a negative value.
    429 * @stable ICU 49
    430 */
    431 U_CAPI UChar32 U_EXPORT2
    432 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
    433 
    434 /**
    435 * Gets the combining class of c.
    436 * The default implementation returns 0,
    437 * but all standard implementations return the Unicode Canonical_Combining_Class (ccc) value.
    438 * @param norm2 UNormalizer2 instance
    439 * @param c code point
    440 * @return c's combining class
    441 * @stable ICU 49
    442 */
    443 U_CAPI uint8_t U_EXPORT2
    444 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
    445 
    446 /**
    447 * Tests if the string is normalized.
    448 * Internally, in cases where unorm2_quickCheck() would return "maybe"
    449 * (which is only possible for the two COMPOSE modes) this function
    450 * resolves to "yes" or "no" to provide a definitive result,
    451 * at the cost of doing more work in those cases.
    452 * @param norm2 UNormalizer2 instance
    453 * @param s input string
    454 * @param length length of the string, or -1 if NUL-terminated
    455 * @param pErrorCode Standard ICU error code. Its input value must
    456 *                   pass the U_SUCCESS() test, or else the function returns
    457 *                   immediately. Check for U_FAILURE() on output or use with
    458 *                   function chaining. (See User Guide for details.)
    459 * @return true if s is normalized
    460 * @stable ICU 4.4
    461 */
    462 U_CAPI UBool U_EXPORT2
    463 unorm2_isNormalized(const UNormalizer2 *norm2,
    464                    const UChar *s, int32_t length,
    465                    UErrorCode *pErrorCode);
    466 
    467 /**
    468 * Tests if the string is normalized.
    469 * For the two COMPOSE modes, the result could be "maybe" in cases that
    470 * would take a little more work to resolve definitively.
    471 * Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster
    472 * combination of quick check + normalization, to avoid
    473 * re-checking the "yes" prefix.
    474 * @param norm2 UNormalizer2 instance
    475 * @param s input string
    476 * @param length length of the string, or -1 if NUL-terminated
    477 * @param pErrorCode Standard ICU error code. Its input value must
    478 *                   pass the U_SUCCESS() test, or else the function returns
    479 *                   immediately. Check for U_FAILURE() on output or use with
    480 *                   function chaining. (See User Guide for details.)
    481 * @return UNormalizationCheckResult: UNORM_YES, UNORM_NO or UNORM_MAYBE
    482 * @stable ICU 4.4
    483 */
    484 U_CAPI UNormalizationCheckResult U_EXPORT2
    485 unorm2_quickCheck(const UNormalizer2 *norm2,
    486                  const UChar *s, int32_t length,
    487                  UErrorCode *pErrorCode);
    488 
    489 /**
    490 * Returns the end of the normalized substring of the input string.
    491 * In other words, with <code>end=unorm2_spanQuickCheckYes(norm2, s, length, pErrorCode);</code>
    492 * the prefix of s consisting of its first <code>end</code> UChars
    493 * will pass the quick check with a "yes" result.
    494 *
    495 * The returned end index is usually one or more characters before the
    496 * "no" or "maybe" character: The end index is at a normalization boundary.
    497 * (See the Normalizer2 C++ class documentation for more about normalization boundaries.)
    498 *
    499 * When the goal is a normalized string and most input strings are expected
    500 * to be normalized already, then call this function,
    501 * and if it returns a prefix shorter than the input string,
    502 * copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
    503 * @param norm2 UNormalizer2 instance
    504 * @param s input string
    505 * @param length length of the string, or -1 if NUL-terminated
    506 * @param pErrorCode Standard ICU error code. Its input value must
    507 *                   pass the U_SUCCESS() test, or else the function returns
    508 *                   immediately. Check for U_FAILURE() on output or use with
    509 *                   function chaining. (See User Guide for details.)
    510 * @return "yes" span end index
    511 * @stable ICU 4.4
    512 */
    513 U_CAPI int32_t U_EXPORT2
    514 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
    515                         const UChar *s, int32_t length,
    516                         UErrorCode *pErrorCode);
    517 
    518 /**
    519 * Tests if the character always has a normalization boundary before it,
    520 * regardless of context.
    521 * For details see the Normalizer2 C++ base class documentation.
    522 * @param norm2 UNormalizer2 instance
    523 * @param c character to test
    524 * @return true if c has a normalization boundary before it
    525 * @stable ICU 4.4
    526 */
    527 U_CAPI UBool U_EXPORT2
    528 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
    529 
    530 /**
    531 * Tests if the character always has a normalization boundary after it,
    532 * regardless of context.
    533 * For details see the Normalizer2 C++ base class documentation.
    534 * @param norm2 UNormalizer2 instance
    535 * @param c character to test
    536 * @return true if c has a normalization boundary after it
    537 * @stable ICU 4.4
    538 */
    539 U_CAPI UBool U_EXPORT2
    540 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
    541 
    542 /**
    543 * Tests if the character is normalization-inert.
    544 * For details see the Normalizer2 C++ base class documentation.
    545 * @param norm2 UNormalizer2 instance
    546 * @param c character to test
    547 * @return true if c is normalization-inert
    548 * @stable ICU 4.4
    549 */
    550 U_CAPI UBool U_EXPORT2
    551 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
    552 
    553 /**
    554 * Compares two strings for canonical equivalence.
    555 * Further options include case-insensitive comparison and
    556 * code point order (as opposed to code unit order).
    557 *
    558 * Canonical equivalence between two strings is defined as their normalized
    559 * forms (NFD or NFC) being identical.
    560 * This function compares strings incrementally instead of normalizing
    561 * (and optionally case-folding) both strings entirely,
    562 * improving performance significantly.
    563 *
    564 * Bulk normalization is only necessary if the strings do not fulfill the FCD
    565 * conditions. Only in this case, and only if the strings are relatively long,
    566 * is memory allocated temporarily.
    567 * For FCD strings and short non-FCD strings there is no memory allocation.
    568 *
    569 * Semantically, this is equivalent to
    570 *   strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
    571 * where code point order and foldCase are all optional.
    572 *
    573 * UAX #21 2.5 Caseless Matching specifies that for a canonical caseless match
    574 * the case folding must be performed first, then the normalization.
    575 *
    576 * @param s1 First source string.
    577 * @param length1 Length of first source string, or -1 if NUL-terminated.
    578 *
    579 * @param s2 Second source string.
    580 * @param length2 Length of second source string, or -1 if NUL-terminated.
    581 *
    582 * @param options A bit set of options:
    583 *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    584 *     Case-sensitive comparison in code unit order, and the input strings
    585 *     are quick-checked for FCD.
    586 *
    587 *   - UNORM_INPUT_IS_FCD
    588 *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
    589 *     If not set, the function will quickCheck for FCD
    590 *     and normalize if necessary.
    591 *
    592 *   - U_COMPARE_CODE_POINT_ORDER
    593 *     Set to choose code point order instead of code unit order
    594 *     (see u_strCompare for details).
    595 *
    596 *   - U_COMPARE_IGNORE_CASE
    597 *     Set to compare strings case-insensitively using case folding,
    598 *     instead of case-sensitively.
    599 *     If set, then the following case folding options are used.
    600 *
    601 *   - Options as used with case-insensitive comparisons, currently:
    602 *
    603 *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    604 *    (see u_strCaseCompare for details)
    605 *
    606 *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
    607 *
    608 * @param pErrorCode ICU error code in/out parameter.
    609 *                   Must fulfill U_SUCCESS before the function call.
    610 * @return <0 or 0 or >0 as usual for string comparisons
    611 *
    612 * @see unorm_normalize
    613 * @see UNORM_FCD
    614 * @see u_strCompare
    615 * @see u_strCaseCompare
    616 *
    617 * @stable ICU 2.2
    618 */
    619 U_CAPI int32_t U_EXPORT2
    620 unorm_compare(const UChar *s1, int32_t length1,
    621              const UChar *s2, int32_t length2,
    622              uint32_t options,
    623              UErrorCode *pErrorCode);
    624 
    625 #endif  /* !UCONFIG_NO_NORMALIZATION */
    626 #endif  /* __UNORM2_H__ */