tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucol.h (68969B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (c) 1996-2015, International Business Machines Corporation and others.
      6 * All Rights Reserved.
      7 *******************************************************************************
      8 */
      9 
     10 #ifndef UCOL_H
     11 #define UCOL_H
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_COLLATION
     16 
     17 #include "unicode/unorm.h"
     18 #include "unicode/parseerr.h"
     19 #include "unicode/uloc.h"
     20 #include "unicode/uset.h"
     21 #include "unicode/uscript.h"
     22 
     23 #if U_SHOW_CPLUSPLUS_API
     24 #include "unicode/localpointer.h"
     25 #endif   // U_SHOW_CPLUSPLUS_API
     26 
     27 /**
     28 * \file
     29 * \brief C API: Collator 
     30 *
     31 * <h2> Collator C API </h2>
     32 *
     33 * The C API for Collator performs locale-sensitive
     34 * string comparison. You use this service to build
     35 * searching and sorting routines for natural language text.
     36 * <p>
     37 * For more information about the collation service see 
     38 * <a href="https://unicode-org.github.io/icu/userguide/collation">the User Guide</a>.
     39 * <p>
     40 * Collation service provides correct sorting orders for most locales supported in ICU. 
     41 * If specific data for a locale is not available, the order eventually falls back
     42 * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. 
     43 * <p>
     44 * Sort ordering may be customized by providing your own set of rules. For more on
     45 * this subject see the <a href="https://unicode-org.github.io/icu/userguide/collation/customization">
     46 * Collation Customization</a> section of the User Guide.
     47 * <p>
     48 * @see         UCollationResult
     49 * @see         UNormalizationMode
     50 * @see         UCollationStrength
     51 * @see         UCollationElements
     52 */
     53 
     54 /** A collator.
     55 *  For usage in C programs.
     56 */
     57 struct UCollator;
     58 /** structure representing a collator object instance 
     59 * @stable ICU 2.0
     60 */
     61 typedef struct UCollator UCollator;
     62 
     63 
     64 /**
     65 * UCOL_LESS is returned if source string is compared to be less than target
     66 * string in the ucol_strcoll() method.
     67 * UCOL_EQUAL is returned if source string is compared to be equal to target
     68 * string in the ucol_strcoll() method.
     69 * UCOL_GREATER is returned if source string is compared to be greater than
     70 * target string in the ucol_strcoll() method.
     71 * @see ucol_strcoll()
     72 * <p>
     73 * Possible values for a comparison result 
     74 * @stable ICU 2.0
     75 */
     76 typedef enum {
     77  /** string a == string b */
     78  UCOL_EQUAL    = 0,
     79  /** string a > string b */
     80  UCOL_GREATER    = 1,
     81  /** string a < string b */
     82  UCOL_LESS    = -1
     83 } UCollationResult ;
     84 
     85 
     86 /** Enum containing attribute values for controlling collation behavior.
     87 * Here are all the allowable values. Not every attribute can take every value. The only
     88 * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined  
     89 * value for that locale 
     90 * @stable ICU 2.0
     91 */
     92 typedef enum {
     93  /** accepted by most attributes */
     94  UCOL_DEFAULT = -1,
     95 
     96  /** Primary collation strength */
     97  UCOL_PRIMARY = 0,
     98  /** Secondary collation strength */
     99  UCOL_SECONDARY = 1,
    100  /** Tertiary collation strength */
    101  UCOL_TERTIARY = 2,
    102  /** Default collation strength */
    103  UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
    104  UCOL_CE_STRENGTH_LIMIT, /**< Implicitly UCOL_TERTIARY + 1; has the same numeric value (3) as UCOL_QUATERNARY. */
    105  /** Quaternary collation strength */
    106  UCOL_QUATERNARY=3,
    107  /** Identical collation strength */
    108  UCOL_IDENTICAL=15,
    109  UCOL_STRENGTH_LIMIT, /**< Implicitly UCOL_IDENTICAL + 1; has the same numeric value (16) as UCOL_OFF. */
    110 
    111  /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 
    112      UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
    113      & UCOL_DECOMPOSITION_MODE; UCOL_CASE_FIRST also accepts UCOL_OFF */
    114  UCOL_OFF = 16,
    115  /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 
    116      UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
    117      & UCOL_DECOMPOSITION_MODE*/
    118  UCOL_ON = 17,
    119  
    120  /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
    121  UCOL_SHIFTED = 20,
    122  /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
    123  UCOL_NON_IGNORABLE = 21,
    124 
    125  /** Valid for UCOL_CASE_FIRST - 
    126      lower case sorts before upper case */
    127  UCOL_LOWER_FIRST = 24,
    128  /** Valid for UCOL_CASE_FIRST - upper case sorts before lower case */
    129  UCOL_UPPER_FIRST = 25,
    130 
    131 #ifndef U_HIDE_DEPRECATED_API
    132    /**
    133     * One more than the highest normal UColAttributeValue value.
    134     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    135     */
    136  UCOL_ATTRIBUTE_VALUE_COUNT
    137 #endif  /* U_HIDE_DEPRECATED_API */
    138 } UColAttributeValue;
    139 
    140 /**
    141 * Enum containing the codes for reordering segments of the collation table that are not script
    142 * codes. These reordering codes are to be used in conjunction with the script codes.
    143 * @see ucol_getReorderCodes
    144 * @see ucol_setReorderCodes
    145 * @see ucol_getEquivalentReorderCodes
    146 * @see UScriptCode
    147 * @stable ICU 4.8
    148 */
    149 typedef enum {
    150   /**
    151    * A special reordering code that is used to specify the default
    152    * reordering codes for a locale.
    153    * @stable ICU 4.8
    154    */   
    155    UCOL_REORDER_CODE_DEFAULT       = -1,
    156   /**
    157    * A special reordering code that is used to specify no reordering codes.
    158    * @stable ICU 4.8
    159    */   
    160    UCOL_REORDER_CODE_NONE          = USCRIPT_UNKNOWN,
    161   /**
    162    * A special reordering code that is used to specify all other codes used for
    163    * reordering except for the codes listed as UColReorderCode values and those
    164    * listed explicitly in a reordering.
    165    * @stable ICU 4.8
    166    */   
    167    UCOL_REORDER_CODE_OTHERS        = USCRIPT_UNKNOWN,
    168   /**
    169    * Characters with the space property.
    170    * This is equivalent to the rule value "space".
    171    * @stable ICU 4.8
    172    */    
    173    UCOL_REORDER_CODE_SPACE         = 0x1000,
    174   /**
    175    * The first entry in the enumeration of reordering groups. This is intended for use in
    176    * range checking and enumeration of the reorder codes.
    177    * @stable ICU 4.8
    178    */    
    179    UCOL_REORDER_CODE_FIRST         = UCOL_REORDER_CODE_SPACE,
    180   /**
    181    * Characters with the punctuation property.
    182    * This is equivalent to the rule value "punct".
    183    * @stable ICU 4.8
    184    */    
    185    UCOL_REORDER_CODE_PUNCTUATION   = 0x1001,
    186   /**
    187    * Characters with the symbol property.
    188    * This is equivalent to the rule value "symbol".
    189    * @stable ICU 4.8
    190    */    
    191    UCOL_REORDER_CODE_SYMBOL        = 0x1002,
    192   /**
    193    * Characters with the currency property.
    194    * This is equivalent to the rule value "currency".
    195    * @stable ICU 4.8
    196    */    
    197    UCOL_REORDER_CODE_CURRENCY      = 0x1003,
    198   /**
    199    * Characters with the digit property.
    200    * This is equivalent to the rule value "digit".
    201    * @stable ICU 4.8
    202    */    
    203    UCOL_REORDER_CODE_DIGIT         = 0x1004,
    204 #ifndef U_HIDE_DEPRECATED_API
    205    /**
    206     * One more than the highest normal UColReorderCode value.
    207     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    208     */
    209    UCOL_REORDER_CODE_LIMIT         = 0x1005
    210 #endif  /* U_HIDE_DEPRECATED_API */
    211 } UColReorderCode;
    212 
    213 /**
    214 * Base letter represents a primary difference.  Set comparison
    215 * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
    216 * Use this to set the strength of a Collator object.
    217 * Example of primary difference, "abc" &lt; "abd"
    218 * 
    219 * Diacritical differences on the same base letter represent a secondary
    220 * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
    221 * differences. Use this to set the strength of a Collator object.
    222 * Example of secondary difference, "&auml;" >> "a".
    223 *
    224 * Uppercase and lowercase versions of the same character represents a
    225 * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
    226 * all comparison differences. Use this to set the strength of a Collator
    227 * object.
    228 * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
    229 *
    230 * Two characters are considered "identical" when they have the same
    231 * unicode spellings.  UCOL_IDENTICAL.
    232 * For example, "&auml;" == "&auml;".
    233 *
    234 * UCollationStrength is also used to determine the strength of sort keys 
    235 * generated from UCollator objects
    236 * These values can be now found in the UColAttributeValue enum.
    237 * @stable ICU 2.0
    238 **/
    239 typedef UColAttributeValue UCollationStrength;
    240 
    241 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
    242 * value, as well as the values specific to each one. 
    243 * @stable ICU 2.0
    244 */
    245 typedef enum {
    246     /** Attribute for direction of secondary weights - used in Canadian French.
    247      * Acceptable values are UCOL_ON, which results in secondary weights
    248      * being considered backwards and UCOL_OFF which treats secondary
    249      * weights in the order they appear.
    250      * @stable ICU 2.0
    251      */
    252     UCOL_FRENCH_COLLATION, 
    253     /** Attribute for handling variable elements.
    254      * Acceptable values are UCOL_NON_IGNORABLE
    255      * which treats all the codepoints with non-ignorable
    256      * primary weights in the same way,
    257      * and UCOL_SHIFTED which causes codepoints with primary
    258      * weights that are equal or below the variable top value
    259      * to be ignored on primary level and moved to the quaternary
    260      * level. The default setting in a Collator object depends on the
    261      * locale data loaded from the resources. For most locales, the
    262      * default is UCOL_NON_IGNORABLE, but for others, such as "th",
    263      * the default could be UCOL_SHIFTED.
    264      * @stable ICU 2.0
    265      */
    266     UCOL_ALTERNATE_HANDLING,
    267     /** Controls the ordering of upper and lower case letters.
    268      * Acceptable values are UCOL_OFF, which orders
    269      * upper and lower case letters in accordance to their tertiary
    270      * weights, UCOL_UPPER_FIRST which forces upper case letters to
    271      * sort before lower case letters, and UCOL_LOWER_FIRST which does
    272      * the opposite. The default setting in a Collator object depends on the
    273      * locale data loaded from the resources. For most locales, the
    274      * default is UCOL_OFF, but for others, such as "da" or "mt",
    275      * the default could be UCOL_UPPER_FIRST.
    276      * @stable ICU 2.0
    277      */
    278     UCOL_CASE_FIRST,
    279     /** Controls whether an extra case level (positioned before the third
    280      * level) is generated or not. Acceptable values are UCOL_OFF,
    281      * when case level is not generated, and UCOL_ON which causes the case
    282      * level to be generated. Contents of the case level are affected by
    283      * the value of UCOL_CASE_FIRST attribute. A simple way to ignore
    284      * accent differences in a string is to set the strength to UCOL_PRIMARY
    285      * and enable case level. The default setting in a Collator object depends
    286      * on the locale data loaded from the resources.
    287      * @stable ICU 2.0
    288      */
    289     UCOL_CASE_LEVEL,
    290     /** Controls whether the normalization check and necessary normalizations
    291      * are performed. When set to UCOL_OFF no normalization check
    292      * is performed. The correctness of the result is guaranteed only if the
    293      * input data is in so-called FCD form (see users manual for more info).
    294      * When set to UCOL_ON, an incremental check is performed to see whether
    295      * the input data is in the FCD form. If the data is not in the FCD form,
    296      * incremental NFD normalization is performed. The default setting in a
    297      * Collator object depends on the locale data loaded from the resources.
    298      * For many locales, the default is UCOL_OFF, but for others, such as "hi",
    299      * "vi", or "bn", the default could be UCOL_ON.
    300      * @stable ICU 2.0
    301      */
    302     UCOL_NORMALIZATION_MODE, 
    303     /** An alias for UCOL_NORMALIZATION_MODE attribute.
    304      * @stable ICU 2.0
    305      */
    306     UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
    307     /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
    308      * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
    309      * for most locales (except Japanese) is tertiary.
    310      *
    311      * Quaternary strength 
    312      * is useful when combined with shifted setting for alternate handling
    313      * attribute and for JIS X 4061 collation, when it is used to distinguish
    314      * between Katakana and Hiragana.
    315      * Otherwise, quaternary level
    316      * is affected only by the number of non-ignorable code points in
    317      * the string.
    318      *
    319      * Identical strength is rarely useful, as it amounts 
    320      * to codepoints of the NFD form of the string.
    321      * @stable ICU 2.0
    322      */
    323     UCOL_STRENGTH,  
    324 #ifndef U_HIDE_DEPRECATED_API
    325     /** When turned on, this attribute positions Hiragana before all  
    326      * non-ignorables on quaternary level. This is a sneaky way to produce JIS
    327      * sort order.
    328      *
    329      * This attribute was an implementation detail of the CLDR Japanese tailoring.
    330      * Since ICU 50, this attribute is not settable any more via API functions.
    331      * Since CLDR 25/ICU 53, explicit quaternary relations are used
    332      * to achieve the same Japanese sort order.
    333      *
    334      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
    335      */
    336     UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
    337 #endif  /* U_HIDE_DEPRECATED_API */
    338     /**
    339      * When turned on, this attribute makes
    340      * substrings of digits sort according to their numeric values.
    341      *
    342      * This is a way to get '100' to sort AFTER '2'. Note that the longest
    343      * digit substring that can be treated as a single unit is
    344      * 254 digits (not counting leading zeros). If a digit substring is
    345      * longer than that, the digits beyond the limit will be treated as a
    346      * separate digit substring.
    347      *
    348      * A "digit" in this sense is a code point with General_Category=Nd,
    349      * which does not include circled numbers, roman numerals, etc.
    350      * Only a contiguous digit substring is considered, that is,
    351      * non-negative integers without separators.
    352      * There is no support for plus/minus signs, decimals, exponents, etc.
    353      *
    354      * @stable ICU 2.8
    355      */
    356     UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, 
    357 
    358    /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API,
    359     * it is needed for layout of RuleBasedCollator object. */
    360 #ifndef U_FORCE_HIDE_DEPRECATED_API
    361    /**
    362     * One more than the highest normal UColAttribute value.
    363     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    364     */
    365     UCOL_ATTRIBUTE_COUNT
    366 #endif  // U_FORCE_HIDE_DEPRECATED_API
    367 } UColAttribute;
    368 
    369 /** Options for retrieving the rule string 
    370 *  @stable ICU 2.0
    371 */
    372 typedef enum {
    373  /**
    374   * Retrieves the tailoring rules only.
    375   * Same as calling the version of getRules() without UColRuleOption.
    376   * @stable ICU 2.0
    377   */
    378  UCOL_TAILORING_ONLY, 
    379  /**
    380   * Retrieves the "UCA rules" concatenated with the tailoring rules.
    381   * The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
    382   * They are almost never used or useful at runtime and can be removed from the data.
    383   * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales
    384   * @stable ICU 2.0
    385   */
    386  UCOL_FULL_RULES 
    387 } UColRuleOption ;
    388 
    389 /**
    390 * Open a UCollator for comparing strings.
    391 *
    392 * For some languages, multiple collation types are available;
    393 * for example, "de@collation=phonebook".
    394 * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
    395 * in the old locale extension syntax ("el@colCaseFirst=upper")
    396 * or in language tag syntax ("el-u-kf-upper").
    397 * See <a href="https://unicode-org.github.io/icu/userguide/collation/api">User Guide: Collation API</a>.
    398 *
    399 * The UCollator pointer is used in all the calls to the Collation 
    400 * service. When finished, the collator must be disposed of by calling
    401 * {@link #ucol_close }.
    402 * @param loc The locale containing the required collation rules. 
    403 *            Special values for locales can be passed in - 
    404 *            if NULL is passed for the locale, the default locale
    405 *            collation rules will be used. If empty string ("") or
    406 *            "root" are passed, the root collator will be returned.
    407 * @param status A pointer to a UErrorCode to receive any errors
    408 * @return A pointer to a UCollator, or 0 if an error occurred.
    409 * @see ucol_openRules
    410 * @see ucol_clone
    411 * @see ucol_close
    412 * @stable ICU 2.0
    413 */
    414 U_CAPI UCollator* U_EXPORT2 
    415 ucol_open(const char *loc, UErrorCode *status);
    416 
    417 /**
    418 * Produce a UCollator instance according to the rules supplied.
    419 * The rules are used to change the default ordering, defined in the
    420 * UCA in a process called tailoring. The resulting UCollator pointer
    421 * can be used in the same way as the one obtained by {@link #ucol_open }.
    422 * @param rules A string describing the collation rules. For the syntax
    423 *              of the rules please see users guide.
    424 * @param rulesLength The length of rules, or -1 if null-terminated.
    425 * @param normalizationMode The normalization mode: One of
    426 *             UCOL_OFF     (expect the text to not need normalization),
    427 *             UCOL_ON      (normalize), or
    428 *             UCOL_DEFAULT (set the mode according to the rules)
    429 * @param strength The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
    430 *                 UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH.
    431 *                 If you want to set the strength via the rules,
    432 *                 then use UCOL_DEFAULT to not override that.
    433 * @param parseError  A pointer to UParseError to receive information about errors
    434 *                    that occurred during parsing. This argument can currently be set
    435 *                    to NULL, but at the user's own risk. Please provide a real structure.
    436 * @param status A pointer to a UErrorCode to receive any errors
    437 * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
    438 *         of error - please use status argument to check for errors.
    439 * @see ucol_open
    440 * @see ucol_clone
    441 * @see ucol_close
    442 * @stable ICU 2.0
    443 */
    444 U_CAPI UCollator* U_EXPORT2 
    445 ucol_openRules( const UChar        *rules,
    446                int32_t            rulesLength,
    447                UColAttributeValue normalizationMode,
    448                UCollationStrength strength,
    449                UParseError        *parseError,
    450                UErrorCode         *status);
    451 
    452 #ifndef U_HIDE_DEPRECATED_API
    453 /** 
    454 * Open a collator defined by a short form string.
    455 * The structure and the syntax of the string is defined in the "Naming collators"
    456 * section of the users guide: 
    457 * https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme
    458 * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final
    459 * strength will be 3. 3066bis locale overrides individual locale parts.
    460 * The call to this function is equivalent to a call to ucol_open, followed by a 
    461 * series of calls to ucol_setAttribute and ucol_setVariableTop.
    462 * @param definition A short string containing a locale and a set of attributes. 
    463 *                   Attributes not explicitly mentioned are left at the default
    464 *                   state for a locale.
    465 * @param parseError if not NULL, structure that will get filled with error's pre
    466 *                   and post context in case of error.
    467 * @param forceDefaults if false, the settings that are the same as the collator 
    468 *                   default settings will not be applied (for example, setting
    469 *                   French secondary on a French collator would not be executed). 
    470 *                   If true, all the settings will be applied regardless of the 
    471 *                   collator default value. If the definition
    472 *                   strings are to be cached, should be set to false.
    473 * @param status     Error code. Apart from regular error conditions connected to 
    474 *                   instantiating collators (like out of memory or similar), this
    475 *                   API will return an error if an invalid attribute or attribute/value
    476 *                   combination is specified.
    477 * @return           A pointer to a UCollator or 0 if an error occurred (including an 
    478 *                   invalid attribute).
    479 * @see ucol_open
    480 * @see ucol_setAttribute
    481 * @see ucol_setVariableTop
    482 * @see ucol_getShortDefinitionString
    483 * @see ucol_normalizeShortDefinitionString
    484 * @deprecated ICU 54 Use ucol_open() with language tag collation keywords instead.
    485 */
    486 U_DEPRECATED UCollator* U_EXPORT2
    487 ucol_openFromShortString( const char *definition,
    488                          UBool forceDefaults,
    489                          UParseError *parseError,
    490                          UErrorCode *status);
    491 #endif  /* U_HIDE_DEPRECATED_API */
    492 
    493 #ifndef U_HIDE_DEPRECATED_API
    494 /**
    495 * Get a set containing the contractions defined by the collator. The set includes
    496 * both the root collator's contractions and the contractions defined by the collator. This set
    497 * will contain only strings. If a tailoring explicitly suppresses contractions from 
    498 * the root collator (like Russian), removed contractions will not be in the resulting set.
    499 * @param coll collator 
    500 * @param conts the set to hold the result. It gets emptied before
    501 *              contractions are added. 
    502 * @param status to hold the error code
    503 * @return the size of the contraction set
    504 *
    505 * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead
    506 */
    507 U_DEPRECATED int32_t U_EXPORT2
    508 ucol_getContractions( const UCollator *coll,
    509                  USet *conts,
    510                  UErrorCode *status);
    511 #endif  /* U_HIDE_DEPRECATED_API */
    512 
    513 /**
    514 * Get a set containing the expansions defined by the collator. The set includes
    515 * both the root collator's expansions and the expansions defined by the tailoring.
    516 * @param coll collator
    517 * @param contractions if not NULL, the set to hold the contractions
    518 * @param expansions if not NULL, the set to hold the expansions
    519 * @param addPrefixes add the prefix contextual elements to contractions
    520 * @param status to hold the error code
    521 *
    522 * @stable ICU 3.4
    523 */
    524 U_CAPI void U_EXPORT2
    525 ucol_getContractionsAndExpansions( const UCollator *coll,
    526                  USet *contractions, USet *expansions,
    527                  UBool addPrefixes, UErrorCode *status);
    528 
    529 /** 
    530 * Close a UCollator.
    531 * Once closed, a UCollator should not be used. Every open collator should
    532 * be closed. Otherwise, a memory leak will result.
    533 * @param coll The UCollator to close.
    534 * @see ucol_open
    535 * @see ucol_openRules
    536 * @see ucol_clone
    537 * @stable ICU 2.0
    538 */
    539 U_CAPI void U_EXPORT2 
    540 ucol_close(UCollator *coll);
    541 
    542 #if U_SHOW_CPLUSPLUS_API
    543 
    544 U_NAMESPACE_BEGIN
    545 
    546 /**
    547 * \class LocalUCollatorPointer
    548 * "Smart pointer" class, closes a UCollator via ucol_close().
    549 * For most methods see the LocalPointerBase base class.
    550 *
    551 * @see LocalPointerBase
    552 * @see LocalPointer
    553 * @stable ICU 4.4
    554 */
    555 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close);
    556 
    557 U_NAMESPACE_END
    558 
    559 #endif   // U_SHOW_CPLUSPLUS_API
    560 
    561 /**
    562 * Compare two strings.
    563 * The strings will be compared using the options already specified.
    564 * @param coll The UCollator containing the comparison rules.
    565 * @param source The source string.
    566 * @param sourceLength The length of source, or -1 if null-terminated.
    567 * @param target The target string.
    568 * @param targetLength The length of target, or -1 if null-terminated.
    569 * @return The result of comparing the strings; one of UCOL_EQUAL,
    570 * UCOL_GREATER, UCOL_LESS
    571 * @see ucol_greater
    572 * @see ucol_greaterOrEqual
    573 * @see ucol_equal
    574 * @stable ICU 2.0
    575 */
    576 U_CAPI UCollationResult U_EXPORT2 
    577 ucol_strcoll(    const    UCollator    *coll,
    578        const    UChar        *source,
    579        int32_t            sourceLength,
    580        const    UChar        *target,
    581        int32_t            targetLength);
    582 
    583 /** 
    584 * Compare two strings in UTF-8. 
    585 * The strings will be compared using the options already specified. 
    586 * Note: When input string contains a malformed UTF-8 byte sequence,
    587 * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
    588 * @param coll The UCollator containing the comparison rules. 
    589 * @param source The source UTF-8 string. 
    590 * @param sourceLength The length of source, or -1 if null-terminated. 
    591 * @param target The target UTF-8 string. 
    592 * @param targetLength The length of target, or -1 if null-terminated. 
    593 * @param status A pointer to a UErrorCode to receive any errors 
    594 * @return The result of comparing the strings; one of UCOL_EQUAL, 
    595 * UCOL_GREATER, UCOL_LESS 
    596 * @see ucol_greater 
    597 * @see ucol_greaterOrEqual 
    598 * @see ucol_equal 
    599 * @stable ICU 50 
    600 */ 
    601 U_CAPI UCollationResult U_EXPORT2
    602 ucol_strcollUTF8(
    603        const UCollator *coll,
    604        const char      *source,
    605        int32_t         sourceLength,
    606        const char      *target,
    607        int32_t         targetLength,
    608        UErrorCode      *status);
    609 
    610 /**
    611 * Determine if one string is greater than another.
    612 * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
    613 * @param coll The UCollator containing the comparison rules.
    614 * @param source The source string.
    615 * @param sourceLength The length of source, or -1 if null-terminated.
    616 * @param target The target string.
    617 * @param targetLength The length of target, or -1 if null-terminated.
    618 * @return true if source is greater than target, false otherwise.
    619 * @see ucol_strcoll
    620 * @see ucol_greaterOrEqual
    621 * @see ucol_equal
    622 * @stable ICU 2.0
    623 */
    624 U_CAPI UBool U_EXPORT2 
    625 ucol_greater(const UCollator *coll,
    626             const UChar     *source, int32_t sourceLength,
    627             const UChar     *target, int32_t targetLength);
    628 
    629 /**
    630 * Determine if one string is greater than or equal to another.
    631 * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
    632 * @param coll The UCollator containing the comparison rules.
    633 * @param source The source string.
    634 * @param sourceLength The length of source, or -1 if null-terminated.
    635 * @param target The target string.
    636 * @param targetLength The length of target, or -1 if null-terminated.
    637 * @return true if source is greater than or equal to target, false otherwise.
    638 * @see ucol_strcoll
    639 * @see ucol_greater
    640 * @see ucol_equal
    641 * @stable ICU 2.0
    642 */
    643 U_CAPI UBool U_EXPORT2 
    644 ucol_greaterOrEqual(const UCollator *coll,
    645                    const UChar     *source, int32_t sourceLength,
    646                    const UChar     *target, int32_t targetLength);
    647 
    648 /**
    649 * Compare two strings for equality.
    650 * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
    651 * @param coll The UCollator containing the comparison rules.
    652 * @param source The source string.
    653 * @param sourceLength The length of source, or -1 if null-terminated.
    654 * @param target The target string.
    655 * @param targetLength The length of target, or -1 if null-terminated.
    656 * @return true if source is equal to target, false otherwise
    657 * @see ucol_strcoll
    658 * @see ucol_greater
    659 * @see ucol_greaterOrEqual
    660 * @stable ICU 2.0
    661 */
    662 U_CAPI UBool U_EXPORT2 
    663 ucol_equal(const UCollator *coll,
    664           const UChar     *source, int32_t sourceLength,
    665           const UChar     *target, int32_t targetLength);
    666 
    667 /**
    668 * Compare two strings accessed through UCharIterators (for example, UTF-8 encoded strings).
    669 * The strings will be compared using the options already specified.
    670 * @param coll The UCollator containing the comparison rules.
    671 * @param sIter The source string iterator.
    672 * @param tIter The target string iterator.
    673 * @return The result of comparing the strings; one of UCOL_EQUAL,
    674 * UCOL_GREATER, UCOL_LESS
    675 * @param status A pointer to a UErrorCode to receive any errors
    676 * @see ucol_strcoll
    677 * @stable ICU 2.6
    678 */
    679 U_CAPI UCollationResult U_EXPORT2 
    680 ucol_strcollIter(  const    UCollator    *coll,
    681                  UCharIterator *sIter,
    682                  UCharIterator *tIter,
    683                  UErrorCode *status);
    684 
    685 /**
    686 * Get the collation strength used in a UCollator.
    687 * The strength influences how strings are compared.
    688 * @param coll The UCollator to query.
    689 * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
    690 * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
    691 * @see ucol_setStrength
    692 * @stable ICU 2.0
    693 */
    694 U_CAPI UCollationStrength U_EXPORT2 
    695 ucol_getStrength(const UCollator *coll);
    696 
    697 /**
    698 * Set the collation strength used in a UCollator.
    699 * The strength influences how strings are compared.
    700 * @param coll The UCollator to set.
    701 * @param strength The desired collation strength; one of UCOL_PRIMARY, 
    702 * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
    703 * @see ucol_getStrength
    704 * @stable ICU 2.0
    705 */
    706 U_CAPI void U_EXPORT2 
    707 ucol_setStrength(UCollator *coll,
    708                 UCollationStrength strength);
    709 
    710 /**
    711 * Retrieves the reordering codes for this collator.
    712 * These reordering codes are a combination of UScript codes and UColReorderCode entries.
    713 * @param coll The UCollator to query.
    714 * @param dest The array to fill with the reordering codes.
    715 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 
    716 * will only return the length of the result without writing any codes (pre-flighting).
    717 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 
    718 * failure before the function call.
    719 * @return The number of reordering codes written to the dest array.
    720 * @see ucol_setReorderCodes
    721 * @see ucol_getEquivalentReorderCodes
    722 * @see UScriptCode
    723 * @see UColReorderCode
    724 * @stable ICU 4.8
    725 */
    726 U_CAPI int32_t U_EXPORT2 
    727 ucol_getReorderCodes(const UCollator* coll,
    728                    int32_t* dest,
    729                    int32_t destCapacity,
    730                    UErrorCode *pErrorCode);
    731 /** 
    732 * Sets the reordering codes for this collator.
    733 * Collation reordering allows scripts and some other groups of characters
    734 * to be moved relative to each other. This reordering is done on top of
    735 * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 
    736 * at the start and/or the end of the collation order. These groups are specified using
    737 * UScript codes and UColReorderCode entries.
    738 *
    739 * <p>By default, reordering codes specified for the start of the order are placed in the 
    740 * order given after several special non-script blocks. These special groups of characters
    741 * are space, punctuation, symbol, currency, and digit. These special groups are represented with
    742 * UColReorderCode entries. Script groups can be intermingled with 
    743 * these special non-script groups if those special groups are explicitly specified in the reordering.
    744 *
    745 * <p>The special code OTHERS stands for any script that is not explicitly 
    746 * mentioned in the list of reordering codes given. Anything that is after OTHERS
    747 * will go at the very end of the reordering in the order given.
    748 *
    749 * <p>The special reorder code DEFAULT will reset the reordering for this collator
    750 * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
    751 * was specified when this collator was created from resource data or from rules. The 
    752 * DEFAULT code <b>must</b> be the sole code supplied when it is used.
    753 * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
    754 *
    755 * <p>The special reorder code NONE will remove any reordering for this collator.
    756 * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 
    757 * NONE code <b>must</b> be the sole code supplied when it is used.
    758 *
    759 * @param coll The UCollator to set.
    760 * @param reorderCodes An array of reordering codes (UScript codes and UColReorderCode entries) in the new order.
    761 * This can be NULL if the length is also set to 0. An empty array will clear any reordering codes on the collator.
    762 * @param reorderCodesLength The length of reorderCodes.
    763 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
    764 * failure before the function call.
    765 * @see ucol_getReorderCodes
    766 * @see ucol_getEquivalentReorderCodes
    767 * @see UScriptCode
    768 * @see UColReorderCode
    769 * @stable ICU 4.8
    770 */ 
    771 U_CAPI void U_EXPORT2 
    772 ucol_setReorderCodes(UCollator* coll,
    773                    const int32_t* reorderCodes,
    774                    int32_t reorderCodesLength,
    775                    UErrorCode *pErrorCode);
    776 
    777 /**
    778 * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
    779 * codes will be grouped and must reorder together.
    780 * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
    781 * for example Hiragana and Katakana.
    782 *
    783 * @param reorderCode The reorder code to determine equivalence for.
    784 * @param dest The array to fill with the equivalent reorder codes.
    785 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
    786 * will only return the length of the result without writing any codes (pre-flighting).
    787 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate 
    788 * a failure before the function call.
    789 * @return The number of reordering codes written to the dest array.
    790 * @see ucol_setReorderCodes
    791 * @see ucol_getReorderCodes
    792 * @see UScriptCode
    793 * @see UColReorderCode
    794 * @stable ICU 4.8
    795 */
    796 U_CAPI int32_t U_EXPORT2 
    797 ucol_getEquivalentReorderCodes(int32_t reorderCode,
    798                    int32_t* dest,
    799                    int32_t destCapacity,
    800                    UErrorCode *pErrorCode);
    801 
    802 /**
    803 * Get the display name for a UCollator.
    804 * The display name is suitable for presentation to a user.
    805 * @param objLoc The locale of the collator in question.
    806 * @param dispLoc The locale for display.
    807 * @param result A pointer to a buffer to receive the display name.
    808 * @param resultLength The maximum size of result.
    809 * @param status A pointer to a UErrorCode to receive any errors.
    810 * @return The total buffer size needed; if greater than resultLength,
    811 * the output was truncated.
    812 * @stable ICU 2.0
    813 */
    814 U_CAPI int32_t U_EXPORT2 
    815 ucol_getDisplayName(    const    char        *objLoc,
    816            const    char        *dispLoc,
    817            UChar             *result,
    818            int32_t         resultLength,
    819            UErrorCode        *status);
    820 
    821 /**
    822 * Get a locale for which collation rules are available.
    823 * A UCollator in a locale returned by this function will perform the correct
    824 * collation for the locale.
    825 * @param localeIndex The index of the desired locale.
    826 * @return A locale for which collation rules are available, or NULL if none.
    827 * @see ucol_countAvailable
    828 * @stable ICU 2.0
    829 */
    830 U_CAPI const char* U_EXPORT2 
    831 ucol_getAvailable(int32_t localeIndex);
    832 
    833 /**
    834 * Determine how many locales have collation rules available.
    835 * This function is most useful for determining the loop ending condition for
    836 * calls to {@link #ucol_getAvailable }.
    837 * @return The number of locales for which collation rules are available.
    838 * @see ucol_getAvailable
    839 * @stable ICU 2.0
    840 */
    841 U_CAPI int32_t U_EXPORT2 
    842 ucol_countAvailable(void);
    843 
    844 #if !UCONFIG_NO_SERVICE
    845 /**
    846 * Create a string enumerator of all locales for which a valid
    847 * collator may be opened.
    848 * @param status input-output error code
    849 * @return a string enumeration over locale strings. The caller is
    850 * responsible for closing the result.
    851 * @stable ICU 3.0
    852 */
    853 U_CAPI UEnumeration* U_EXPORT2
    854 ucol_openAvailableLocales(UErrorCode *status);
    855 #endif
    856 
    857 /**
    858 * Create a string enumerator of all possible keywords that are relevant to
    859 * collation. At this point, the only recognized keyword for this
    860 * service is "collation".
    861 * @param status input-output error code
    862 * @return a string enumeration over keyword strings. The caller is
    863 * responsible for closing the result.
    864 * @stable ICU 3.0
    865 */
    866 U_CAPI UEnumeration* U_EXPORT2
    867 ucol_getKeywords(UErrorCode *status);
    868 
    869 /**
    870 * Given a keyword, create a string enumeration of all values
    871 * for that keyword that are currently in use.
    872 * @param keyword a particular keyword as enumerated by
    873 * ucol_getKeywords. If any other keyword is passed in, *status is set
    874 * to U_ILLEGAL_ARGUMENT_ERROR.
    875 * @param status input-output error code
    876 * @return a string enumeration over collation keyword values, or NULL
    877 * upon error. The caller is responsible for closing the result.
    878 * @stable ICU 3.0
    879 */
    880 U_CAPI UEnumeration* U_EXPORT2
    881 ucol_getKeywordValues(const char *keyword, UErrorCode *status);
    882 
    883 /**
    884 * Given a key and a locale, returns an array of string values in a preferred
    885 * order that would make a difference. These are all and only those values where
    886 * the open (creation) of the service with the locale formed from the input locale
    887 * plus input keyword and that value has different behavior than creation with the
    888 * input locale alone.
    889 * @param key           one of the keys supported by this service.  For now, only
    890 *                      "collation" is supported.
    891 * @param locale        the locale
    892 * @param commonlyUsed  if set to true it will return only commonly used values
    893 *                      with the given locale in preferred order.  Otherwise,
    894 *                      it will return all the available values for the locale.
    895 * @param status error status
    896 * @return a string enumeration over keyword values for the given key and the locale.
    897 * @stable ICU 4.2
    898 */
    899 U_CAPI UEnumeration* U_EXPORT2
    900 ucol_getKeywordValuesForLocale(const char* key,
    901                               const char* locale,
    902                               UBool commonlyUsed,
    903                               UErrorCode* status);
    904 
    905 /**
    906 * Return the functionally equivalent locale for the specified
    907 * input locale, with respect to given keyword, for the
    908 * collation service. If two different input locale + keyword
    909 * combinations produce the same result locale, then collators
    910 * instantiated for these two different input locales will behave
    911 * equivalently. The converse is not always true; two collators
    912 * may in fact be equivalent, but return different results, due to
    913 * internal details. The return result has no other meaning than
    914 * that stated above, and implies nothing as to the relationship
    915 * between the two locales. This is intended for use by
    916 * applications that wish to cache collators, or otherwise reuse
    917 * collators when possible. The functional equivalent may change
    918 * over time. For more information, please see the <a
    919 * href="https://unicode-org.github.io/icu/userguide/locale#locales-and-services">
    920 * Locales and Services</a> section of the ICU User Guide.
    921 * @param result fill-in buffer for the functionally equivalent result locale
    922 * @param resultCapacity capacity of the fill-in buffer
    923 * @param keyword a particular keyword as enumerated by
    924 * ucol_getKeywords.
    925 * @param locale the specified input locale
    926 * @param isAvailable if non-NULL, pointer to a fill-in parameter that
    927 * on return indicates whether the specified input locale was 'available'
    928 * to the collation service. A locale is defined as 'available' if it
    929 * physically exists within the collation locale data.
    930 * @param status pointer to input-output error code
    931 * @return the actual buffer size needed for the locale. If greater
    932 * than resultCapacity, the returned full name will be truncated and
    933 * an error code will be returned.
    934 * @stable ICU 3.0
    935 */
    936 U_CAPI int32_t U_EXPORT2
    937 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
    938                             const char* keyword, const char* locale,
    939                             UBool* isAvailable, UErrorCode* status);
    940 
    941 /**
    942 * Get the collation tailoring rules from a UCollator.
    943 * The rules will follow the rule syntax.
    944 * @param coll The UCollator to query.
    945 * @param length Output parameter; receives the length of the returned rules string.
    946 * @return The collation tailoring rules.
    947 * @stable ICU 2.0
    948 */
    949 U_CAPI const UChar* U_EXPORT2 
    950 ucol_getRules(    const    UCollator    *coll, 
    951        int32_t            *length);
    952 
    953 #ifndef U_HIDE_DEPRECATED_API
    954 /** Get the short definition string for a collator. This API harvests the collator's
    955 *  locale and the attribute set and produces a string that can be used for opening 
    956 *  a collator with the same attributes using the ucol_openFromShortString API.
    957 *  This string will be normalized.
    958 *  The structure and the syntax of the string are defined in the "Naming collators"
    959 *  section of the User Guide: 
    960 *  https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme
    961 *  This API supports preflighting.
    962 *  @param coll a collator
    963 *  @param locale a locale that will appear as the collator's locale in the resulting
    964 *                short string definition. If NULL, the locale will be harvested 
    965 *                from the collator.
    966 *  @param buffer space to hold the resulting string
    967 *  @param capacity capacity of the buffer
    968 *  @param status for returning errors, including all preflighting errors
    969 *  @return length of the resulting string
    970 *  @see ucol_openFromShortString
    971 *  @see ucol_normalizeShortDefinitionString
    972 *  @deprecated ICU 54
    973 */
    974 U_DEPRECATED int32_t U_EXPORT2
    975 ucol_getShortDefinitionString(const UCollator *coll,
    976                              const char *locale,
    977                              char *buffer,
    978                              int32_t capacity,
    979                              UErrorCode *status);
    980 
    981 /** Verifies and normalizes a short definition string.
    982 *  A normalized short definition string has all the options sorted by the argument name,
    983 *  so that equivalent definition strings are the same. 
    984 *  This API supports preflighting.
    985 *  @param source definition string
    986 *  @param destination space to hold the resulting string
    987 *  @param capacity capacity of the buffer
    988 *  @param parseError if not NULL, structure that will get filled with the error's pre
    989 *                   and post context in case of error.
    990 *  @param status     Error code. This API will return an error if an invalid attribute 
    991 *                    or attribute/value combination is specified. All the preflighting 
    992 *                    errors are also reported.
    993 *  @return length of the resulting normalized string.
    994 *
    995 *  @see ucol_openFromShortString
    996 *  @see ucol_getShortDefinitionString
    997 * 
    998 *  @deprecated ICU 54
    999 */
   1000 U_DEPRECATED int32_t U_EXPORT2
   1001 ucol_normalizeShortDefinitionString(const char *source,
   1002                                    char *destination,
   1003                                    int32_t capacity,
   1004                                    UParseError *parseError,
   1005                                    UErrorCode *status);
   1006 #endif  /* U_HIDE_DEPRECATED_API */
   1007 
   1008 
   1009 /**
   1010 * Get a sort key for a string from a UCollator.
   1011 * Sort keys may be compared using <TT>strcmp</TT>.
   1012 *
   1013 * Note that sort keys are often less efficient than simply doing comparison.  
   1014 * For more details, see the ICU User Guide.
   1015 *
   1016 * As with ICU functions that write to an output buffer, the buffer contents
   1017 * are undefined if the buffer capacity (resultLength parameter) is too small.
   1018 * Unlike ICU functions that write a string to an output buffer,
   1019 * the terminating zero byte is counted in the sort key length.
   1020 * @param coll The UCollator containing the collation rules.
   1021 * @param source The string to transform.
   1022 * @param sourceLength The length of source, or -1 if null-terminated.
   1023 * @param result A pointer to a buffer to receive the sort key.
   1024 * @param resultLength The maximum size of result.
   1025 * @return The size needed to fully store the sort key.
   1026 *      If there was an internal error generating the sort key,
   1027 *      a zero value is returned.
   1028 * @see ucol_keyHashCode
   1029 * @stable ICU 2.0
   1030 */
   1031 U_CAPI int32_t U_EXPORT2 
   1032 ucol_getSortKey(const    UCollator    *coll,
   1033        const    UChar        *source,
   1034        int32_t        sourceLength,
   1035        uint8_t        *result,
   1036        int32_t        resultLength);
   1037 
   1038 
   1039 /** Gets the next count bytes of a sort key. Caller needs
   1040 *  to preserve state array between calls and to provide
   1041 *  the same type of UCharIterator set with the same string.
   1042 *  The destination buffer provided must be big enough to store
   1043 *  the number of requested bytes.
   1044 *
   1045 *  The generated sort key may or may not be compatible with
   1046 *  sort keys generated using ucol_getSortKey().
   1047 *  @param coll The UCollator containing the collation rules.
   1048 *  @param iter UCharIterator containing the string we need 
   1049 *              the sort key to be calculated for.
   1050 *  @param state Opaque state of sortkey iteration.
   1051 *  @param dest Buffer to hold the resulting sortkey part
   1052 *  @param count number of sort key bytes required.
   1053 *  @param status error code indicator.
   1054 *  @return the actual number of bytes of a sortkey. It can be
   1055 *          smaller than count if we have reached the end of 
   1056 *          the sort key.
   1057 *  @stable ICU 2.6
   1058 */
   1059 U_CAPI int32_t U_EXPORT2 
   1060 ucol_nextSortKeyPart(const UCollator *coll,
   1061                     UCharIterator *iter,
   1062                     uint32_t state[2],
   1063                     uint8_t *dest, int32_t count,
   1064                     UErrorCode *status);
   1065 
   1066 /** Enum that is taken by the ucol_getBound API.
   1067 * See ucol_getBound() for explanation.
   1068 * Do not change the values assigned to the
   1069 * members of this enum: underlying code
   1070 * depends on them having these numbers.
   1071 * @stable ICU 2.0
   1072 */
   1073 typedef enum {
   1074  /** lower bound */
   1075  UCOL_BOUND_LOWER = 0,
   1076  /** upper bound that will match strings of exact size */
   1077  UCOL_BOUND_UPPER = 1,
   1078  /** upper bound that will match all the strings that have the same initial substring as the given string */
   1079  UCOL_BOUND_UPPER_LONG = 2,
   1080 #ifndef U_HIDE_DEPRECATED_API
   1081    /**
   1082     * One more than the highest normal UColBoundMode value.
   1083     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   1084     */
   1085    UCOL_BOUND_VALUE_COUNT
   1086 #endif  /* U_HIDE_DEPRECATED_API */
   1087 } UColBoundMode;
   1088 
   1089 /**
   1090 * Produce a bound for a given sortkey and a number of levels.
   1091 * Return value is always the number of bytes needed, regardless of 
   1092 * whether the result buffer was big enough or even valid.<br>
   1093 * Resulting bounds can be used to produce a range of strings that are
   1094 * between upper and lower bounds. For example, if bounds are produced
   1095 * for a sortkey of string "smith", strings between upper and lower 
   1096 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
   1097 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
   1098 * is produced, strings matched would be as above. However, if a bound
   1099 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
   1100 * also match "Smithsonian" and similar.<br>
   1101 * For more on usage, see the example in cintltst/capitst.c in procedure
   1102 * TestBounds.
   1103 * Sort keys may be compared using <TT>strcmp</TT>.
   1104 * @param source The source sortkey.
   1105 * @param sourceLength The length of source, or -1 if null-terminated. 
   1106 *                     (If an unmodified sortkey is passed, it is always null 
   1107 *                      terminated).
   1108 * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 
   1109 *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, which 
   1110 *                  produces an upper bound that matches strings of the same length, 
   1111 *                  or UCOL_BOUND_UPPER_LONG, which matches strings that have the 
   1112 *                  same starting substring as the source string.
   1113 * @param noOfLevels  Number of levels required in the resulting bound (for most 
   1114 *                    uses, the recommended value is 1). See the User Guide for an 
   1115 *                    explanation of the number of levels a sortkey can have.
   1116 * @param result A pointer to a buffer to receive the resulting sortkey.
   1117 * @param resultLength The maximum size of result.
   1118 * @param status Used for returning error code if something went wrong. If the 
   1119 *               number of levels requested is higher than the number of levels
   1120 *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 
   1121 *               issued.
   1122 * @return The size needed to fully store the bound. 
   1123 * @see ucol_keyHashCode
   1124 * @stable ICU 2.1
   1125 */
   1126 U_CAPI int32_t U_EXPORT2 
   1127 ucol_getBound(const uint8_t       *source,
   1128        int32_t             sourceLength,
   1129        UColBoundMode       boundType,
   1130        uint32_t            noOfLevels,
   1131        uint8_t             *result,
   1132        int32_t             resultLength,
   1133        UErrorCode          *status);
   1134        
   1135 /**
   1136 * Gets the version information for a Collator. Version is currently
   1137 * an opaque 32-bit number which depends, among other things, on major
   1138 * versions of the collator tailoring and UCA.
   1139 * @param coll The UCollator to query.
   1140 * @param info Receives the version information; the result will be filled in.
   1141 * @stable ICU 2.0
   1142 */
   1143 U_CAPI void U_EXPORT2
   1144 ucol_getVersion(const UCollator* coll, UVersionInfo info);
   1145 
   1146 /**
   1147 * Gets the UCA version information for a Collator. Version is the
   1148 * UCA version number (3.1.1, 4.0).
   1149 * @param coll The UCollator to query.
   1150 * @param info Receives the UCA version information; the result will be filled in.
   1151 * @stable ICU 2.8
   1152 */
   1153 U_CAPI void U_EXPORT2
   1154 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
   1155 
   1156 /**
   1157 * Merges two sort keys. The levels are merged with their corresponding counterparts
   1158 * (primaries with primaries, secondaries with secondaries etc.). Between the values
   1159 * from the same level a separator is inserted.
   1160 *
   1161 * This is useful, for example, for combining sort keys from first and last names
   1162 * to sort such pairs.
   1163 * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
   1164 *
   1165 * The recommended way to achieve "merged" sorting is by
   1166 * concatenating strings with U+FFFE between them.
   1167 * The concatenation has the same sort order as the merged sort keys,
   1168 * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\\uFFFE' + str2).
   1169 * Using strings with U+FFFE may yield shorter sort keys.
   1170 *
   1171 * For details about Sort Key Features see
   1172 * https://unicode-org.github.io/icu/userguide/collation/api#sort-key-features
   1173 *
   1174 * It is possible to merge multiple sort keys by consecutively merging
   1175 * another one with the intermediate result.
   1176 *
   1177 * The length of the merge result is the sum of the lengths of the input sort keys.
   1178 *
   1179 * Example (uncompressed):
   1180 * <pre>191B1D 01 050505 01 910505 00
   1181 * 1F2123 01 050505 01 910505 00</pre>
   1182 * will be merged as 
   1183 * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
   1184 *
   1185 * If the destination buffer is not big enough, then its contents are undefined.
   1186 * If any of the source lengths is zero or any of the source pointers is NULL/undefined,
   1187 * the result is of size zero.
   1188 *
   1189 * @param src1 the first sort key
   1190 * @param src1Length the length of the first sort key, including the zero byte at the end;
   1191 *        can be -1 if the function is to find the length
   1192 * @param src2 the second sort key
   1193 * @param src2Length the length of the second sort key, including the zero byte at the end;
   1194 *        can be -1 if the function is to find the length
   1195 * @param dest the buffer where the merged sort key is written,
   1196 *        can be NULL if destCapacity==0
   1197 * @param destCapacity the number of bytes in the dest buffer
   1198 * @return the length of the merged sort key, src1Length+src2Length;
   1199 *         can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
   1200 *         in which case the contents of dest are undefined
   1201 * @stable ICU 2.0
   1202 */
   1203 U_CAPI int32_t U_EXPORT2 
   1204 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
   1205                   const uint8_t *src2, int32_t src2Length,
   1206                   uint8_t *dest, int32_t destCapacity);
   1207 
   1208 /**
   1209 * Universal attribute setter.
   1210 * @param coll collator whose attributes are to be changed
   1211 * @param attr attribute type 
   1212 * @param value attribute value
   1213 * @param status to indicate whether the operation succeeded or there were errors
   1214 * @see UColAttribute
   1215 * @see UColAttributeValue
   1216 * @see ucol_getAttribute
   1217 * @stable ICU 2.0
   1218 */
   1219 U_CAPI void U_EXPORT2 
   1220 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
   1221 
   1222 /**
   1223 * Universal attribute getter.
   1224 * @param coll collator whose attributes are to be queried
   1225 * @param attr attribute type
   1226 * @param status to indicate whether the operation succeeded or there were errors
   1227 * @return attribute value
   1228 * @see UColAttribute
   1229 * @see UColAttributeValue
   1230 * @see ucol_setAttribute
   1231 * @stable ICU 2.0
   1232 */
   1233 U_CAPI UColAttributeValue  U_EXPORT2 
   1234 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
   1235 
   1236 /**
   1237 * Sets the variable top to the top of the specified reordering group.
   1238 * The variable top determines the highest-sorting character
   1239 * which is affected by UCOL_ALTERNATE_HANDLING.
   1240 * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
   1241 * @param coll the collator
   1242 * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
   1243 *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
   1244 *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
   1245 * @param pErrorCode Standard ICU error code. Its input value must
   1246 *                   pass the U_SUCCESS() test, or else the function returns
   1247 *                   immediately. Check for U_FAILURE() on output or use with
   1248 *                   function chaining. (See User Guide for details.)
   1249 * @see ucol_getMaxVariable
   1250 * @stable ICU 53
   1251 */
   1252 U_CAPI void U_EXPORT2
   1253 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode);
   1254 
   1255 /**
   1256 * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
   1257 * @param coll the collator
   1258 * @return the maximum variable reordering group.
   1259 * @see ucol_setMaxVariable
   1260 * @stable ICU 53
   1261 */
   1262 U_CAPI UColReorderCode U_EXPORT2
   1263 ucol_getMaxVariable(const UCollator *coll);
   1264 
   1265 #ifndef U_HIDE_DEPRECATED_API
   1266 /**
   1267 * Sets the variable top to the primary weight of the specified string.
   1268 *
   1269 * Beginning with ICU 53, the variable top is pinned to
   1270 * the top of one of the supported reordering groups,
   1271 * and it must not be beyond the last of those groups.
   1272 * See ucol_setMaxVariable().
   1273 * @param coll the collator
   1274 * @param varTop one or more (if contraction) UChars to which the variable top should be set
   1275 * @param len length of variable top string. If -1 it is considered to be zero terminated.
   1276 * @param status error code. If error code is set, the return value is undefined.
   1277 *               Errors set by this function are:<br>
   1278 *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
   1279 *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
   1280 *    the last reordering group supported by ucol_setMaxVariable()
   1281 * @return variable top primary weight
   1282 * @see ucol_getVariableTop
   1283 * @see ucol_restoreVariableTop
   1284 * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
   1285 */
   1286 U_DEPRECATED uint32_t U_EXPORT2 
   1287 ucol_setVariableTop(UCollator *coll, 
   1288                    const UChar *varTop, int32_t len, 
   1289                    UErrorCode *status);
   1290 #endif  /* U_HIDE_DEPRECATED_API */
   1291 
   1292 /** 
   1293 * Gets the variable top value of a Collator. 
   1294 * @param coll collator whose variable top needs to be retrieved
   1295 * @param status error code (not changed by function). If error code is set, 
   1296 *               the return value is undefined.
   1297 * @return the variable top primary weight
   1298 * @see ucol_getMaxVariable
   1299 * @see ucol_setVariableTop
   1300 * @see ucol_restoreVariableTop
   1301 * @stable ICU 2.0
   1302 */
   1303 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
   1304 
   1305 #ifndef U_HIDE_DEPRECATED_API
   1306 /**
   1307 * Sets the variable top to the specified primary weight.
   1308 *
   1309 * Beginning with ICU 53, the variable top is pinned to
   1310 * the top of one of the supported reordering groups,
   1311 * and it must not be beyond the last of those groups.
   1312 * See ucol_setMaxVariable().
   1313 * @param coll collator whose variable top is to be set
   1314 * @param varTop primary weight, as returned by ucol_setVariableTop() or ucol_getVariableTop()
   1315 * @param status error code
   1316 * @see ucol_getVariableTop
   1317 * @see ucol_setVariableTop
   1318 * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
   1319 */
   1320 U_DEPRECATED void U_EXPORT2 
   1321 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
   1322 #endif  /* U_HIDE_DEPRECATED_API */
   1323 
   1324 /**
   1325 * Thread-safe cloning operation. The result is a clone of the given collator.
   1326 * @param coll collator to be cloned
   1327 * @param status error code; indicates whether the operation succeeded or there were errors
   1328 * @return pointer to the new clone
   1329 * @see ucol_open
   1330 * @see ucol_openRules
   1331 * @see ucol_close
   1332 * @stable ICU 71
   1333 */
   1334 U_CAPI UCollator* U_EXPORT2 ucol_clone(const UCollator *coll, UErrorCode *status);
   1335 
   1336 #ifndef U_HIDE_DEPRECATED_API
   1337 
   1338 /**
   1339 * Thread-safe cloning operation. The result is a clone of the given collator.
   1340 * @param coll collator to be cloned
   1341 * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
   1342 *  User-allocated space for the new clone.
   1343 *  If NULL, new memory will be allocated.
   1344 *  If the buffer is not large enough, new memory will be allocated.
   1345 *  Clients can use U_COL_SAFECLONE_BUFFERSIZE.
   1346 * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
   1347 *  Pointer to the size of the allocated space.
   1348 *  If *pBufferSize == 0, a sufficient size for use in cloning will
   1349 *  be returned ('pre-flighting').
   1350 *  If *pBufferSize is not enough for a stack-based safe clone,
   1351 *  new memory will be allocated.
   1352 * @param status error code; indicates whether the operation succeeded or there were errors.
   1353 *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used
   1354 *  if pBufferSize != NULL and any allocations were necessary.
   1355 * @return pointer to the new clone
   1356 * @see ucol_open
   1357 * @see ucol_openRules
   1358 * @see ucol_close
   1359 * @deprecated ICU 71 Use ucol_clone() instead.
   1360 */
   1361 U_DEPRECATED UCollator* U_EXPORT2
   1362 ucol_safeClone(const UCollator *coll,
   1363               void            *stackBuffer,
   1364               int32_t         *pBufferSize,
   1365               UErrorCode      *status);
   1366 
   1367 
   1368 /** Default memory size for the new clone created by ucol_safeClone().
   1369 * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer.
   1370 */
   1371 #define U_COL_SAFECLONE_BUFFERSIZE 1
   1372 
   1373 #endif /* U_HIDE_DEPRECATED_API */
   1374 
   1375 /**
   1376 * Stores the current rules in a buffer. Delta defines whether full rules are returned or just the tailoring.
   1377 * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough 
   1378 * to store rules, will store up to available space.
   1379 *
   1380 * ucol_getRules() should normally be used instead.
   1381 * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales
   1382 * @param coll collator to get the rules from
   1383 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 
   1384 * @param buffer buffer to store the result in. If NULL, you'll get no rules.
   1385 * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
   1386 * @return number of UChars needed to store the rules
   1387 * @stable ICU 2.0
   1388 * @see UCOL_FULL_RULES
   1389 */
   1390 U_CAPI int32_t U_EXPORT2 
   1391 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
   1392 
   1393 #ifndef U_HIDE_DEPRECATED_API
   1394 /**
   1395 * Gets the locale name of the collator. If the collator
   1396 * was instantiated from rules, then this function returns
   1397 * NULL.
   1398 * @param coll The UCollator for which the locale is needed
   1399 * @param type You can choose between requested, valid and actual
   1400 *             locale. For a description see the definition of
   1401 *             ULocDataLocaleType in uloc.h
   1402 * @param status error code of the operation
   1403 * @return real locale name from which the collation data comes.
   1404 *         If the collator was instantiated from rules, returns
   1405 *         NULL.
   1406 * @deprecated ICU 2.8 Use ucol_getLocaleByType instead
   1407 */
   1408 U_DEPRECATED const char * U_EXPORT2
   1409 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
   1410 #endif  /* U_HIDE_DEPRECATED_API */
   1411 
   1412 /**
   1413 * Gets the locale name of the collator. If the collator
   1414 * was instantiated from rules, then this function returns
   1415 * NULL.
   1416 * @param coll The UCollator for which the locale is needed
   1417 * @param type You can choose between requested, valid and actual
   1418 *             locale. For a description see the definition of
   1419 *             ULocDataLocaleType in uloc.h
   1420 * @param status error code of the operation
   1421 * @return real locale name from which the collation data comes.
   1422 *         If the collator was instantiated from rules, returns
   1423 *         NULL.
   1424 * @stable ICU 2.8
   1425 */
   1426 U_CAPI const char * U_EXPORT2
   1427 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
   1428 
   1429 /**
   1430 * Gets a Unicode set that contains all the characters and sequences tailored in
   1431 * this collator. The result must be disposed of by using uset_close.
   1432 * @param coll        The UCollator for which we want to get tailored chars
   1433 * @param status      error code of the operation
   1434 * @return a pointer to a newly created USet. Must be disposed of by using uset_close
   1435 * @see ucol_openRules
   1436 * @see uset_close
   1437 * @stable ICU 2.4
   1438 */
   1439 U_CAPI USet * U_EXPORT2
   1440 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
   1441 
   1442 #ifndef U_HIDE_INTERNAL_API
   1443 /** Calculates the set of unsafe code points, given a collator.
   1444 *   A character is unsafe if you could append any character and cause the ordering to alter significantly.
   1445 *   Collation sorts in normalized order, so anything that rearranges in normalization can cause this.
   1446 *   Thus if you have a character like a_umlaut, and you add a lower_dot to it,
   1447 *   then it normalizes to a_lower_dot + umlaut, and sorts differently.
   1448 *  @param coll the collator for which the unsafe code points are calculated
   1449 *  @param unsafe a fill-in set to receive the unsafe points
   1450 *  @param status error code of the operation
   1451 *  @return number of elements in the set
   1452 *  @internal ICU 3.0
   1453 */
   1454 U_CAPI int32_t U_EXPORT2
   1455 ucol_getUnsafeSet( const UCollator *coll,
   1456                  USet *unsafe,
   1457                  UErrorCode *status);
   1458 
   1459 /** Touches all resources needed for instantiating a collator from a short string definition,
   1460 *  thus filling up the cache.
   1461 * @param definition A short string containing a locale and a set of attributes. 
   1462 *                   Attributes not explicitly mentioned are left at the default
   1463 *                   state for a locale.
   1464 * @param forceDefaults if false, the settings that are the same as the collator 
   1465 *                   default settings will not be applied (for example, setting
   1466 *                   French secondary on a French collator would not be executed). 
   1467 *                   If true, all the settings will be applied regardless of the 
   1468 *                   collator default value. If the definition
   1469 *                   strings are to be cached, should be set to false.
   1470 * @param parseError if not NULL, structure that will get filled with error's pre
   1471 *                   and post context in case of error.
   1472 * @param status     Error code. Apart from regular error conditions connected to 
   1473 *                   instantiating collators (like out of memory or similar), this
   1474 *                   API will return an error if an invalid attribute or attribute/value
   1475 *                   combination is specified.
   1476 * @see ucol_openFromShortString
   1477 * @internal ICU 3.2.1
   1478 */
   1479 U_CAPI void U_EXPORT2
   1480 ucol_prepareShortStringOpen( const char *definition,
   1481                          UBool forceDefaults,
   1482                          UParseError *parseError,
   1483                          UErrorCode *status);
   1484 #endif  /* U_HIDE_INTERNAL_API */
   1485 
   1486 /** Creates a binary image of a collator. This binary image can be stored and 
   1487 *  later used to instantiate a collator using ucol_openBinary.
   1488 *  This API supports preflighting.
   1489 *  @param coll the collator of which a binary image is created
   1490 *  @param buffer a fill-in buffer to receive the binary image
   1491 *  @param capacity capacity of the destination buffer
   1492 *  @param status error code of the operation
   1493 *  @return size of the image
   1494 *  @see ucol_openBinary
   1495 *  @stable ICU 3.2
   1496 */
   1497 U_CAPI int32_t U_EXPORT2
   1498 ucol_cloneBinary(const UCollator *coll,
   1499                 uint8_t *buffer, int32_t capacity,
   1500                 UErrorCode *status);
   1501 
   1502 /** Opens a collator from a collator binary image created using
   1503 *  ucol_cloneBinary. The binary image used in the instantiation of the
   1504 *  collator remains owned by the user and should stay around for 
   1505 *  the lifetime of the collator. The API also takes a base collator
   1506 *  which must be the root collator.
   1507 *  @param bin binary image owned by the user and required through the
   1508 *             lifetime of the collator
   1509 *  @param length size of the image. If negative, the API will try to
   1510 *                figure out the length of the image
   1511 *  @param base Base collator, for lookup of untailored characters.
   1512 *              Must be the root collator, must not be NULL.
   1513 *              The base is required to be present through the lifetime of the collator.
   1514 *  @param status error code of the operation
   1515 *  @return the newly created collator
   1516 *  @see ucol_cloneBinary
   1517 *  @stable ICU 3.2
   1518 */
   1519 U_CAPI UCollator* U_EXPORT2
   1520 ucol_openBinary(const uint8_t *bin, int32_t length, 
   1521                const UCollator *base, 
   1522                UErrorCode *status);
   1523 
   1524 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
   1525 
   1526 #include <functional>
   1527 #include <string_view>
   1528 #include <type_traits>
   1529 
   1530 #include "unicode/char16ptr.h"
   1531 #include "unicode/unistr.h"
   1532 
   1533 namespace U_HEADER_ONLY_NAMESPACE {
   1534 
   1535 namespace collator {
   1536 
   1537 namespace internal {
   1538 
   1539 /**
   1540 * Function object: applies Compare<UCollationResult> to a UCollator comparison outcome and `result`.
   1541 * @internal
   1542 */
   1543 template <template <typename...> typename Compare, UCollationResult result>
   1544 class Predicate {
   1545  public:
   1546    /** @internal Stores the given collator pointer for later comparisons. */
   1547    explicit Predicate(const UCollator* ucol) : collator(ucol) {}
   1548 
   1549 #if U_SHOW_CPLUSPLUS_API
   1550    /** @internal Accepts operands that satisfy ConvertibleToU16StringView. */
   1551    template <
   1552        typename T, typename U,
   1553        typename = std::enable_if_t<ConvertibleToU16StringView<T> && ConvertibleToU16StringView<U>>>
   1554    bool operator()(const T& lhs, const U& rhs) const {
   1555        return match(UnicodeString::readOnlyAlias(lhs), UnicodeString::readOnlyAlias(rhs));
   1556    }
   1557 #else
   1558    /** @internal Forwards std::u16string_view operands to match(). */
   1559    bool operator()(std::u16string_view lhs, std::u16string_view rhs) const {
   1560        return match(lhs, rhs);
   1561    }
   1562 
   1563 #if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
   1564    /** @internal Forwards uint16_t string views (pointer converted, same length) to match(). */
   1565    bool operator()(std::basic_string_view<uint16_t> lhs, std::basic_string_view<uint16_t> rhs) const {
   1566        return match({uprv_char16PtrFromUint16(lhs.data()), lhs.length()},
   1567                     {uprv_char16PtrFromUint16(rhs.data()), rhs.length()});
   1568    }
   1569 #endif
   1570 
   1571 #if U_SIZEOF_WCHAR_T==2
   1572    /** @internal Forwards wchar_t string views (pointer converted, same length) to match(). */
   1573    bool operator()(std::wstring_view lhs, std::wstring_view rhs) const {
   1574        return match({uprv_char16PtrFromWchar(lhs.data()), lhs.length()},
   1575                     {uprv_char16PtrFromWchar(rhs.data()), rhs.length()});
   1576    }
   1577 #endif
   1578 #endif
   1579 
   1580    /** @internal Forwards std::string_view operands to match(). */
   1581    bool operator()(std::string_view lhs, std::string_view rhs) const {
   1582        return match(lhs, rhs);
   1583    }
   1584 
   1585 #if defined(__cpp_char8_t)
   1586    /** @internal Forwards char8_t string views, reinterpreted as const char*, to match(). */
   1587    bool operator()(std::u8string_view lhs, std::u8string_view rhs) const {
   1588        return match({reinterpret_cast<const char*>(lhs.data()), lhs.length()},
   1589                     {reinterpret_cast<const char*>(rhs.data()), rhs.length()});
   1590    }
   1591 #endif
   1592 
   1593  private:
   1594    bool match(std::u16string_view lhs, std::u16string_view rhs) const {
   1595        return compare(
   1596            ucol_strcoll(
   1597                collator,
   1598                toUCharPtr(lhs.data()), static_cast<int32_t>(lhs.length()),
   1599                toUCharPtr(rhs.data()), static_cast<int32_t>(rhs.length())),
   1600            result);
   1601    }
   1602 
   1603    bool match(std::string_view lhs, std::string_view rhs) const {
   1604        UErrorCode status = U_ZERO_ERROR;  // passed to ucol_strcollUTF8() but never inspected here
   1605        return compare(
   1606            ucol_strcollUTF8(
   1607                collator,
   1608                lhs.data(), static_cast<int32_t>(lhs.length()),
   1609                rhs.data(), static_cast<int32_t>(rhs.length()),
   1610                &status),
   1611            result);
   1612    }
   1613 
   1614    const UCollator* const collator;
   1615    static constexpr Compare<UCollationResult> compare{};
   1616 };
   1617 
   1618 }  // namespace internal
   1619 
   1620 /**
   1621 * Function object using this collator: true if the comparison result is UCOL_EQUAL.
   1622 * Like <code>std::equal_to</code> but uses the collator instead of <code>operator==</code>.
   1623 * @stable ICU 76
   1624 */
   1625 using equal_to = internal::Predicate<std::equal_to, UCOL_EQUAL>;
   1626 
   1627 /**
   1628 * Function object using this collator: true if the comparison result is UCOL_GREATER.
   1629 * Like <code>std::greater</code> but uses the collator instead of <code>operator&gt;</code>.
   1630 * @stable ICU 76
   1631 */
   1632 using greater = internal::Predicate<std::equal_to, UCOL_GREATER>;
   1633 
   1634 /**
   1635 * Function object using this collator: true if the comparison result is UCOL_LESS.
   1636 * Like <code>std::less</code> but uses the collator instead of <code>operator&lt;</code>.
   1637 * @stable ICU 76
   1638 */
   1639 using less = internal::Predicate<std::equal_to, UCOL_LESS>;
   1640 
   1641 /**
   1642 * Function object using this collator: true if the comparison result is not UCOL_EQUAL.
   1643 * Like <code>std::not_equal_to</code> but uses the collator instead of <code>operator!=</code>.
   1644 * @stable ICU 76
   1645 */
   1646 using not_equal_to = internal::Predicate<std::not_equal_to, UCOL_EQUAL>;
   1647 
   1648 /**
   1649 * Function object using this collator: true if the comparison result is not UCOL_LESS.
   1650 * Like <code>std::greater_equal</code> but uses the collator instead of <code>operator&gt;=</code>.
   1651 * @stable ICU 76
   1652 */
   1653 using greater_equal = internal::Predicate<std::not_equal_to, UCOL_LESS>;
   1654 
   1655 /**
   1656 * Function object using this collator: true if the comparison result is not UCOL_GREATER.
   1657 * Like <code>std::less_equal</code> but uses the collator instead of <code>operator&lt;=</code>.
   1658 * @stable ICU 76
   1659 */
   1660 using less_equal = internal::Predicate<std::not_equal_to, UCOL_GREATER>;
   1661 
   1662 }  // namespace collator
   1663 
   1664 }  // namespace U_HEADER_ONLY_NAMESPACE
   1665 
   1666 #endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
   1667 
   1668 #endif /* #if !UCONFIG_NO_COLLATION */
   1669 
   1670 #endif