tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucasemap.h (15633B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2005-2012, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  ucasemap.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2005may06
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Case mapping service object and functions using it.
     19 */
     20 
     21 #ifndef __UCASEMAP_H__
     22 #define __UCASEMAP_H__
     23 
     24 #include "unicode/utypes.h"
     25 #include "unicode/stringoptions.h"
     26 #include "unicode/ustring.h"
     27 
     28 #if U_SHOW_CPLUSPLUS_API
     29 #include "unicode/localpointer.h"
     30 #endif   // U_SHOW_CPLUSPLUS_API
     31 
     32 /**
     33 * \file
     34 * \brief C API: Unicode case mapping functions using a UCaseMap service object.
     35 *
     36 * The service object takes care of memory allocations, data loading, and setup
     37 * for the attributes, as usual.
     38 *
     39 * Currently, the functionality provided here does not overlap with uchar.h
     40 * and ustring.h, except for ucasemap_toTitle().
     41 *
     42 * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
     43 */
     44 
     45 /**
     46 * UCaseMap is an opaque service object for newer ICU case mapping functions.
     47 * Older functions did not use a service object.
     48 * Create one with ucasemap_open() and release it with ucasemap_close(). @stable ICU 3.4
     49 */
     50 struct UCaseMap;
     51 typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
     52 
     53 /**
     54 * Open a UCaseMap service object for a locale and a set of options.
     55 * The locale ID and options are preprocessed so that functions using the
     56 * service object need not process them in each call.
     57 *
     58 * @param locale ICU locale ID, used for language-dependent
     59 *               upper-/lower-/title-casing according to the Unicode standard.
     60 *               Usual semantics: ""=root, NULL=default locale, etc.
     61 * @param options Options bit set, used for case folding and string comparisons.
     62 *                Same flags as for u_foldCase(), u_strFoldCase(),
     63 *                u_strCaseCompare(), etc.
     64 *                Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
     65 * @param pErrorCode Must be a valid pointer to an error code value,
     66 *                   which must not indicate a failure before the function call.
     67 * @return Pointer to a UCaseMap service object, if successful. Release it with ucasemap_close().
     68 *
     69 * @see U_FOLD_CASE_DEFAULT
     70 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
     71 * @see U_TITLECASE_NO_LOWERCASE
     72 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
     73 * @stable ICU 3.4
     74 */
     75 U_CAPI UCaseMap * U_EXPORT2
     76 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
     77 
     78 /**
     79 * Close a UCaseMap service object; this also releases a break iterator that it adopted via ucasemap_setBreakIterator().
     80 * @param csm Object to be closed.
     81 * @stable ICU 3.4
     82 */
     83 U_CAPI void U_EXPORT2
     84 ucasemap_close(UCaseMap *csm);
     85 
     86 #if U_SHOW_CPLUSPLUS_API
     87 
     88 U_NAMESPACE_BEGIN
     89 
     90 /**
     91 * \class LocalUCaseMapPointer
     92 * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
     93 * For most methods see the LocalPointerBase base class.
     94 *
     95 * @see LocalPointerBase
     96 * @see LocalPointer
     97 * @stable ICU 4.4
     98 */
     99 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
    100 
    101 U_NAMESPACE_END
    102 
    103 #endif   // U_SHOW_CPLUSPLUS_API
    104 
    105 /**
    106 * Get the locale ID that is used for language-dependent case mappings (see ucasemap_setLocale()).
    107 * @param csm UCaseMap service object.
    108 * @return locale ID
    109 * @stable ICU 3.4
    110 */
    111 U_CAPI const char * U_EXPORT2
    112 ucasemap_getLocale(const UCaseMap *csm);
    113 
    114 /**
    115 * Get the options bit set that is used for case folding and string comparisons (see ucasemap_setOptions()).
    116 * @param csm UCaseMap service object.
    117 * @return options bit set
    118 * @stable ICU 3.4
    119 */
    120 U_CAPI uint32_t U_EXPORT2
    121 ucasemap_getOptions(const UCaseMap *csm);
    122 
    123 /**
    124 * Set the locale ID that is used for language-dependent case mappings.
    125 *
    126 * @param csm UCaseMap service object.
    127 * @param locale Locale ID; see the locale parameter of ucasemap_open().
    128 * @param pErrorCode Must be a valid pointer to an error code value,
    129 *                   which must not indicate a failure before the function call.
    130 *
    131 * @see ucasemap_open
    132 * @stable ICU 3.4
    133 */
    134 U_CAPI void U_EXPORT2
    135 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
    136 
    137 /**
    138 * Set the options bit set that is used for case folding and string comparisons.
    139 * The options can also modify titlecasing; see ucasemap_toTitle() and ucasemap_utf8ToTitle().
    140 * @param csm UCaseMap service object.
    141 * @param options Options bit set; see the options parameter of ucasemap_open().
    142 * @param pErrorCode Must be a valid pointer to an error code value,
    143 *                   which must not indicate a failure before the function call.
    144 *
    145 * @see ucasemap_open
    146 * @stable ICU 3.4
    147 */
    148 U_CAPI void U_EXPORT2
    149 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
    150 
    151 #if !UCONFIG_NO_BREAK_ITERATION
    152 
    153 /**
    154 * Get the break iterator that is used for titlecasing.
    155 * Do not modify the returned break iterator.
    156 * @param csm UCaseMap service object.
    157 * @return titlecasing break iterator; the UCaseMap keeps ownership of it (see ucasemap_setBreakIterator())
    158 * @stable ICU 3.8
    159 */
    160 U_CAPI const UBreakIterator * U_EXPORT2
    161 ucasemap_getBreakIterator(const UCaseMap *csm);
    162 
    163 /**
    164 * Set the break iterator that is used for titlecasing.
    165 * The UCaseMap service object releases a previously set break iterator
    166 * and "adopts" this new one, taking ownership of it.
    167 * It will be released in a subsequent call to ucasemap_setBreakIterator()
    168 * or ucasemap_close().
    169 *
    170 * Break iterator operations are not thread-safe. Therefore, titlecasing
    171 * functions use non-const UCaseMap objects. It is not possible to titlecase
    172 * strings concurrently using the same UCaseMap.
    173 *
    174 * @param csm UCaseMap service object.
    175 * @param iterToAdopt Break iterator to be adopted for titlecasing; the UCaseMap owns it after this call.
    176 * @param pErrorCode Must be a valid pointer to an error code value,
    177 *                   which must not indicate a failure before the function call.
    178 *
    179 * @see ucasemap_toTitle
    180 * @see ucasemap_utf8ToTitle
    181 * @stable ICU 3.8
    182 */
    183 U_CAPI void U_EXPORT2
    184 ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
    185 
    186 /**
    187 * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
    188 * except that it takes ucasemap_setOptions() into account and has performance
    189 * advantages from being able to use a UCaseMap object for multiple case mapping
    190 * operations, saving setup time.
    191 *
    192 * Casing is locale-dependent and context-sensitive.
    193 * Titlecasing uses a break iterator to find the first characters of words
    194 * that are to be titlecased. It titlecases those characters and lowercases
    195 * all others. (This can be modified with ucasemap_setOptions().)
    196 *
    197 * Note: This function takes a non-const UCaseMap pointer because it will
    198 * open a default break iterator if no break iterator was set yet,
    199 * and effectively call ucasemap_setBreakIterator();
    200 * also because the break iterator is stateful and will be modified during
    201 * the iteration.
    202 *
    203 * The titlecase break iterator can be provided (see ucasemap_setBreakIterator())
    204 * to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators.
    205 * If the break iterator passed in is null, the default Unicode algorithm
    206 * will be used to determine the titlecase positions.
    207 *
    208 * This function uses only the setText(), first() and next() methods of the
    209 * provided break iterator.
    210 *
    211 * The result may be longer or shorter than the original.
    212 * The source string and the destination buffer must not overlap.
    213 *
    214 * @param csm       UCaseMap service object. This pointer is non-const!
    215 *                  See the note above for details.
    216 * @param dest      A buffer for the result string. The result will be NUL-terminated if
    217 *                  the buffer is large enough.
    218 *                  The contents are undefined in case of failure.
    219 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
    220 *                  dest may be NULL and the function will only return the length of the result
    221 *                  without writing any of the result string.
    222 * @param src       The original string.
    223 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    224 * @param pErrorCode Must be a valid pointer to an error code value,
    225 *                  which must not indicate a failure before the function call.
    226 * @return The length of the result string. It is returned on success and also in case
    227 *         of a buffer overflow, in which case it will be greater than destCapacity.
    228 *
    229 * @see u_strToTitle
    230 * @stable ICU 3.8
    231 */
    232 U_CAPI int32_t U_EXPORT2
    233 ucasemap_toTitle(UCaseMap *csm,
    234                 UChar *dest, int32_t destCapacity,
    235                 const UChar *src, int32_t srcLength,
    236                 UErrorCode *pErrorCode);
    237 
    238 #endif  // UCONFIG_NO_BREAK_ITERATION
    239 
    240 /**
    241 * Lowercase the characters in a UTF-8 string.
    242 * Casing is locale-dependent and context-sensitive.
    243 * The result may be longer or shorter than the original.
    244 * The source string and the destination buffer must not overlap.
    245 *
    246 * @param csm       UCaseMap service object.
    247 * @param dest      A buffer for the result string. The result will be NUL-terminated if
    248 *                  the buffer is large enough.
    249 *                  The contents are undefined in case of failure.
    250 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
    251 *                  dest may be NULL and the function will only return the length of the result
    252 *                  without writing any of the result string.
    253 * @param src       The original string.
    254 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    255 * @param pErrorCode Must be a valid pointer to an error code value,
    256 *                  which must not indicate a failure before the function call.
    257 * @return The length of the result string. It is returned on success and also in case
    258 *         of a buffer overflow, in which case it will be greater than destCapacity.
    259 *
    260 * @see u_strToLower
    261 * @stable ICU 3.4
    262 */
    263 U_CAPI int32_t U_EXPORT2
    264 ucasemap_utf8ToLower(const UCaseMap *csm,
    265                     char *dest, int32_t destCapacity,
    266                     const char *src, int32_t srcLength,
    267                     UErrorCode *pErrorCode);
    268 
    269 /**
    270 * Uppercase the characters in a UTF-8 string.
    271 * Casing is locale-dependent and context-sensitive.
    272 * The result may be longer or shorter than the original.
    273 * The source string and the destination buffer must not overlap.
    274 *
    275 * @param csm       UCaseMap service object.
    276 * @param dest      A buffer for the result string. The result will be NUL-terminated if
    277 *                  the buffer is large enough.
    278 *                  The contents are undefined in case of failure.
    279 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
    280 *                  dest may be NULL and the function will only return the length of the result
    281 *                  without writing any of the result string.
    282 * @param src       The original string.
    283 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    284 * @param pErrorCode Must be a valid pointer to an error code value,
    285 *                  which must not indicate a failure before the function call.
    286 * @return The length of the result string. It is returned on success and also in case
    287 *         of a buffer overflow, in which case it will be greater than destCapacity.
    288 *
    289 * @see u_strToUpper
    290 * @stable ICU 3.4
    291 */
    292 U_CAPI int32_t U_EXPORT2
    293 ucasemap_utf8ToUpper(const UCaseMap *csm,
    294                     char *dest, int32_t destCapacity,
    295                     const char *src, int32_t srcLength,
    296                     UErrorCode *pErrorCode);
    297 
    298 #if !UCONFIG_NO_BREAK_ITERATION
    299 
    300 /**
    301 * Titlecase a UTF-8 string.
    302 * Casing is locale-dependent and context-sensitive.
    303 * Titlecasing uses a break iterator to find the first characters of words
    304 * that are to be titlecased. It titlecases those characters and lowercases
    305 * all others. (This can be modified with ucasemap_setOptions().)
    306 *
    307 * Note: This function takes a non-const UCaseMap pointer because it will
    308 * open a default break iterator if no break iterator was set yet,
    309 * and effectively call ucasemap_setBreakIterator();
    310 * also because the break iterator is stateful and will be modified during
    311 * the iteration.
    312 *
    313 * The titlecase break iterator can be provided (see ucasemap_setBreakIterator())
    314 * to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators.
    315 * If the break iterator passed in is null, the default Unicode algorithm
    316 * will be used to determine the titlecase positions.
    317 *
    318 * This function uses only the setUText(), first(), next() and close() methods of the
    319 * provided break iterator.
    320 *
    321 * The result may be longer or shorter than the original.
    322 * The source string and the destination buffer must not overlap.
    323 *
    324 * @param csm       UCaseMap service object. This pointer is non-const!
    325 *                  See the note above for details.
    326 * @param dest      A buffer for the result string. The result will be NUL-terminated if
    327 *                  the buffer is large enough.
    328 *                  The contents are undefined in case of failure.
    329 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
    330 *                  dest may be NULL and the function will only return the length of the result
    331 *                  without writing any of the result string.
    332 * @param src       The original string.
    333 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    334 * @param pErrorCode Must be a valid pointer to an error code value,
    335 *                  which must not indicate a failure before the function call.
    336 * @return The length of the result string. It is returned on success and also in case
    337 *         of a buffer overflow, in which case it will be greater than destCapacity.
    338 *
    339 * @see u_strToTitle
    340 * @see U_TITLECASE_NO_LOWERCASE
    341 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
    342 * @stable ICU 3.8
    343 */
    344 U_CAPI int32_t U_EXPORT2
    345 ucasemap_utf8ToTitle(UCaseMap *csm,
    346                    char *dest, int32_t destCapacity,
    347                    const char *src, int32_t srcLength,
    348                    UErrorCode *pErrorCode);
    349 
    350 #endif  // UCONFIG_NO_BREAK_ITERATION
    351 
    352 /**
    353 * Case-folds the characters in a UTF-8 string.
    354 *
    355 * Case-folding is locale-independent and not context-sensitive,
    356 * but there is an option for whether to include or exclude mappings for dotted I
    357 * and dotless i that are marked with 'T' in CaseFolding.txt.
    358 *
    359 * The result may be longer or shorter than the original.
    360 * The source string and the destination buffer must not overlap.
    361 *
    362 * @param csm       UCaseMap service object.
    363 * @param dest      A buffer for the result string. The result will be NUL-terminated if
    364 *                  the buffer is large enough.
    365 *                  The contents are undefined in case of failure.
    366 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
    367 *                  dest may be NULL and the function will only return the length of the result
    368 *                  without writing any of the result string.
    369 * @param src       The original string.
    370 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    371 * @param pErrorCode Must be a valid pointer to an error code value,
    372 *                  which must not indicate a failure before the function call.
    373 * @return The length of the result string. It is returned on success and also in case
    374 *         of a buffer overflow, in which case it will be greater than destCapacity.
    375 *
    376 * @see u_strFoldCase
    377 * @see ucasemap_setOptions
    378 * @see U_FOLD_CASE_DEFAULT
    379 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
    380 * @stable ICU 3.8
    381 */
    382 U_CAPI int32_t U_EXPORT2
    383 ucasemap_utf8FoldCase(const UCaseMap *csm,
    384                      char *dest, int32_t destCapacity,
    385                      const char *src, int32_t srcLength,
    386                      UErrorCode *pErrorCode);
    387 
    388 #endif