tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

casemap.h (26027B)


      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // casemap.h
      5 // created: 2017jan12 Markus W. Scherer
      6 
      7 #ifndef __CASEMAP_H__
      8 #define __CASEMAP_H__
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if U_SHOW_CPLUSPLUS_API
     13 
     14 #include "unicode/stringpiece.h"
     15 #include "unicode/uobject.h"
     16 
     17 /**
     18 * \file
     19 * \brief C++ API: Low-level C++ case mapping functions.
     20 */
     21 
     22 U_NAMESPACE_BEGIN
     23 
     24 class BreakIterator;
     25 class ByteSink;
     26 class Edits;
     27 
     28 /**
     29 * Low-level C++ case mapping functions (static-only API; CaseMap cannot be instantiated or copied).
     30 *
     31 * @stable ICU 59
     32 */
     33 class U_COMMON_API CaseMap final : public UMemory {
     34 public:
     35    /**
     36     * Lowercases a UTF-16 string and optionally records edits.
     37     * Casing is locale-dependent and context-sensitive.
     38     * The result may be longer or shorter than the original.
     39     * The source string and the destination buffer must not overlap.
     40     *
     41     * @param locale    The locale ID. ("" = root locale, nullptr = default locale.)
     42     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     43     * @param src       The original string.
     44     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
     45     * @param dest      A buffer for the result string. The result will be NUL-terminated if
     46     *                  the buffer is large enough.
     47     *                  The contents are undefined in case of failure.
     48     * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
     49     *                  dest may be nullptr and the function will only return the length of the result
     50     *                  without writing any of the result string.
     51     * @param edits     Records edits for index mapping, working with styled text,
     52     *                  and getting only changes (if any).
     53     *                  The Edits contents are undefined if any error occurs.
     54     *                  This function calls edits->reset() first unless
     55     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
     56     * @param errorCode Reference to an in/out error code value
     57     *                  which must not indicate a failure before the function call.
     58     * @return The length of the result string, if successful.
     59     *         When the result would be longer than destCapacity,
     60     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
     61     *
     62     * @see u_strToLower
     63     * @stable ICU 59
     64     */
     65     static int32_t toLower(
     66            const char *locale, uint32_t options,
     67            const char16_t *src, int32_t srcLength,
     68            char16_t *dest, int32_t destCapacity, Edits *edits,
     69            UErrorCode &errorCode);
     70 
     71    /**
     72     * Uppercases a UTF-16 string and optionally records edits.
     73     * Casing is locale-dependent and context-sensitive.
     74     * The result may be longer or shorter than the original.
     75     * The source string and the destination buffer must not overlap.
     76     *
     77     * @param locale    The locale ID. ("" = root locale, nullptr = default locale.)
     78     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     79     * @param src       The original string.
     80     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
     81     * @param dest      A buffer for the result string. The result will be NUL-terminated if
     82     *                  the buffer is large enough.
     83     *                  The contents are undefined in case of failure.
     84     * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
     85     *                  dest may be nullptr and the function will only return the length of the result
     86     *                  without writing any of the result string.
     87     * @param edits     Records edits for index mapping, working with styled text,
     88     *                  and getting only changes (if any).
     89     *                  The Edits contents are undefined if any error occurs.
     90     *                  This function calls edits->reset() first unless
     91     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
     92     * @param errorCode Reference to an in/out error code value
     93     *                  which must not indicate a failure before the function call.
     94     * @return The length of the result string, if successful.
     95     *         When the result would be longer than destCapacity,
     96     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
     97     *
     98     * @see u_strToUpper
     99     * @stable ICU 59
    100     */
    101    static int32_t toUpper(
    102            const char *locale, uint32_t options,
    103            const char16_t *src, int32_t srcLength,
    104            char16_t *dest, int32_t destCapacity, Edits *edits,
    105            UErrorCode &errorCode);
    106 
    107 #if !UCONFIG_NO_BREAK_ITERATION
    108 
    109    /**
    110     * Titlecases a UTF-16 string and optionally records edits.
    111     * Casing is locale-dependent and context-sensitive.
    112     * The result may be longer or shorter than the original.
    113     * The source string and the destination buffer must not overlap.
    114     *
    115     * Titlecasing uses a break iterator to find the first characters of words
    116     * that are to be titlecased. It titlecases those characters and lowercases
    117     * all others. (This can be modified with options bits.)
    118     *
    119     * @param locale    The locale ID. ("" = root locale, nullptr = default locale.)
    120     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
    121     *                  U_TITLECASE_NO_LOWERCASE,
    122     *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
    123     *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
    124     * @param iter      A break iterator to find the first characters of words that are to be titlecased.
    125     *                  It is set to the source string (setText())
    126     *                  and used one or more times for iteration (first() and next()).
    127     *                  If nullptr, then a word break iterator for the locale is used
    128     *                  (or something equivalent).
    129     * @param src       The original string.
    130     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    131     * @param dest      A buffer for the result string. The result will be NUL-terminated if
    132     *                  the buffer is large enough.
    133     *                  The contents are undefined in case of failure.
    134     * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
    135     *                  dest may be nullptr and the function will only return the length of the result
    136     *                  without writing any of the result string.
    137     * @param edits     Records edits for index mapping, working with styled text,
    138     *                  and getting only changes (if any).
    139     *                  The Edits contents are undefined if any error occurs.
    140     *                  This function calls edits->reset() first unless
    141     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    142     * @param errorCode Reference to an in/out error code value
    143     *                  which must not indicate a failure before the function call.
    144     * @return The length of the result string, if successful.
    145     *         When the result would be longer than destCapacity,
    146     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
    147     *
    148     * @see u_strToTitle
    149     * @see ucasemap_toTitle
    150     * @stable ICU 59
    151     */
    152    static int32_t toTitle(
    153            const char *locale, uint32_t options, BreakIterator *iter,
    154            const char16_t *src, int32_t srcLength,
    155            char16_t *dest, int32_t destCapacity, Edits *edits,
    156            UErrorCode &errorCode);
    157 
    158 #endif  // !UCONFIG_NO_BREAK_ITERATION
    159 
    160    /**
    161     * Case-folds a UTF-16 string and optionally records edits.
    162     *
    163     * Case folding is locale-independent and not context-sensitive,
    164     * but there is an option for whether to include or exclude mappings for dotted I
    165     * and dotless i that are marked with 'T' in CaseFolding.txt.
    166     *
    167     * The result may be longer or shorter than the original.
    168     * The source string and the destination buffer must not overlap.
    169     *
    170     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
    171     *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
    172     * @param src       The original string.
    173     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    174     * @param dest      A buffer for the result string. The result will be NUL-terminated if
    175     *                  the buffer is large enough.
    176     *                  The contents are undefined in case of failure.
    177     * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
    178     *                  dest may be nullptr and the function will only return the length of the result
    179     *                  without writing any of the result string.
    180     * @param edits     Records edits for index mapping, working with styled text,
    181     *                  and getting only changes (if any).
    182     *                  The Edits contents are undefined if any error occurs.
    183     *                  This function calls edits->reset() first unless
    184     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    185     * @param errorCode Reference to an in/out error code value
    186     *                  which must not indicate a failure before the function call.
    187     * @return The length of the result string, if successful.
    188     *         When the result would be longer than destCapacity,
    189     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
    190     *
    191     * @see u_strFoldCase
    192     * @stable ICU 59
    193     */
    194    static int32_t fold(
    195            uint32_t options,
    196            const char16_t *src, int32_t srcLength,
    197            char16_t *dest, int32_t destCapacity, Edits *edits,
    198            UErrorCode &errorCode);
    199 
    200    /**
    201     * Lowercases a UTF-8 string, writing the result to a ByteSink, and optionally records edits.
    202     * Casing is locale-dependent and context-sensitive.
    203     * The result may be longer or shorter than the original.
    204     *
    205     * @param locale    The locale ID. ("" = root locale, nullptr = default locale.)
    206     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
    207     * @param src       The original string.
    208     * @param sink      A ByteSink to which the result string is written.
    209     *                  sink.Flush() is called at the end.
    210     * @param edits     Records edits for index mapping, working with styled text,
    211     *                  and getting only changes (if any).
    212     *                  The Edits contents are undefined if any error occurs.
    213     *                  This function calls edits->reset() first unless
    214     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    215     * @param errorCode Reference to an in/out error code value
    216     *                  which must not indicate a failure before the function call.
    217     *
    218     * @see ucasemap_utf8ToLower
    219     * @stable ICU 60
    220     */
    221    static void utf8ToLower(
    222            const char *locale, uint32_t options,
    223            StringPiece src, ByteSink &sink, Edits *edits,
    224            UErrorCode &errorCode);
    225 
    226    /**
    227     * Uppercases a UTF-8 string, writing the result to a ByteSink, and optionally records edits.
    228     * Casing is locale-dependent and context-sensitive.
    229     * The result may be longer or shorter than the original.
    230     *
    231     * @param locale    The locale ID. ("" = root locale, nullptr = default locale.)
    232     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
    233     * @param src       The original string.
    234     * @param sink      A ByteSink to which the result string is written.
    235     *                  sink.Flush() is called at the end.
    236     * @param edits     Records edits for index mapping, working with styled text,
    237     *                  and getting only changes (if any).
    238     *                  The Edits contents are undefined if any error occurs.
    239     *                  This function calls edits->reset() first unless
    240     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    241     * @param errorCode Reference to an in/out error code value
    242     *                  which must not indicate a failure before the function call.
    243     *
    244     * @see ucasemap_utf8ToUpper
    245     * @stable ICU 60
    246     */
    247    static void utf8ToUpper(
    248            const char *locale, uint32_t options,
    249            StringPiece src, ByteSink &sink, Edits *edits,
    250            UErrorCode &errorCode);
    251 
    252 #if !UCONFIG_NO_BREAK_ITERATION
    253 
    254    /**
    255     * Titlecases a UTF-8 string, writing the result to a ByteSink, and optionally records edits.
    256     * Casing is locale-dependent and context-sensitive.
    257     * The result may be longer or shorter than the original.
    258     *
    259     * Titlecasing uses a break iterator to find the first characters of words
    260     * that are to be titlecased. It titlecases those characters and lowercases
    261     * all others. (This can be modified with options bits.)
    262     *
    263     * @param locale    The locale ID. ("" = root locale, nullptr = default locale.)
    264     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
    265     *                  U_TITLECASE_NO_LOWERCASE,
    266     *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
    267     *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
    268     * @param iter      A break iterator to find the first characters of words that are to be titlecased.
    269     *                  It is set to the source string (setUText())
    270     *                  and used one or more times for iteration (first() and next()).
    271     *                  If nullptr, then a word break iterator for the locale is used
    272     *                  (or something equivalent).
    273     * @param src       The original string.
    274     * @param sink      A ByteSink to which the result string is written.
    275     *                  sink.Flush() is called at the end.
    276     * @param edits     Records edits for index mapping, working with styled text,
    277     *                  and getting only changes (if any).
    278     *                  The Edits contents are undefined if any error occurs.
    279     *                  This function calls edits->reset() first unless
    280     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    281     * @param errorCode Reference to an in/out error code value
    282     *                  which must not indicate a failure before the function call.
    283     *
    284     * @see ucasemap_utf8ToTitle
    285     * @stable ICU 60
    286     */
    287    static void utf8ToTitle(
    288            const char *locale, uint32_t options, BreakIterator *iter,
    289            StringPiece src, ByteSink &sink, Edits *edits,
    290            UErrorCode &errorCode);
    291 
    292 #endif  // !UCONFIG_NO_BREAK_ITERATION
    293 
    294    /**
    295     * Case-folds a UTF-8 string, writing the result to a ByteSink, and optionally records edits.
    296     *
    297     * Case folding is locale-independent and not context-sensitive,
    298     * but there is an option for whether to include or exclude mappings for dotted I
    299     * and dotless i that are marked with 'T' in CaseFolding.txt.
    300     *
    301     * The result may be longer or shorter than the original.
    302     *
    303     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
    304     * @param src       The original string.
    305     * @param sink      A ByteSink to which the result string is written.
    306     *                  sink.Flush() is called at the end.
    307     * @param edits     Records edits for index mapping, working with styled text,
    308     *                  and getting only changes (if any).
    309     *                  The Edits contents are undefined if any error occurs.
    310     *                  This function calls edits->reset() first unless
    311     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    312     * @param errorCode Reference to an in/out error code value
    313     *                  which must not indicate a failure before the function call.
    314     *
    315     * @see ucasemap_utf8FoldCase
    316     * @stable ICU 60
    317     */
    318    static void utf8Fold(
    319            uint32_t options,
    320            StringPiece src, ByteSink &sink, Edits *edits,
    321            UErrorCode &errorCode);
    322 
    323    /**
    324     * Lowercases a UTF-8 string into a caller-provided buffer and optionally records edits.
    325     * Casing is locale-dependent and context-sensitive.
    326     * The result may be longer or shorter than the original.
    327     * The source string and the destination buffer must not overlap.
    328     *
    329     * @param locale    The locale ID. ("" = root locale, nullptr = default locale.)
    330     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
    331     * @param src       The original string.
    332     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    333     * @param dest      A buffer for the result string. The result will be NUL-terminated if
    334     *                  the buffer is large enough.
    335     *                  The contents are undefined in case of failure.
    336     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
    337     *                  dest may be nullptr and the function will only return the length of the result
    338     *                  without writing any of the result string.
    339     * @param edits     Records edits for index mapping, working with styled text,
    340     *                  and getting only changes (if any).
    341     *                  The Edits contents are undefined if any error occurs.
    342     *                  This function calls edits->reset() first unless
    343     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    344     * @param errorCode Reference to an in/out error code value
    345     *                  which must not indicate a failure before the function call.
    346     * @return The length of the result string, if successful.
    347     *         When the result would be longer than destCapacity,
    348     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
    349     *
    350     * @see ucasemap_utf8ToLower
    351     * @stable ICU 59
    352     */
    353    static int32_t utf8ToLower(
    354            const char *locale, uint32_t options,
    355            const char *src, int32_t srcLength,
    356            char *dest, int32_t destCapacity, Edits *edits,
    357            UErrorCode &errorCode);
    358 
    359    /**
    360     * Uppercases a UTF-8 string into a caller-provided buffer and optionally records edits.
    361     * Casing is locale-dependent and context-sensitive.
    362     * The result may be longer or shorter than the original.
    363     * The source string and the destination buffer must not overlap.
    364     *
    365     * @param locale    The locale ID. ("" = root locale, nullptr = default locale.)
    366     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
    367     * @param src       The original string.
    368     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    369     * @param dest      A buffer for the result string. The result will be NUL-terminated if
    370     *                  the buffer is large enough.
    371     *                  The contents are undefined in case of failure.
    372     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
    373     *                  dest may be nullptr and the function will only return the length of the result
    374     *                  without writing any of the result string.
    375     * @param edits     Records edits for index mapping, working with styled text,
    376     *                  and getting only changes (if any).
    377     *                  The Edits contents are undefined if any error occurs.
    378     *                  This function calls edits->reset() first unless
    379     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    380     * @param errorCode Reference to an in/out error code value
    381     *                  which must not indicate a failure before the function call.
    382     * @return The length of the result string, if successful.
    383     *         When the result would be longer than destCapacity,
    384     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
    385     *
    386     * @see ucasemap_utf8ToUpper
    387     * @stable ICU 59
    388     */
    389    static int32_t utf8ToUpper(
    390            const char *locale, uint32_t options,
    391            const char *src, int32_t srcLength,
    392            char *dest, int32_t destCapacity, Edits *edits,
    393            UErrorCode &errorCode);
    394 
    395 #if !UCONFIG_NO_BREAK_ITERATION
    396 
    397    /**
    398     * Titlecases a UTF-8 string into a caller-provided buffer and optionally records edits.
    399     * Casing is locale-dependent and context-sensitive.
    400     * The result may be longer or shorter than the original.
    401     * The source string and the destination buffer must not overlap.
    402     *
    403     * Titlecasing uses a break iterator to find the first characters of words
    404     * that are to be titlecased. It titlecases those characters and lowercases
    405     * all others. (This can be modified with options bits.)
    406     *
    407     * @param locale    The locale ID. ("" = root locale, nullptr = default locale.)
    408     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
    409     *                  U_TITLECASE_NO_LOWERCASE,
    410     *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
    411     *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
    412     * @param iter      A break iterator to find the first characters of words that are to be titlecased.
    413     *                  It is set to the source string (setUText())
    414     *                  and used one or more times for iteration (first() and next()).
    415     *                  If nullptr, then a word break iterator for the locale is used
    416     *                  (or something equivalent).
    417     * @param src       The original string.
    418     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    419     * @param dest      A buffer for the result string. The result will be NUL-terminated if
    420     *                  the buffer is large enough.
    421     *                  The contents are undefined in case of failure.
    422     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
    423     *                  dest may be nullptr and the function will only return the length of the result
    424     *                  without writing any of the result string.
    425     * @param edits     Records edits for index mapping, working with styled text,
    426     *                  and getting only changes (if any).
    427     *                  The Edits contents are undefined if any error occurs.
    428     *                  This function calls edits->reset() first unless
    429     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    430     * @param errorCode Reference to an in/out error code value
    431     *                  which must not indicate a failure before the function call.
    432     * @return The length of the result string, if successful.
    433     *         When the result would be longer than destCapacity,
    434     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
    435     *
    436     * @see ucasemap_utf8ToTitle
    437     * @stable ICU 59
    438     */
    439    static int32_t utf8ToTitle(
    440            const char *locale, uint32_t options, BreakIterator *iter,
    441            const char *src, int32_t srcLength,
    442            char *dest, int32_t destCapacity, Edits *edits,
    443            UErrorCode &errorCode);
    444 
    445 #endif  // !UCONFIG_NO_BREAK_ITERATION
    446 
    447    /**
    448     * Case-folds a UTF-8 string into a caller-provided buffer and optionally records edits.
    449     *
    450     * Case folding is locale-independent and not context-sensitive,
    451     * but there is an option for whether to include or exclude mappings for dotted I
    452     * and dotless i that are marked with 'T' in CaseFolding.txt.
    453     *
    454     * The result may be longer or shorter than the original.
    455     * The source string and the destination buffer must not overlap.
    456     *
    457     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
    458     *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
    459     * @param src       The original string.
    460     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
    461     * @param dest      A buffer for the result string. The result will be NUL-terminated if
    462     *                  the buffer is large enough.
    463     *                  The contents are undefined in case of failure.
    464     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
    465     *                  dest may be nullptr and the function will only return the length of the result
    466     *                  without writing any of the result string.
    467     * @param edits     Records edits for index mapping, working with styled text,
    468     *                  and getting only changes (if any).
    469     *                  The Edits contents are undefined if any error occurs.
    470     *                  This function calls edits->reset() first unless
    471     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
    472     * @param errorCode Reference to an in/out error code value
    473     *                  which must not indicate a failure before the function call.
    474     * @return The length of the result string, if successful.
    475     *         When the result would be longer than destCapacity,
    476     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
    477     *
    478     * @see ucasemap_utf8FoldCase
    479     * @stable ICU 59
    480     */
    481    static int32_t utf8Fold(
    482            uint32_t options,
    483            const char *src, int32_t srcLength,
    484            char *dest, int32_t destCapacity, Edits *edits,
    485            UErrorCode &errorCode);
    486 
    487 private:  // Static-only API: default construction, copy construction and copy assignment are deleted.
    488    CaseMap() = delete;
    489    CaseMap(const CaseMap &other) = delete;
    490    CaseMap &operator=(const CaseMap &other) = delete;
    491 };
    492 
    493 U_NAMESPACE_END
    494 
    495 #endif /* U_SHOW_CPLUSPLUS_API */
    496 
    497 #endif  // __CASEMAP_H__