tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucpmap.h (5674B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // ucpmap.h
      5 // created: 2018sep03 Markus W. Scherer
      6 
      7 #ifndef __UCPMAP_H__
      8 #define __UCPMAP_H__
      9 
     10 #include "unicode/utypes.h"
     11 
     12 U_CDECL_BEGIN
     13 
     14 /**
     15 * \file
     16 * \brief C API: This file defines an abstract map from Unicode code points to integer values.
     17 *
     18 * @see UCPMap
     19 * @see UCPTrie
     20 * @see UMutableCPTrie
     21 */
     22 
     23 /**
     24 * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
     25 * The struct is only forward-declared in this header; the ucpmap_*() functions take a const UCPMap pointer.
     26 * @see UCPTrie
     27 * @see UMutableCPTrie
     28 * @stable ICU 63
     29 */
     30 typedef struct UCPMap UCPMap;
     31 
     32 /**
     33 * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
     34 * Most users should use UCPMAP_RANGE_NORMAL.
     35 *
     36 * @see ucpmap_getRange
     37 * @see ucptrie_getRange
     38 * @see umutablecptrie_getRange
     39 * @stable ICU 63
     40 */
     41 enum UCPMapRangeOption {
     42    /**
     43     * ucpmap_getRange() enumerates all same-value ranges as stored in the map.
     44     * Most users should use this option.
     45     * @stable ICU 63
     46     */
     47    UCPMAP_RANGE_NORMAL,
     48    /**
     49     * ucpmap_getRange() enumerates all same-value ranges as stored in the map,
     50     * except that lead surrogates (U+D800..U+DBFF) are treated as having the
     51     * surrogateValue, which is passed to getRange() as a separate parameter.
     52     * The surrogateValue is not transformed via filter().
     53     * See U_IS_LEAD(c).
     54     *
     55     * Most users should use UCPMAP_RANGE_NORMAL instead.
     56     *
     57     * This option is useful for maps that map surrogate code *units* to
     58     * special values optimized for UTF-16 string processing
     59     * or for special error behavior for unpaired surrogates,
     60     * but those values are not to be associated with the lead surrogate code *points*.
     61     * @stable ICU 63
     62     */
     63    UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
     64    /**
     65     * ucpmap_getRange() enumerates all same-value ranges as stored in the map,
     66     * except that all surrogates (U+D800..U+DFFF) are treated as having the
     67     * surrogateValue, which is passed to getRange() as a separate parameter.
     68     * The surrogateValue is not transformed via filter().
     69     * See U_IS_SURROGATE(c).
     70     *
     71     * Most users should use UCPMAP_RANGE_NORMAL instead.
     72     *
     73     * This option is useful for maps that map surrogate code *units* to
     74     * special values optimized for UTF-16 string processing
     75     * or for special error behavior for unpaired surrogates,
     76     * but those values are not to be associated with the surrogate code *points*.
     77     * @stable ICU 63
     78     */
     79    UCPMAP_RANGE_FIXED_ALL_SURROGATES
     80 };
     81 #ifndef U_IN_DOXYGEN
     82 typedef enum UCPMapRangeOption UCPMapRangeOption;
     83 #endif  /* U_IN_DOXYGEN */
     84 
     85 /**
     86 * Returns the value for a code point as stored in the map, with range checking.
     87 * Returns an implementation-defined error value if c is not in the range U+0000..U+10FFFF.
     88 *
     89 * @param map the map
     90 * @param c the code point
     91 * @return the map value,
     92 *         or an implementation-defined error value if the code point is not in the range U+0000..U+10FFFF
     93 * @stable ICU 63
     94 */
     95 U_CAPI uint32_t U_EXPORT2
     96 ucpmap_get(const UCPMap *map, UChar32 c);
     97 
     98 /**
     99 * Callback function type: Modifies a map value.
    100 * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
    101 * The modified value is what the getRange function delivers to its caller (ucpmap_getRange() stores it in *pValue).
    102 *
    103 * Can be used to ignore some of the value bits,
    104 * make a filter for one of several values,
    105 * return a value index computed from the map value, etc.
    106 *
    107 * @param context an opaque pointer, as passed into the getRange function
    108 * @param value a value from the map
    109 * @return the modified value
    110 * @stable ICU 63
    111 */
    112 typedef uint32_t U_CALLCONV
    113 UCPMapValueFilter(const void *context, uint32_t value);
    114 
    115 /**
    116 * Returns the last code point such that all those from start to there have the same value.
    117 * Can be used to efficiently iterate over all same-value ranges in a map.
    118 * (This is normally faster than iterating over code points and get()ting each value,
    119 * but much slower than a data structure that stores ranges directly.)
    120 *
    121 * If the UCPMapValueFilter function pointer is not NULL, then
    122 * the value to be delivered is passed through that function, and the return value is the end
    123 * of the range where that function modifies all of the map values to the same value.
    124 * The value is unchanged if that function pointer is NULL.
    125 *
    126 * Example:
    127 * \code
    128 * UChar32 start = 0, end;
    129 * uint32_t value;
    130 * while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
    131 *                               NULL, NULL, &value)) >= 0) {
    132 *     // Work with the range start..end and its value.
    133 *     start = end + 1;
    134 * }  // The loop ends once start is no longer a valid code point: getRange then returns -1.
    135 * \endcode
    136 *
    137 * @param map the map
    138 * @param start range start code point
    139 * @param option defines whether surrogates are treated normally,
    140 *               or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
    141 * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
    142 * @param filter a pointer to a function that may modify the map data value,
    143 *     or NULL if the values from the map are to be used unmodified
    144 * @param context an opaque pointer that is passed on to the filter function
    145 * @param pValue if not NULL, receives the value that every code point start..end has;
    146 *     may have been modified by filter(context, map value)
    147 *     if that function pointer is not NULL
    148 * @return the range end code point, or -1 if start is not a valid code point
    149 * @stable ICU 63
    150 */
    151 U_CAPI UChar32 U_EXPORT2
    152 ucpmap_getRange(const UCPMap *map, UChar32 start,
    153                UCPMapRangeOption option, uint32_t surrogateValue,
    154                UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
    155 
    156 U_CDECL_END
    157 
    158 #endif