tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

propsvec.h (5902B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2002-2010, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  propsvec.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2002feb22
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Store bits (Unicode character properties) in bit set vectors.
     19 */
     20 
     21 #ifndef __UPROPSVEC_H__
     22 #define __UPROPSVEC_H__
     23 
     24 #include "unicode/utypes.h"
     25 #include "utrie.h"
     26 #include "utrie2.h"
     27 
     28 U_CDECL_BEGIN
     29 
     30 /**
     31 * Unicode Properties Vectors associated with code point ranges.
     32 *
     33 * Rows of uint32_t integers in a contiguous array store
     34 * the range limits and the properties vectors.
     35 *
     36 * Logically, each row has a certain number of uint32_t values,
     37 * which is set via the upvec_open() "columns" parameter.
     38 *
     39 * Internally, two additional columns are stored.
     40 * In each internal row,
     41 * row[0] contains the start code point and
     42 * row[1] contains the limit code point,
     43 * which is the start of the next range.
     44 *
     45 * Initially, there is only one "normal" row for
     46 * range [0..0x110000[ (limit exclusive) with values 0.
     47 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
     48 *
     49 * It would be possible to store only one range boundary per row,
     50 * but self-contained rows make it possible to sort them by contents later.
     51 */
     52 struct UPropsVectors;
     53 typedef struct UPropsVectors UPropsVectors;
     54 
     55 /*
     56 * Special pseudo code points for storing the initialValue and the errorValue,
     57 * which are used to initialize a UTrie2 or similar.
     58 */
     59 #define UPVEC_FIRST_SPECIAL_CP 0x110000 /* same value as the limit of the normal range [0..0x110000[ */
     60 #define UPVEC_INITIAL_VALUE_CP 0x110000 /* pseudo code point for the initialValue */
     61 #define UPVEC_ERROR_VALUE_CP 0x110001 /* pseudo code point for the errorValue */
     62 #define UPVEC_MAX_CP 0x110001 /* currently the same value as UPVEC_ERROR_VALUE_CP */
     63 
     64 /*
     65 * Special pseudo code point used in upvec_compact() signalling the end of
     66 * delivering special values and the beginning of delivering real ones.
     67 * Stable value, unlike UPVEC_MAX_CP which might grow over time.
     68 */
     69 #define UPVEC_START_REAL_VALUES_CP 0x200000
     70 
     71 /*
     72 * Open a UPropsVectors object.
     73 * @param columns Number of value integers (uint32_t) per row, not counting the two internal range columns.
     74 */
     75 U_CAPI UPropsVectors * U_EXPORT2
     76 upvec_open(int32_t columns, UErrorCode *pErrorCode);
     77 
     78 U_CAPI void U_EXPORT2
     79 upvec_close(UPropsVectors *pv); /* NOTE(review): presumably releases an object obtained from upvec_open() - confirm in the implementation */
     80 
     81 /*
     82 * In rows for code points [start..end] (both inclusive), select the column,
     83 * reset the mask bits and set the value bits (ANDed with the mask).
     84 *
     85 * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
     86 */
     87 U_CAPI void U_EXPORT2
     88 upvec_setValue(UPropsVectors *pv,
     89               UChar32 start, UChar32 end,
     90               int32_t column,
     91               uint32_t value, uint32_t mask,
     92               UErrorCode *pErrorCode);
     93 
     94 /* NOTE(review): presumably returns the value stored in the given column for code point c - confirm in the implementation.
     95 * Logically const but must not be used on the same pv concurrently!
     96 * Always returns 0 if called after upvec_compact().
     97 */
     98 U_CAPI uint32_t U_EXPORT2
     99 upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
    100 
    101 /*
    102 * pRangeStart and pRangeEnd are optional and can be NULL.
    103 * @return NULL if rowIndex is out of range, for illegal arguments,
    104 *         or if called after upvec_compact()
    105 */
    106 U_CAPI uint32_t * U_EXPORT2
    107 upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
    108             UChar32 *pRangeStart, UChar32 *pRangeEnd);
    109 
    110 /*
    111 * Compact the vectors:
    112 * - modify the memory
    113 * - keep only unique vectors
    114 * - store them contiguously from the beginning of the memory
    115 * - for each (non-unique) row, call the handler function
    116 *
    117 * The handler's rowIndex is the index of the row in the compacted
    118 * memory block.
    119 * (Therefore, it starts at 0 and increases in increments of the columns value.)
    120 *
    121 * In a first phase, only special values are delivered (each exactly once),
    122 * with start==end both equalling a special pseudo code point.
    123 * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
    124 * where rowIndex is the length of the compacted array,
    125 * and the row is arbitrary (but not NULL).
    126 * Then, in the second phase, the handler is called for each row of real values.
    127 */
    128 typedef void U_CALLCONV
    129 UPVecCompactHandler(void *context,
    130                    UChar32 start, UChar32 end,
    131                    int32_t rowIndex, uint32_t *row, int32_t columns,
    132                    UErrorCode *pErrorCode);
    133 
    134 U_CAPI void U_EXPORT2
    135 upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
    136 
    137 /*
    138 * Get the vectors array after calling upvec_compact().
    139 * The caller must neither modify nor release the returned array.
    140 * Returns NULL if called before upvec_compact().
    141 */
    142 U_CAPI const uint32_t * U_EXPORT2
    143 upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
    144 
    145 /*
    146 * Get a clone of the vectors array after calling upvec_compact().
    147 * The caller owns the returned array and must uprv_free() it.
    148 * Returns NULL if called before upvec_compact().
    149 */
    150 U_CAPI uint32_t * U_EXPORT2
    151 upvec_cloneArray(const UPropsVectors *pv,
    152                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
    153 
    154 /*
    155 * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
    156 * vectors array (see upvec_getArray()), and freeze the trie.
    157 */
    158 U_CAPI UTrie2 * U_EXPORT2
    159 upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
    160 
    161 struct UPVecToUTrie2Context {
    162    UTrie2 *trie; /* trie created by upvec_compactToUTrie2Handler() */
    163    int32_t initialValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_INITIAL_VALUE_CP - confirm */
    164    int32_t errorValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_ERROR_VALUE_CP - confirm */
    165    int32_t maxValue; /* NOTE(review): presumably the rowIndex delivered with UPVEC_START_REAL_VALUES_CP - confirm */
    166 };
    167 typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
    168 
    169 /* Handler with the UPVecCompactHandler signature; context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
    170 U_CAPI void U_CALLCONV
    171 upvec_compactToUTrie2Handler(void *context,
    172                             UChar32 start, UChar32 end,
    173                             int32_t rowIndex, uint32_t *row, int32_t columns,
    174                             UErrorCode *pErrorCode);
    175 
    176 U_CDECL_END
    177 
    178 #endif