tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

utrie2_builder.cpp (48178B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 2001-2014, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *   file name:  utrie2_builder.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2008sep26 (split off from utrie2.c)
     16 *   created by: Markus W. Scherer
     17 *
     18 *   This is a common implementation of a Unicode trie.
     19 *   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
     20 *   Unicode code points (0..0x10ffff).
     21 *   This is the second common version of a Unicode trie (hence the name UTrie2).
     22 *   See utrie2.h for a comparison.
     23 *
     24 *   This file contains only the builder code.
     25 *   See utrie2.c for the runtime and enumeration code.
     26 */
     27 // #define UTRIE2_DEBUG
     28 #ifdef UTRIE2_DEBUG
     29 #   include <stdio.h>
     30 #endif
     31 // #define UCPTRIE_DEBUG
     32 
     33 #include "unicode/utypes.h"
     34 #ifdef UCPTRIE_DEBUG
     35 #include "unicode/ucptrie.h"
     36 #include "unicode/umutablecptrie.h"
     37 #include "ucptrie_impl.h"
     38 #endif
     39 #include "cmemory.h"
     40 #include "utrie2.h"
     41 #include "utrie2_impl.h"
     42 
     43 #include "utrie.h"  // for utrie2_fromUTrie()
     44 
     45 /* Implementation notes ----------------------------------------------------- */
     46 
     47 /*
     48 * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
     49 * have been chosen to minimize trie sizes overall.
     50 * Most of the code is flexible enough to work with a range of values,
     51 * within certain limits.
     52 *
     53 * Exception: Support for separate values for lead surrogate code _units_
     54 * vs. code _points_ was added after the constants were fixed,
     55 * and has not been tested nor particularly designed for different constant values.
     56 * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
     57 * part and back.)
     58 *
     59 * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
     60 * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
     61 * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
     62 * remapping) stops working.
     63 *
     64 * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
     65 * assumes that a single index-2 block is used for 0x400 code points
     66 * corresponding to one lead surrogate.
     67 *
     68 * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
     69 * more than one Unicode plane, and the split of the index-2 table into a BMP
     70 * part and a supplementary part, with a gap in between, would not work.
     71 *
     72 * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
     73 * there is data with more than 64k distinct values,
     74 * for example for Unihan collation with a separate collation weight per
     75 * Han character.
     76 */
     77 
     78 /* Building a trie ----------------------------------------------------------*/
     79 
     80 enum {
     81    /** The null index-2 block, following the gap in the index-2 table. */
     82    UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
     83 
     84    /** The start of allocated index-2 blocks. */
     85    UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
     86 
     87    /**
     88     * The null data block.
     89     * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
     90     * to work with 6-bit trail bytes from 2-byte UTF-8.
     91     */
     92    UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
     93 
     94    /** The start of allocated data blocks. */
     95    UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
     96 
     97    /**
     98     * The start of data blocks for U+0800 and above.
     99     * Below, compaction uses a block length of 64 for 2-byte UTF-8.
    100     * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
    101     * Data values for 0x780 code points beyond ASCII.
    102     */
    103    UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
    104 };
    105 
    106 /* Start with allocation of 16k data entries. */
    107 #define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
    108 
    109 /* Grow about 8x each time. */
    110 #define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
    111 
    112 static int32_t
    113 allocIndex2Block(UNewTrie2 *trie);
    114 
    115 U_CAPI UTrie2 * U_EXPORT2
    116 utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
    117    UTrie2 *trie;
    118    UNewTrie2 *newTrie;
    119    uint32_t *data;
    120    int32_t i, j;
    121 
    122    if(U_FAILURE(*pErrorCode)) {
    123        return nullptr;
    124    }
    125 
    126    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
    127    newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
    128    data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
    129    if(trie==nullptr || newTrie==nullptr || data==nullptr) {
    130        uprv_free(trie);
    131        uprv_free(newTrie);
    132        uprv_free(data);
    133        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    134        return nullptr;
    135    }
    136 
    137    uprv_memset(trie, 0, sizeof(UTrie2));
    138    trie->initialValue=initialValue;
    139    trie->errorValue=errorValue;
    140    trie->highStart=0x110000;
    141    trie->newTrie=newTrie;
    142 #ifdef UTRIE2_DEBUG
    143    trie->name="open";
    144 #endif
    145 
    146    newTrie->data=data;
    147 #ifdef UCPTRIE_DEBUG
    148    newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode);
    149 #endif
    150    newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
    151    newTrie->initialValue=initialValue;
    152    newTrie->errorValue=errorValue;
    153    newTrie->highStart=0x110000;
    154    newTrie->firstFreeBlock=0;  /* no free block in the list */
    155    newTrie->isCompacted=false;
    156 
    157    /*
    158     * preallocate and reset
    159     * - ASCII
    160     * - the bad-UTF-8-data block
    161     * - the null data block
    162     */
    163    for(i=0; i<0x80; ++i) {
    164        newTrie->data[i]=initialValue;
    165    }
    166    for(; i<0xc0; ++i) {
    167        newTrie->data[i]=errorValue;
    168    }
    169    for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
    170        newTrie->data[i]=initialValue;
    171    }
    172    newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
    173    newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
    174 
    175    /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
    176    for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
    177        newTrie->index2[i]=j;
    178        newTrie->map[i]=1;
    179    }
    180    /* reference counts for the bad-UTF-8-data block */
    181    for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
    182        newTrie->map[i]=0;
    183    }
    184    /*
    185     * Reference counts for the null data block: all blocks except for the ASCII blocks.
    186     * Plus 1 so that we don't drop this block during compaction.
    187     * Plus as many as needed for lead surrogate code points.
    188     */
    189    /* i==newTrie->dataNullOffset */
    190    newTrie->map[i++]=
    191        (0x110000>>UTRIE2_SHIFT_2)-
    192        (0x80>>UTRIE2_SHIFT_2)+
    193        1+
    194        UTRIE2_LSCP_INDEX_2_LENGTH;
    195    j+=UTRIE2_DATA_BLOCK_LENGTH;
    196    for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
    197        newTrie->map[i]=0;
    198    }
    199 
    200    /*
    201     * set the remaining indexes in the BMP index-2 block
    202     * to the null data block
    203     */
    204    for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
    205        newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
    206    }
    207 
    208    /*
    209     * Fill the index gap with impossible values so that compaction
    210     * does not overlap other index-2 blocks with the gap.
    211     */
    212    for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
    213        newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
    214    }
    215 
    216    /* set the indexes in the null index-2 block */
    217    for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
    218        newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
    219    }
    220    newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
    221    newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
    222 
    223    /* set the index-1 indexes for the linear index-2 block */
    224    for(i=0, j=0;
    225        i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
    226        ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
    227    ) {
    228        newTrie->index1[i]=j;
    229    }
    230 
    231    /* set the remaining index-1 indexes to the null index-2 block */
    232    for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
    233        newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
    234    }
    235 
    236    /*
    237     * Preallocate and reset data for U+0080..U+07ff,
    238     * for 2-byte UTF-8 which will be compacted in 64-blocks
    239     * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
    240     */
    241    for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
    242        utrie2_set32(trie, i, initialValue, pErrorCode);
    243    }
    244 
    245    return trie;
    246 }
    247 
    248 static UNewTrie2 *
    249 cloneBuilder(const UNewTrie2 *other) {
    250    UNewTrie2 *trie;
    251 
    252    trie = static_cast<UNewTrie2*>(uprv_malloc(sizeof(UNewTrie2)));
    253    if(trie==nullptr) {
    254        return nullptr;
    255    }
    256 
    257    trie->data = static_cast<uint32_t*>(uprv_malloc(other->dataCapacity * 4));
    258    if(trie->data==nullptr) {
    259        uprv_free(trie);
    260        return nullptr;
    261    }
    262 #ifdef UCPTRIE_DEBUG
    263    if(other->t3==nullptr) {
    264        trie->t3=nullptr;
    265    } else {
    266        UErrorCode errorCode=U_ZERO_ERROR;
    267        trie->t3=umutablecptrie_clone(other->t3, &errorCode);
    268    }
    269 #endif
    270    trie->dataCapacity=other->dataCapacity;
    271 
    272    /* clone data */
    273    uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
    274    uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4);
    275    trie->index2NullOffset=other->index2NullOffset;
    276    trie->index2Length=other->index2Length;
    277 
    278    uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4);
    279    trie->dataNullOffset=other->dataNullOffset;
    280    trie->dataLength=other->dataLength;
    281 
    282    /* reference counters */
    283    if(other->isCompacted) {
    284        trie->firstFreeBlock=0;
    285    } else {
    286        uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4);
    287        trie->firstFreeBlock=other->firstFreeBlock;
    288    }
    289 
    290    trie->initialValue=other->initialValue;
    291    trie->errorValue=other->errorValue;
    292    trie->highStart=other->highStart;
    293    trie->isCompacted=other->isCompacted;
    294 
    295    return trie;
    296 }
    297 
    298 U_CAPI UTrie2 * U_EXPORT2
    299 utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
    300    UTrie2 *trie;
    301 
    302    if(U_FAILURE(*pErrorCode)) {
    303        return nullptr;
    304    }
    305    if(other==nullptr || (other->memory==nullptr && other->newTrie==nullptr)) {
    306        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    307        return nullptr;
    308    }
    309 
    310    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
    311    if(trie==nullptr) {
    312        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    313        return nullptr;
    314    }
    315    uprv_memcpy(trie, other, sizeof(UTrie2));
    316 
    317    if(other->memory!=nullptr) {
    318        trie->memory=uprv_malloc(other->length);
    319        if(trie->memory!=nullptr) {
    320            trie->isMemoryOwned=true;
    321            uprv_memcpy(trie->memory, other->memory, other->length);
    322 
    323            /* make the clone's pointers point to its own memory */
    324            trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
    325            if(other->data16!=nullptr) {
    326                trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
    327            }
    328            if(other->data32!=nullptr) {
    329                trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
    330            }
    331        }
    332    } else /* other->newTrie!=nullptr */ {
    333        trie->newTrie=cloneBuilder(other->newTrie);
    334    }
    335 
    336    if(trie->memory==nullptr && trie->newTrie==nullptr) {
    337        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    338        uprv_free(trie);
    339        trie=nullptr;
    340    }
    341    return trie;
    342 }
    343 
    344 typedef struct NewTrieAndStatus {
    345    UTrie2 *trie;
    346    UErrorCode errorCode;
    347    UBool exclusiveLimit;  /* rather than inclusive range end */
    348 } NewTrieAndStatus;
    349 
    350 static UBool U_CALLCONV
    351 copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    352    NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
    353    if(value!=nt->trie->initialValue) {
    354        if(nt->exclusiveLimit) {
    355            --end;
    356        }
    357        if(start==end) {
    358            utrie2_set32(nt->trie, start, value, &nt->errorCode);
    359        } else {
    360            utrie2_setRange32(nt->trie, start, end, value, true, &nt->errorCode);
    361        }
    362        return U_SUCCESS(nt->errorCode);
    363    } else {
    364        return true;
    365    }
    366 }
    367 
    368 #ifdef UTRIE2_DEBUG
    369 static long countInitial(const UTrie2 *trie) {
    370    uint32_t initialValue=trie->initialValue;
    371    int32_t length=trie->dataLength;
    372    long count=0;
    373    if(trie->data16!=nullptr) {
    374        for(int32_t i=0; i<length; ++i) {
    375            if(trie->data16[i]==initialValue) { ++count; }
    376        }
    377    } else {
    378        for(int32_t i=0; i<length; ++i) {
    379            if(trie->data32[i]==initialValue) { ++count; }
    380        }
    381    }
    382    return count;
    383 }
    384 
    385 static void
    386 utrie_printLengths(const UTrie *trie) {
    387    long indexLength=trie->indexLength;
    388    long dataLength=(long)trie->dataLength;
    389    long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=nullptr ? 4 : 2);
    390    printf("**UTrieLengths** index:%6ld  data:%6ld  serialized:%6ld\n",
    391           indexLength, dataLength, totalLength);
    392 }
    393 
    394 static void
    395 utrie2_printLengths(const UTrie2 *trie, const char *which) {
    396    long indexLength=trie->indexLength;
    397    long dataLength=(long)trie->dataLength;
    398    long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=nullptr ? 4 : 2);
    399    printf("**UTrie2Lengths(%s %s)** index:%6ld  data:%6ld  countInitial:%6ld  serialized:%6ld\n",
    400           which, trie->name, indexLength, dataLength, countInitial(trie), totalLength);
    401 }
    402 #endif
    403 
    404 U_CAPI UTrie2 * U_EXPORT2
    405 utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
    406    NewTrieAndStatus context;
    407    char16_t lead;
    408 
    409    if(U_FAILURE(*pErrorCode)) {
    410        return nullptr;
    411    }
    412    if(other==nullptr || (other->memory==nullptr && other->newTrie==nullptr)) {
    413        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    414        return nullptr;
    415    }
    416    if(other->newTrie!=nullptr && !other->newTrie->isCompacted) {
    417        return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */
    418    }
    419 
    420    /* Clone the frozen trie by enumerating it and building a new one. */
    421    context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
    422    if(U_FAILURE(*pErrorCode)) {
    423        return nullptr;
    424    }
    425    context.exclusiveLimit=false;
    426    context.errorCode=*pErrorCode;
    427    utrie2_enum(other, nullptr, copyEnumRange, &context);
    428    *pErrorCode=context.errorCode;
    429    for(lead=0xd800; lead<0xdc00; ++lead) {
    430        uint32_t value;
    431        if(other->data32==nullptr) {
    432            value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
    433        } else {
    434            value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
    435        }
    436        if(value!=other->initialValue) {
    437            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
    438        }
    439    }
    440    if(U_FAILURE(*pErrorCode)) {
    441        utrie2_close(context.trie);
    442        context.trie=nullptr;
    443    }
    444    return context.trie;
    445 }
    446 
    447 /* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
    448 U_CAPI UTrie2 * U_EXPORT2
    449 utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
    450    NewTrieAndStatus context;
    451    char16_t lead;
    452 
    453    if(U_FAILURE(*pErrorCode)) {
    454        return nullptr;
    455    }
    456    if(trie1==nullptr) {
    457        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    458        return nullptr;
    459    }
    460    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
    461    if(U_FAILURE(*pErrorCode)) {
    462        return nullptr;
    463    }
    464    context.exclusiveLimit=true;
    465    context.errorCode=*pErrorCode;
    466    utrie_enum(trie1, nullptr, copyEnumRange, &context);
    467    *pErrorCode=context.errorCode;
    468    for(lead=0xd800; lead<0xdc00; ++lead) {
    469        uint32_t value;
    470        if(trie1->data32==nullptr) {
    471            value=UTRIE_GET16_FROM_LEAD(trie1, lead);
    472        } else {
    473            value=UTRIE_GET32_FROM_LEAD(trie1, lead);
    474        }
    475        if(value!=trie1->initialValue) {
    476            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
    477        }
    478    }
    479    if(U_SUCCESS(*pErrorCode)) {
    480        utrie2_freeze(context.trie,
    481                      trie1->data32!=nullptr ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
    482                      pErrorCode);
    483    }
    484 #ifdef UTRIE2_DEBUG
    485    if(U_SUCCESS(*pErrorCode)) {
    486        utrie_printLengths(trie1);
    487        utrie2_printLengths(context.trie, "fromUTrie");
    488    }
    489 #endif
    490    if(U_FAILURE(*pErrorCode)) {
    491        utrie2_close(context.trie);
    492        context.trie=nullptr;
    493    }
    494    return context.trie;
    495 }
    496 
    497 static inline UBool
    498 isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
    499    int32_t i2, block;
    500 
    501    if(U_IS_LEAD(c) && forLSCP) {
    502        i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
    503            (c>>UTRIE2_SHIFT_2);
    504    } else {
    505        i2=trie->index1[c>>UTRIE2_SHIFT_1]+
    506            ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
    507    }
    508    block=trie->index2[i2];
    509    return block == trie->dataNullOffset;
    510 }
    511 
    512 static int32_t
    513 allocIndex2Block(UNewTrie2 *trie) {
    514    int32_t newBlock, newTop;
    515 
    516    newBlock=trie->index2Length;
    517    newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
    518    if(newTop>UPRV_LENGTHOF(trie->index2)) {
    519        /*
    520         * Should never occur.
    521         * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
    522         * or the code writes more values than should be possible.
    523         */
    524        return -1;
    525    }
    526    trie->index2Length=newTop;
    527    uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
    528    return newBlock;
    529 }
    530 
    531 static int32_t
    532 getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
    533    int32_t i1, i2;
    534 
    535    if(U_IS_LEAD(c) && forLSCP) {
    536        return UTRIE2_LSCP_INDEX_2_OFFSET;
    537    }
    538 
    539    i1=c>>UTRIE2_SHIFT_1;
    540    i2=trie->index1[i1];
    541    if(i2==trie->index2NullOffset) {
    542        i2=allocIndex2Block(trie);
    543        if(i2<0) {
    544            return -1;  /* program error */
    545        }
    546        trie->index1[i1]=i2;
    547    }
    548    return i2;
    549 }
    550 
    551 static int32_t
    552 allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
    553    int32_t newBlock, newTop;
    554 
    555    if(trie->firstFreeBlock!=0) {
    556        /* get the first free block */
    557        newBlock=trie->firstFreeBlock;
    558        trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
    559    } else {
    560        /* get a new block from the high end */
    561        newBlock=trie->dataLength;
    562        newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
    563        if(newTop>trie->dataCapacity) {
    564            /* out of memory in the data array */
    565            int32_t capacity;
    566            uint32_t *data;
    567 
    568            if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
    569                capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
    570            } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
    571                capacity=UNEWTRIE2_MAX_DATA_LENGTH;
    572            } else {
    573                /*
    574                 * Should never occur.
    575                 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
    576                 * or the code writes more values than should be possible.
    577                 */
    578                return -1;
    579            }
    580            data = static_cast<uint32_t*>(uprv_malloc(capacity * 4));
    581            if(data==nullptr) {
    582                return -1;
    583            }
    584            uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4);
    585            uprv_free(trie->data);
    586            trie->data=data;
    587            trie->dataCapacity=capacity;
    588        }
    589        trie->dataLength=newTop;
    590    }
    591    uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
    592    trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
    593    return newBlock;
    594 }
    595 
    596 /* call when the block's reference counter reaches 0 */
    597 static void
    598 releaseDataBlock(UNewTrie2 *trie, int32_t block) {
    599    /* put this block at the front of the free-block chain */
    600    trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
    601    trie->firstFreeBlock=block;
    602 }
    603 
    604 static inline UBool
    605 isWritableBlock(UNewTrie2 *trie, int32_t block) {
    606    return block != trie->dataNullOffset && 1 == trie->map[block >> UTRIE2_SHIFT_2];
    607 }
    608 
    609 static inline void
    610 setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
    611    int32_t oldBlock;
    612    ++trie->map[block>>UTRIE2_SHIFT_2];  /* increment first, in case block==oldBlock! */
    613    oldBlock=trie->index2[i2];
    614    if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
    615        releaseDataBlock(trie, oldBlock);
    616    }
    617    trie->index2[i2]=block;
    618 }
    619 
    620 /**
    621 * No error checking for illegal arguments.
    622 *
    623 * @return -1 if no new data block available (out of memory in data array)
    624 * @internal
    625 */
    626 static int32_t
    627 getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
    628    int32_t i2, oldBlock, newBlock;
    629 
    630    i2=getIndex2Block(trie, c, forLSCP);
    631    if(i2<0) {
    632        return -1;  /* program error */
    633    }
    634 
    635    i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
    636    oldBlock=trie->index2[i2];
    637    if(isWritableBlock(trie, oldBlock)) {
    638        return oldBlock;
    639    }
    640 
    641    /* allocate a new data block */
    642    newBlock=allocDataBlock(trie, oldBlock);
    643    if(newBlock<0) {
    644        /* out of memory in the data array */
    645        return -1;
    646    }
    647    setIndex2Entry(trie, i2, newBlock);
    648    return newBlock;
    649 }
    650 
    651 /**
    652 * @return true if the value was successfully set
    653 */
    654 static void
    655 set32(UNewTrie2 *trie,
    656      UChar32 c, UBool forLSCP, uint32_t value,
    657      UErrorCode *pErrorCode) {
    658    int32_t block;
    659 
    660    if(trie==nullptr || trie->isCompacted) {
    661        *pErrorCode=U_NO_WRITE_PERMISSION;
    662        return;
    663    }
    664 #ifdef UCPTRIE_DEBUG
    665    umutablecptrie_set(trie->t3, c, value, pErrorCode);
    666 #endif
    667 
    668    block=getDataBlock(trie, c, forLSCP);
    669    if(block<0) {
    670        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    671        return;
    672    }
    673 
    674    trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
    675 }
    676 
    677 U_CAPI void U_EXPORT2
    678 utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
    679    if(U_FAILURE(*pErrorCode)) {
    680        return;
    681    }
    682    if((uint32_t)c>0x10ffff) {
    683        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    684        return;
    685    }
    686    set32(trie->newTrie, c, true, value, pErrorCode);
    687 }
    688 
    689 U_CAPI void U_EXPORT2
    690 utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
    691                                     UChar32 c, uint32_t value,
    692                                     UErrorCode *pErrorCode) {
    693    if(U_FAILURE(*pErrorCode)) {
    694        return;
    695    }
    696    if(!U_IS_LEAD(c)) {
    697        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    698        return;
    699    }
    700    set32(trie->newTrie, c, false, value, pErrorCode);
    701 }
    702 
    703 static void
    704 writeBlock(uint32_t *block, uint32_t value) {
    705    uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
    706    while(block<limit) {
    707        *block++=value;
    708    }
    709 }
    710 
    711 /**
    712 * initialValue is ignored if overwrite=true
    713 * @internal
    714 */
    715 static void
    716 fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
    717          uint32_t value, uint32_t initialValue, UBool overwrite) {
    718    uint32_t *pLimit;
    719 
    720    pLimit=block+limit;
    721    block+=start;
    722    if(overwrite) {
    723        while(block<pLimit) {
    724            *block++=value;
    725        }
    726    } else {
    727        while(block<pLimit) {
    728            if(*block==initialValue) {
    729                *block=value;
    730            }
    731            ++block;
    732        }
    733    }
    734 }
    735 
    736 U_CAPI void U_EXPORT2
    737 utrie2_setRange32(UTrie2 *trie,
    738                  UChar32 start, UChar32 end,
    739                  uint32_t value, UBool overwrite,
    740                  UErrorCode *pErrorCode) {
    741    /*
    742     * repeat value in [start..end]
    743     * mark index values for repeat-data blocks by setting bit 31 of the index values
    744     * fill around existing values if any, if(overwrite)
    745     */
    746    UNewTrie2 *newTrie;
    747    int32_t block, rest, repeatBlock;
    748    UChar32 limit;
    749 
    750    if(U_FAILURE(*pErrorCode)) {
    751        return;
    752    }
    753    if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
    754        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    755        return;
    756    }
    757    newTrie=trie->newTrie;
    758    if(newTrie==nullptr || newTrie->isCompacted) {
    759        *pErrorCode=U_NO_WRITE_PERMISSION;
    760        return;
    761    }
    762 #ifdef UCPTRIE_DEBUG
    763    umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode);
    764 #endif
    765    if(!overwrite && value==newTrie->initialValue) {
    766        return; /* nothing to do */
    767    }
    768 
    769    limit=end+1;
    770    if(start&UTRIE2_DATA_MASK) {
    771        UChar32 nextStart;
    772 
    773        /* set partial block at [start..following block boundary[ */
    774        block=getDataBlock(newTrie, start, true);
    775        if(block<0) {
    776            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    777            return;
    778        }
    779 
    780        nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK;
    781        if(nextStart<=limit) {
    782            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
    783                      value, newTrie->initialValue, overwrite);
    784            start=nextStart;
    785        } else {
    786            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
    787                      value, newTrie->initialValue, overwrite);
    788            return;
    789        }
    790    }
    791 
    792    /* number of positions in the last, partial block */
    793    rest=limit&UTRIE2_DATA_MASK;
    794 
    795    /* round down limit to a block boundary */
    796    limit&=~UTRIE2_DATA_MASK;
    797 
    798    /* iterate over all-value blocks */
    799    if(value==newTrie->initialValue) {
    800        repeatBlock=newTrie->dataNullOffset;
    801    } else {
    802        repeatBlock=-1;
    803    }
    804 
    805    while(start<limit) {
    806        int32_t i2;
    807        UBool setRepeatBlock=false;
    808 
    809        if(value==newTrie->initialValue && isInNullBlock(newTrie, start, true)) {
    810            start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
    811            continue;
    812        }
    813 
    814        /* get index value */
    815        i2=getIndex2Block(newTrie, start, true);
    816        if(i2<0) {
    817            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    818            return;
    819        }
    820        i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
    821        block=newTrie->index2[i2];
    822        if(isWritableBlock(newTrie, block)) {
    823            /* already allocated */
    824            if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
    825                /*
    826                 * We overwrite all values, and it's not a
    827                 * protected (ASCII-linear or 2-byte UTF-8) block:
    828                 * replace with the repeatBlock.
    829                 */
    830                setRepeatBlock=true;
    831            } else {
    832                /* !overwrite, or protected block: just write the values into this block */
    833                fillBlock(newTrie->data+block,
    834                          0, UTRIE2_DATA_BLOCK_LENGTH,
    835                          value, newTrie->initialValue, overwrite);
    836            }
    837        } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
    838            /*
    839             * Set the repeatBlock instead of the null block or previous repeat block:
    840             *
    841             * If !isWritableBlock() then all entries in the block have the same value
    842             * because it's the null block or a range block (the repeatBlock from a previous
    843             * call to utrie2_setRange32()).
    844             * No other blocks are used multiple times before compacting.
    845             *
    846             * The null block is the only non-writable block with the initialValue because
    847             * of the repeatBlock initialization above. (If value==initialValue, then
    848             * the repeatBlock will be the null data block.)
    849             *
    850             * We set our repeatBlock if the desired value differs from the block's value,
    851             * and if we overwrite any data or if the data is all initial values
    852             * (which is the same as the block being the null block, see above).
    853             */
    854            setRepeatBlock=true;
    855        }
    856        if(setRepeatBlock) {
    857            if(repeatBlock>=0) {
    858                setIndex2Entry(newTrie, i2, repeatBlock);
    859            } else {
    860                /* create and set and fill the repeatBlock */
    861                repeatBlock=getDataBlock(newTrie, start, true);
    862                if(repeatBlock<0) {
    863                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    864                    return;
    865                }
    866                writeBlock(newTrie->data+repeatBlock, value);
    867            }
    868        }
    869 
    870        start+=UTRIE2_DATA_BLOCK_LENGTH;
    871    }
    872 
    873    if(rest>0) {
    874        /* set partial block at [last block boundary..limit[ */
    875        block=getDataBlock(newTrie, start, true);
    876        if(block<0) {
    877            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    878            return;
    879        }
    880 
    881        fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
    882    }
    883 }
    884 
    885 /* compaction --------------------------------------------------------------- */
    886 
    887 static inline UBool
    888 equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
    889    while(length>0 && *s==*t) {
    890        ++s;
    891        ++t;
    892        --length;
    893    }
    894    return length == 0;
    895 }
    896 
    897 static inline UBool
    898 equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
    899    while(length>0 && *s==*t) {
    900        ++s;
    901        ++t;
    902        --length;
    903    }
    904    return length == 0;
    905 }
    906 
    907 static int32_t
    908 findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
    909    int32_t block;
    910 
    911    /* ensure that we do not even partially get past index2Length */
    912    index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
    913 
    914    for(block=0; block<=index2Length; ++block) {
    915        if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
    916            return block;
    917        }
    918    }
    919    return -1;
    920 }
    921 
    922 static int32_t
    923 findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
    924    int32_t block;
    925 
    926    /* ensure that we do not even partially get past dataLength */
    927    dataLength-=blockLength;
    928 
    929    for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
    930        if(equal_uint32(data+block, data+otherBlock, blockLength)) {
    931            return block;
    932        }
    933    }
    934    return -1;
    935 }
    936 
    937 /*
    938 * Find the start of the last range in the trie by enumerating backward.
    939 * Indexes for supplementary code points higher than this will be omitted.
    940 */
    941 static UChar32
    942 findHighStart(UNewTrie2 *trie, uint32_t highValue) {
    943    const uint32_t *data32;
    944 
    945    uint32_t value, initialValue;
    946    UChar32 c, prev;
    947    int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
    948 
    949    data32=trie->data;
    950    initialValue=trie->initialValue;
    951 
    952    index2NullOffset=trie->index2NullOffset;
    953    nullBlock=trie->dataNullOffset;
    954 
    955    /* set variables for previous range */
    956    if(highValue==initialValue) {
    957        prevI2Block=index2NullOffset;
    958        prevBlock=nullBlock;
    959    } else {
    960        prevI2Block=-1;
    961        prevBlock=-1;
    962    }
    963    prev=0x110000;
    964 
    965    /* enumerate index-2 blocks */
    966    i1=UNEWTRIE2_INDEX_1_LENGTH;
    967    c=prev;
    968    while(c>0) {
    969        i2Block=trie->index1[--i1];
    970        if(i2Block==prevI2Block) {
    971            /* the index-2 block is the same as the previous one, and filled with highValue */
    972            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
    973            continue;
    974        }
    975        prevI2Block=i2Block;
    976        if(i2Block==index2NullOffset) {
    977            /* this is the null index-2 block */
    978            if(highValue!=initialValue) {
    979                return c;
    980            }
    981            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
    982        } else {
    983            /* enumerate data blocks for one index-2 block */
    984            for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
    985                block=trie->index2[i2Block+ --i2];
    986                if(block==prevBlock) {
    987                    /* the block is the same as the previous one, and filled with highValue */
    988                    c-=UTRIE2_DATA_BLOCK_LENGTH;
    989                    continue;
    990                }
    991                prevBlock=block;
    992                if(block==nullBlock) {
    993                    /* this is the null data block */
    994                    if(highValue!=initialValue) {
    995                        return c;
    996                    }
    997                    c-=UTRIE2_DATA_BLOCK_LENGTH;
    998                } else {
    999                    for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
   1000                        value=data32[block+ --j];
   1001                        if(value!=highValue) {
   1002                            return c;
   1003                        }
   1004                        --c;
   1005                    }
   1006                }
   1007            }
   1008        }
   1009    }
   1010 
   1011    /* deliver last range */
   1012    return 0;
   1013 }
   1014 
   1015 /*
   1016 * Compact a build-time trie.
   1017 *
   1018 * The compaction
   1019 * - removes blocks that are identical with earlier ones
   1020 * - overlaps adjacent blocks as much as possible (if overlap==true)
   1021 * - moves blocks in steps of the data granularity
   1022 * - moves and overlaps blocks that overlap with multiple values in the overlap region
   1023 *
   1024 * It does not
   1025 * - try to move and overlap blocks that are not already adjacent
   1026 */
   1027 static void
   1028 compactData(UNewTrie2 *trie) {
   1029 #ifdef UTRIE2_DEBUG
   1030    int32_t countSame=0, sumOverlaps=0;
   1031 #endif
   1032 
   1033    int32_t start, newStart, movedStart;
   1034    int32_t blockLength, overlap;
   1035    int32_t i, mapIndex, blockCount;
   1036 
   1037    /* do not compact linear-ASCII data */
   1038    newStart=UTRIE2_DATA_START_OFFSET;
   1039    for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
   1040        trie->map[i]=start;
   1041    }
   1042 
   1043    /*
   1044     * Start with a block length of 64 for 2-byte UTF-8,
   1045     * then switch to UTRIE2_DATA_BLOCK_LENGTH.
   1046     */
   1047    blockLength=64;
   1048    blockCount=blockLength>>UTRIE2_SHIFT_2;
   1049    for(start=newStart; start<trie->dataLength;) {
   1050        /*
   1051         * start: index of first entry of current block
   1052         * newStart: index where the current block is to be moved
   1053         *           (right after current end of already-compacted data)
   1054         */
   1055        if(start==UNEWTRIE2_DATA_0800_OFFSET) {
   1056            blockLength=UTRIE2_DATA_BLOCK_LENGTH;
   1057            blockCount=1;
   1058        }
   1059 
   1060        /* skip blocks that are not used */
   1061        if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
   1062            /* advance start to the next block */
   1063            start+=blockLength;
   1064 
   1065            /* leave newStart with the previous block! */
   1066            continue;
   1067        }
   1068 
   1069        /* search for an identical block */
   1070        if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
   1071             >=0
   1072        ) {
   1073 #ifdef UTRIE2_DEBUG
   1074            ++countSame;
   1075 #endif
   1076            /* found an identical block, set the other block's index value for the current block */
   1077            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
   1078                trie->map[mapIndex++]=movedStart;
   1079                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
   1080            }
   1081 
   1082            /* advance start to the next block */
   1083            start+=blockLength;
   1084 
   1085            /* leave newStart with the previous block! */
   1086            continue;
   1087        }
   1088 
   1089        /* see if the beginning of this block can be overlapped with the end of the previous block */
   1090        /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
   1091        for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
   1092            overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
   1093            overlap-=UTRIE2_DATA_GRANULARITY) {}
   1094 
   1095 #ifdef UTRIE2_DEBUG
   1096            sumOverlaps+=overlap;
   1097 #endif
   1098        if(overlap>0 || newStart<start) {
   1099            /* some overlap, or just move the whole block */
   1100            movedStart=newStart-overlap;
   1101            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
   1102                trie->map[mapIndex++]=movedStart;
   1103                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
   1104            }
   1105 
   1106            /* move the non-overlapping indexes to their new positions */
   1107            start+=overlap;
   1108            for(i=blockLength-overlap; i>0; --i) {
   1109                trie->data[newStart++]=trie->data[start++];
   1110            }
   1111        } else /* no overlap && newStart==start */ {
   1112            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
   1113                trie->map[mapIndex++]=start;
   1114                start+=UTRIE2_DATA_BLOCK_LENGTH;
   1115            }
   1116            newStart=start;
   1117        }
   1118    }
   1119 
   1120    /* now adjust the index-2 table */
   1121    for(i=0; i<trie->index2Length; ++i) {
   1122        if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
   1123            /* Gap indexes are invalid (-1). Skip over the gap. */
   1124            i+=UNEWTRIE2_INDEX_GAP_LENGTH;
   1125        }
   1126        trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
   1127    }
   1128    trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
   1129 
   1130    /* ensure dataLength alignment */
   1131    while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
   1132        trie->data[newStart++]=trie->initialValue;
   1133    }
   1134 
   1135 #ifdef UTRIE2_DEBUG
   1136    /* we saved some space */
   1137    printf("compacting UTrie2: count of 32-bit data words %lu->%lu  countSame=%ld  sumOverlaps=%ld\n",
   1138            (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps);
   1139 #endif
   1140 
   1141    trie->dataLength=newStart;
   1142 }
   1143 
   1144 static void
   1145 compactIndex2(UNewTrie2 *trie) {
   1146    int32_t i, start, newStart, movedStart, overlap;
   1147 
   1148    /* do not compact linear-BMP index-2 blocks */
   1149    newStart=UTRIE2_INDEX_2_BMP_LENGTH;
   1150    for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
   1151        trie->map[i]=start;
   1152    }
   1153 
   1154    /* Reduce the index table gap to what will be needed at runtime. */
   1155    newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
   1156 
   1157    for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
   1158        /*
   1159         * start: index of first entry of current block
   1160         * newStart: index where the current block is to be moved
   1161         *           (right after current end of already-compacted data)
   1162         */
   1163 
   1164        /* search for an identical block */
   1165        if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
   1166             >=0
   1167        ) {
   1168            /* found an identical block, set the other block's index value for the current block */
   1169            trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
   1170 
   1171            /* advance start to the next block */
   1172            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
   1173 
   1174            /* leave newStart with the previous block! */
   1175            continue;
   1176        }
   1177 
   1178        /* see if the beginning of this block can be overlapped with the end of the previous block */
   1179        /* look for maximum overlap with the previous, adjacent block */
   1180        for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
   1181            overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
   1182            --overlap) {}
   1183 
   1184        if(overlap>0 || newStart<start) {
   1185            /* some overlap, or just move the whole block */
   1186            trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
   1187 
   1188            /* move the non-overlapping indexes to their new positions */
   1189            start+=overlap;
   1190            for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
   1191                trie->index2[newStart++]=trie->index2[start++];
   1192            }
   1193        } else /* no overlap && newStart==start */ {
   1194            trie->map[start>>UTRIE2_SHIFT_1_2]=start;
   1195            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
   1196            newStart=start;
   1197        }
   1198    }
   1199 
   1200    /* now adjust the index-1 table */
   1201    for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
   1202        trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
   1203    }
   1204    trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
   1205 
   1206    /*
   1207     * Ensure data table alignment:
   1208     * Needs to be granularity-aligned for 16-bit trie
   1209     * (so that dataMove will be down-shiftable),
   1210     * and 2-aligned for uint32_t data.
   1211     */
   1212    while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
   1213        /* Arbitrary value: 0x3fffc not possible for real data. */
   1214        trie->index2[newStart++] = static_cast<int32_t>(0xffff) << UTRIE2_INDEX_SHIFT;
   1215    }
   1216 
   1217 #ifdef UTRIE2_DEBUG
   1218    /* we saved some space */
   1219    printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n",
   1220            (long)trie->index2Length, (long)newStart);
   1221 #endif
   1222 
   1223    trie->index2Length=newStart;
   1224 }
   1225 
   1226 static void
   1227 compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
   1228    UNewTrie2 *newTrie;
   1229    UChar32 highStart, suppHighStart;
   1230    uint32_t highValue;
   1231 
   1232    newTrie=trie->newTrie;
   1233 
   1234    /* find highStart and round it up */
   1235    highValue=utrie2_get32(trie, 0x10ffff);
   1236    highStart=findHighStart(newTrie, highValue);
   1237    highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
   1238    if(highStart==0x110000) {
   1239        highValue=trie->errorValue;
   1240    }
   1241 
   1242    /*
   1243     * Set trie->highStart only after utrie2_get32(trie, highStart).
   1244     * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
   1245     */
   1246    trie->highStart=newTrie->highStart=highStart;
   1247 
   1248 #ifdef UTRIE2_DEBUG
   1249    printf("UTrie2: highStart U+%06lx  highValue 0x%lx  initialValue 0x%lx\n",
   1250            (long)highStart, (long)highValue, (long)trie->initialValue);
   1251 #endif
   1252 
   1253    if(highStart<0x110000) {
   1254        /* Blank out [highStart..10ffff] to release associated data blocks. */
   1255        suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
   1256        utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, true, pErrorCode);
   1257        if(U_FAILURE(*pErrorCode)) {
   1258            return;
   1259        }
   1260    }
   1261 
   1262    compactData(newTrie);
   1263    if(highStart>0x10000) {
   1264        compactIndex2(newTrie);
   1265 #ifdef UTRIE2_DEBUG
   1266    } else {
   1267        printf("UTrie2: highStart U+%04lx  count of 16-bit index words %lu->%lu\n",
   1268                (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
   1269 #endif
   1270    }
   1271 
   1272    /*
   1273     * Store the highValue in the data array and round up the dataLength.
   1274     * Must be done after compactData() because that assumes that dataLength
   1275     * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
   1276     */
   1277    newTrie->data[newTrie->dataLength++]=highValue;
   1278    while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
   1279        newTrie->data[newTrie->dataLength++]=trie->initialValue;
   1280    }
   1281 
   1282    newTrie->isCompacted=true;
   1283 }
   1284 
   1285 /* serialization ------------------------------------------------------------ */
   1286 
   1287 /**
   1288 * Maximum length of the runtime index array.
   1289 * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
   1290 * (The actual maximum length is lower,
   1291 * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
   1292 */
   1293 #define UTRIE2_MAX_INDEX_LENGTH 0xffff
   1294 
   1295 /**
   1296 * Maximum length of the runtime data array.
   1297 * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
   1298 * and by uint16_t UTrie2Header.shiftedDataLength.
   1299 */
   1300 #define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
   1301 
   1302 /* Compact and internally serialize the trie. */
   1303 U_CAPI void U_EXPORT2
   1304 utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
   1305    UNewTrie2 *newTrie;
   1306    UTrie2Header *header;
   1307    uint32_t *p;
   1308    uint16_t *dest16;
   1309    int32_t i, length;
   1310    int32_t allIndexesLength;
   1311    int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
   1312    UChar32 highStart;
   1313 
   1314    /* argument check */
   1315    if(U_FAILURE(*pErrorCode)) {
   1316        return;
   1317    }
   1318    if( trie==nullptr ||
   1319        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
   1320    ) {
   1321        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1322        return;
   1323    }
   1324    newTrie=trie->newTrie;
   1325    if(newTrie==nullptr) {
   1326        /* already frozen */
   1327        UTrie2ValueBits frozenValueBits=
   1328            trie->data16!=nullptr ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
   1329        if(valueBits!=frozenValueBits) {
   1330            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1331        }
   1332        return;
   1333    }
   1334 
   1335    /* compact if necessary */
   1336    if(!newTrie->isCompacted) {
   1337        compactTrie(trie, pErrorCode);
   1338        if(U_FAILURE(*pErrorCode)) {
   1339            return;
   1340        }
   1341    }
   1342    highStart=trie->highStart;
   1343 
   1344    if(highStart<=0x10000) {
   1345        allIndexesLength=UTRIE2_INDEX_1_OFFSET;
   1346    } else {
   1347        allIndexesLength=newTrie->index2Length;
   1348    }
   1349    if(valueBits==UTRIE2_16_VALUE_BITS) {
   1350        dataMove=allIndexesLength;
   1351    } else {
   1352        dataMove=0;
   1353    }
   1354 
   1355    /* are indexLength and dataLength within limits? */
   1356    if( /* for unshifted indexLength */
   1357        allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
   1358        /* for unshifted dataNullOffset */
   1359        (dataMove+newTrie->dataNullOffset)>0xffff ||
   1360        /* for unshifted 2-byte UTF-8 index-2 values */
   1361        (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
   1362        /* for shiftedDataLength */
   1363        (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
   1364    ) {
   1365        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1366        return;
   1367    }
   1368 
   1369    /* calculate the total serialized length */
   1370    length=sizeof(UTrie2Header)+allIndexesLength*2;
   1371    if(valueBits==UTRIE2_16_VALUE_BITS) {
   1372        length+=newTrie->dataLength*2;
   1373    } else {
   1374        length+=newTrie->dataLength*4;
   1375    }
   1376 
   1377    trie->memory=uprv_malloc(length);
   1378    if(trie->memory==nullptr) {
   1379        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1380        return;
   1381    }
   1382    trie->length=length;
   1383    trie->isMemoryOwned=true;
   1384 
   1385    trie->indexLength=allIndexesLength;
   1386    trie->dataLength=newTrie->dataLength;
   1387    if(highStart<=0x10000) {
   1388        trie->index2NullOffset=0xffff;
   1389    } else {
   1390        trie->index2NullOffset=static_cast<uint16_t>(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset);
   1391    }
   1392    trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
   1393    trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
   1394 
   1395    /* set the header fields */
   1396    header=(UTrie2Header *)trie->memory;
   1397 
   1398    header->signature=UTRIE2_SIG; /* "Tri2" */
   1399    header->options=(uint16_t)valueBits;
   1400 
   1401    header->indexLength=(uint16_t)trie->indexLength;
   1402    header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
   1403    header->index2NullOffset=trie->index2NullOffset;
   1404    header->dataNullOffset=trie->dataNullOffset;
   1405    header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
   1406 
   1407    /* fill the index and data arrays */
   1408    dest16=(uint16_t *)(header+1);
   1409    trie->index=dest16;
   1410 
   1411    /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
   1412    p=(uint32_t *)newTrie->index2;
   1413    for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
   1414        *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
   1415    }
   1416 
   1417    /* write UTF-8 2-byte index-2 values, not right-shifted */
   1418    for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
   1419        *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
   1420    }
   1421    for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
   1422        *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
   1423    }
   1424 
   1425    if(highStart>0x10000) {
   1426        int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
   1427        int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
   1428 
   1429        /* write 16-bit index-1 values for supplementary code points */
   1430        p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
   1431        for(i=index1Length; i>0; --i) {
   1432            *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
   1433        }
   1434 
   1435        /*
   1436         * write the index-2 array values for supplementary code points,
   1437         * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
   1438         */
   1439        p=(uint32_t *)newTrie->index2+index2Offset;
   1440        for(i=newTrie->index2Length-index2Offset; i>0; --i) {
   1441            *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
   1442        }
   1443    }
   1444 
   1445    /* write the 16/32-bit data array */
   1446    switch(valueBits) {
   1447    case UTRIE2_16_VALUE_BITS:
   1448        /* write 16-bit data values */
   1449        trie->data16=dest16;
   1450        trie->data32=nullptr;
   1451        p=newTrie->data;
   1452        for(i=newTrie->dataLength; i>0; --i) {
   1453            *dest16++=(uint16_t)*p++;
   1454        }
   1455        break;
   1456    case UTRIE2_32_VALUE_BITS:
   1457        /* write 32-bit data values */
   1458        trie->data16=nullptr;
   1459        trie->data32=(uint32_t *)dest16;
   1460        uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4);
   1461        break;
   1462    default:
   1463        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1464        return;
   1465    }
   1466 
   1467 #ifdef UTRIE2_DEBUG
   1468    utrie2_printLengths(trie, "");
   1469 #endif
   1470 
   1471 #ifdef UCPTRIE_DEBUG
   1472    umutablecptrie_setName(newTrie->t3, trie->name);
   1473    ucptrie_close(
   1474        umutablecptrie_buildImmutable(
   1475            newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode));
   1476 #endif
   1477    /* Delete the UNewTrie2. */
   1478    uprv_free(newTrie->data);
   1479    uprv_free(newTrie);
   1480    trie->newTrie=nullptr;
   1481 }