tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

utrie_swap.cpp (11236B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // utrie_swap.cpp
      5 // created: 2018aug08 Markus W. Scherer
      6 
      7 #include "unicode/utypes.h"
      8 #include "cmemory.h"
      9 #include "ucptrie_impl.h"
     10 #include "udataswp.h"
     11 #include "utrie.h"
     12 #include "utrie2_impl.h"
     13 
     14 // These functions for swapping different generations of ICU code point tries are here
     15 // so that their implementation files need not depend on swapper code,
     16 // need not depend on each other, and so that other swapper code
     17 // need not depend on other trie code.
     18 
     19 namespace {
     20 
     21 constexpr int32_t ASCII_LIMIT = 0x80;
     22 
     23 }  // namespace
     24 
     25 U_CAPI int32_t U_EXPORT2
     26 utrie_swap(const UDataSwapper *ds,
     27           const void *inData, int32_t length, void *outData,
     28           UErrorCode *pErrorCode) {
     29    const UTrieHeader *inTrie;
     30    UTrieHeader trie;
     31    int32_t size;
     32    UBool dataIs32;
     33 
     34    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
     35        return 0;
     36    }
     37    if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
     38        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     39        return 0;
     40    }
     41 
     42    /* setup and swapping */
     43    if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
     44        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     45        return 0;
     46    }
     47 
     48    inTrie=(const UTrieHeader *)inData;
     49    trie.signature=ds->readUInt32(inTrie->signature);
     50    trie.options=ds->readUInt32(inTrie->options);
     51    trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
     52    trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
     53 
     54    if( trie.signature!=0x54726965 ||
     55        (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
     56        ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
     57        trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
     58        (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
     59        trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
     60        (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
     61        ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
     62    ) {
     63        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
     64        return 0;
     65    }
     66 
     67    dataIs32 = (trie.options & UTRIE_OPTIONS_DATA_IS_32_BIT) != 0;
     68    size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
     69 
     70    if(length>=0) {
     71        UTrieHeader *outTrie;
     72 
     73        if(length<size) {
     74            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     75            return 0;
     76        }
     77 
     78        outTrie=(UTrieHeader *)outData;
     79 
     80        /* swap the header */
     81        ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
     82 
     83        /* swap the index and the data */
     84        if(dataIs32) {
     85            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
     86            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
     87                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
     88        } else {
     89            ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
     90        }
     91    }
     92 
     93    return size;
     94 }
     95 
     96 U_CAPI int32_t U_EXPORT2
     97 utrie2_swap(const UDataSwapper *ds,
     98            const void *inData, int32_t length, void *outData,
     99            UErrorCode *pErrorCode) {
    100    const UTrie2Header *inTrie;
    101    UTrie2Header trie;
    102    int32_t dataLength, size;
    103    UTrie2ValueBits valueBits;
    104 
    105    if(U_FAILURE(*pErrorCode)) {
    106        return 0;
    107    }
    108    if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
    109        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    110        return 0;
    111    }
    112 
    113    /* setup and swapping */
    114    if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
    115        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    116        return 0;
    117    }
    118 
    119    inTrie=(const UTrie2Header *)inData;
    120    trie.signature=ds->readUInt32(inTrie->signature);
    121    trie.options=ds->readUInt16(inTrie->options);
    122    trie.indexLength=ds->readUInt16(inTrie->indexLength);
    123    trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
    124 
    125    valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
    126    dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
    127 
    128    if( trie.signature!=UTRIE2_SIG ||
    129        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
    130        trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
    131        dataLength<UTRIE2_DATA_START_OFFSET
    132    ) {
    133        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
    134        return 0;
    135    }
    136 
    137    size=sizeof(UTrie2Header)+trie.indexLength*2;
    138    switch(valueBits) {
    139    case UTRIE2_16_VALUE_BITS:
    140        size+=dataLength*2;
    141        break;
    142    case UTRIE2_32_VALUE_BITS:
    143        size+=dataLength*4;
    144        break;
    145    default:
    146        *pErrorCode=U_INVALID_FORMAT_ERROR;
    147        return 0;
    148    }
    149 
    150    if(length>=0) {
    151        UTrie2Header *outTrie;
    152 
    153        if(length<size) {
    154            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    155            return 0;
    156        }
    157 
    158        outTrie=(UTrie2Header *)outData;
    159 
    160        /* swap the header */
    161        ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
    162        ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
    163 
    164        /* swap the index and the data */
    165        switch(valueBits) {
    166        case UTRIE2_16_VALUE_BITS:
    167            ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
    168            break;
    169        case UTRIE2_32_VALUE_BITS:
    170            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
    171            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
    172                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
    173            break;
    174        default:
    175            *pErrorCode=U_INVALID_FORMAT_ERROR;
    176            return 0;
    177        }
    178    }
    179 
    180    return size;
    181 }
    182 
    183 U_CAPI int32_t U_EXPORT2
    184 ucptrie_swap(const UDataSwapper *ds,
    185             const void *inData, int32_t length, void *outData,
    186             UErrorCode *pErrorCode) {
    187    const UCPTrieHeader *inTrie;
    188    UCPTrieHeader trie;
    189    int32_t dataLength, size;
    190    UCPTrieValueWidth valueWidth;
    191 
    192    if(U_FAILURE(*pErrorCode)) {
    193        return 0;
    194    }
    195    if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
    196        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    197        return 0;
    198    }
    199 
    200    /* setup and swapping */
    201    if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
    202        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    203        return 0;
    204    }
    205 
    206    inTrie=(const UCPTrieHeader *)inData;
    207    trie.signature=ds->readUInt32(inTrie->signature);
    208    trie.options=ds->readUInt16(inTrie->options);
    209    trie.indexLength=ds->readUInt16(inTrie->indexLength);
    210    trie.dataLength = ds->readUInt16(inTrie->dataLength);
    211 
    212    UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
    213    valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
    214    dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;
    215 
    216    int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
    217        UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
    218    if( trie.signature!=UCPTRIE_SIG ||
    219        type > UCPTRIE_TYPE_SMALL ||
    220        (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
    221        valueWidth > UCPTRIE_VALUE_BITS_8 ||
    222        trie.indexLength < minIndexLength ||
    223        dataLength < ASCII_LIMIT
    224    ) {
    225        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
    226        return 0;
    227    }
    228 
    229    size=sizeof(UCPTrieHeader)+trie.indexLength*2;
    230    switch(valueWidth) {
    231    case UCPTRIE_VALUE_BITS_16:
    232        size+=dataLength*2;
    233        break;
    234    case UCPTRIE_VALUE_BITS_32:
    235        size+=dataLength*4;
    236        break;
    237    case UCPTRIE_VALUE_BITS_8:
    238        size+=dataLength;
    239        break;
    240    default:
    241        *pErrorCode=U_INVALID_FORMAT_ERROR;
    242        return 0;
    243    }
    244 
    245    if(length>=0) {
    246        UCPTrieHeader *outTrie;
    247 
    248        if(length<size) {
    249            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    250            return 0;
    251        }
    252 
    253        outTrie=(UCPTrieHeader *)outData;
    254 
    255        /* swap the header */
    256        ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
    257        ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
    258 
    259        /* swap the index */
    260        const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1);
    261        uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1);
    262        ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode);
    263 
    264        /* swap the data */
    265        const uint16_t *inData=inIndex+trie.indexLength;
    266        uint16_t *outData=outIndex+trie.indexLength;
    267        switch(valueWidth) {
    268        case UCPTRIE_VALUE_BITS_16:
    269            ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode);
    270            break;
    271        case UCPTRIE_VALUE_BITS_32:
    272            ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode);
    273            break;
    274        case UCPTRIE_VALUE_BITS_8:
    275            if(inTrie!=outTrie) {
    276                uprv_memmove(outData, inData, dataLength);
    277            }
    278            break;
    279        default:
    280            *pErrorCode=U_INVALID_FORMAT_ERROR;
    281            return 0;
    282        }
    283    }
    284 
    285    return size;
    286 }
    287 
    288 namespace {
    289 
    290 /**
    291 * Gets the trie version from 32-bit-aligned memory containing the serialized form
    292 * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3).
    293 *
    294 * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
    295 * @param length the number of bytes available at data;
    296 *               can be more than necessary (see return value)
    297 * @param anyEndianOk If false, only platform-endian serialized forms are recognized.
    298 *                    If true, opposite-endian serialized forms are recognized as well.
    299 * @return the trie version of the serialized form, or 0 if it is not
    300 *         recognized as a serialized trie
    301 */
    302 int32_t
    303 getVersion(const void *data, int32_t length, UBool anyEndianOk) {
    304    uint32_t signature;
    305    if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) {
    306        return 0;
    307    }
    308    signature = *static_cast<const uint32_t*>(data);
    309    if(signature==UCPTRIE_SIG) {
    310        return 3;
    311    }
    312    if(anyEndianOk && signature==UCPTRIE_OE_SIG) {
    313        return 3;
    314    }
    315    if(signature==UTRIE2_SIG) {
    316        return 2;
    317    }
    318    if(anyEndianOk && signature==UTRIE2_OE_SIG) {
    319        return 2;
    320    }
    321    if(signature==UTRIE_SIG) {
    322        return 1;
    323    }
    324    if(anyEndianOk && signature==UTRIE_OE_SIG) {
    325        return 1;
    326    }
    327    return 0;
    328 }
    329 
    330 }  // namespace
    331 
    332 U_CAPI int32_t U_EXPORT2
    333 utrie_swapAnyVersion(const UDataSwapper *ds,
    334                     const void *inData, int32_t length, void *outData,
    335                     UErrorCode *pErrorCode) {
    336    if(U_FAILURE(*pErrorCode)) { return 0; }
    337    switch(getVersion(inData, length, true)) {
    338    case 1:
    339        return utrie_swap(ds, inData, length, outData, pErrorCode);
    340    case 2:
    341        return utrie2_swap(ds, inData, length, outData, pErrorCode);
    342    case 3:
    343        return ucptrie_swap(ds, inData, length, outData, pErrorCode);
    344    default:
    345        *pErrorCode=U_INVALID_FORMAT_ERROR;
    346        return 0;
    347    }
    348 }