tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

propsvec.cpp (16081B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2002-2011, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  propsvec.c
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2002feb22
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Store bits (Unicode character properties) in bit set vectors.
     19 */
     20 
     21 #include <stdlib.h>
     22 #include "unicode/utypes.h"
     23 #include "cmemory.h"
     24 #include "utrie.h"
     25 #include "utrie2.h"
     26 #include "uarrsort.h"
     27 #include "propsvec.h"
     28 #include "uassert.h"
     29 
     30 struct UPropsVectors {
     31    uint32_t *v;
     32    int32_t columns;  /* number of columns, plus two for start & limit values */
     33    int32_t maxRows;
     34    int32_t rows;
     35    int32_t prevRow;  /* search optimization: remember last row seen */
     36    UBool isCompacted;
     37 };
     38 
     39 #define UPVEC_INITIAL_ROWS (1<<12)
     40 #define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
     41 #define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
     42 
     43 U_CAPI UPropsVectors * U_EXPORT2
     44 upvec_open(int32_t columns, UErrorCode *pErrorCode) {
     45    UPropsVectors *pv;
     46    uint32_t *v, *row;
     47    uint32_t cp;
     48 
     49    if(U_FAILURE(*pErrorCode)) {
     50        return nullptr;
     51    }
     52    if(columns<1) {
     53        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     54        return nullptr;
     55    }
     56    columns+=2; /* count range start and limit columns */
     57 
     58    pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
     59    v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4);
     60    if(pv==nullptr || v==nullptr) {
     61        uprv_free(pv);
     62        uprv_free(v);
     63        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     64        return nullptr;
     65    }
     66    uprv_memset(pv, 0, sizeof(UPropsVectors));
     67    pv->v=v;
     68    pv->columns=columns;
     69    pv->maxRows=UPVEC_INITIAL_ROWS;
     70    pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);
     71 
     72    /* set the all-Unicode row and the special-value rows */
     73    row=pv->v;
     74    uprv_memset(row, 0, pv->rows*columns*4);
     75    row[0]=0;
     76    row[1]=0x110000;
     77    row+=columns;
     78    for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
     79        row[0]=cp;
     80        row[1]=cp+1;
     81        row+=columns;
     82    }
     83    return pv;
     84 }
     85 
     86 U_CAPI void U_EXPORT2
     87 upvec_close(UPropsVectors *pv) {
     88    if(pv!=nullptr) {
     89        uprv_free(pv->v);
     90        uprv_free(pv);
     91    }
     92 }
     93 
     94 static uint32_t *
     95 _findRow(UPropsVectors *pv, UChar32 rangeStart) {
     96    uint32_t *row;
     97    int32_t columns, i, start, limit, prevRow;
     98 
     99    columns=pv->columns;
    100    limit=pv->rows;
    101    prevRow=pv->prevRow;
    102 
    103    /* check the vicinity of the last-seen row (start searching with an unrolled loop) */
    104    row=pv->v+prevRow*columns;
    105    if (rangeStart >= static_cast<UChar32>(row[0])) {
    106        if (rangeStart < static_cast<UChar32>(row[1])) {
    107            /* same row as last seen */
    108            return row;
    109        } else if (rangeStart < static_cast<UChar32>((row += columns)[1])) {
    110            /* next row after the last one */
    111            pv->prevRow=prevRow+1;
    112            return row;
    113        } else if (rangeStart < static_cast<UChar32>((row += columns)[1])) {
    114            /* second row after the last one */
    115            pv->prevRow=prevRow+2;
    116            return row;
    117        } else if ((rangeStart - static_cast<UChar32>(row[1])) < 10) {
    118            /* we are close, continue looping */
    119            prevRow+=2;
    120            do {
    121                ++prevRow;
    122                row+=columns;
    123            } while (rangeStart >= static_cast<UChar32>(row[1]));
    124            pv->prevRow=prevRow;
    125            return row;
    126        }
    127    } else if (rangeStart < static_cast<UChar32>(pv->v[1])) {
    128        /* the very first row */
    129        pv->prevRow=0;
    130        return pv->v;
    131    }
    132 
    133    /* do a binary search for the start of the range */
    134    start=0;
    135    while(start<limit-1) {
    136        i=(start+limit)/2;
    137        row=pv->v+i*columns;
    138        if (rangeStart < static_cast<UChar32>(row[0])) {
    139            limit=i;
    140        } else if (rangeStart < static_cast<UChar32>(row[1])) {
    141            pv->prevRow=i;
    142            return row;
    143        } else {
    144            start=i;
    145        }
    146    }
    147 
    148    /* must be found because all ranges together always cover all of Unicode */
    149    pv->prevRow=start;
    150    return pv->v+start*columns;
    151 }
    152 
    153 U_CAPI void U_EXPORT2
    154 upvec_setValue(UPropsVectors *pv,
    155               UChar32 start, UChar32 end,
    156               int32_t column,
    157               uint32_t value, uint32_t mask,
    158               UErrorCode *pErrorCode) {
    159    uint32_t *firstRow, *lastRow;
    160    int32_t columns;
    161    UChar32 limit;
    162    UBool splitFirstRow, splitLastRow;
    163 
    164    /* argument checking */
    165    if(U_FAILURE(*pErrorCode)) {
    166        return;
    167    }
    168    if( pv==nullptr ||
    169        start<0 || start>end || end>UPVEC_MAX_CP ||
    170        column<0 || column>=(pv->columns-2)
    171    ) {
    172        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    173        return;
    174    }
    175    if(pv->isCompacted) {
    176        *pErrorCode=U_NO_WRITE_PERMISSION;
    177        return;
    178    }
    179    limit=end+1;
    180 
    181    /* initialize */
    182    columns=pv->columns;
    183    column+=2; /* skip range start and limit columns */
    184    value&=mask;
    185 
    186    /* find the rows whose ranges overlap with the input range */
    187 
    188    /* find the first and last rows, always successful */
    189    firstRow=_findRow(pv, start);
    190    lastRow=_findRow(pv, end);
    191 
    192    /*
    193     * Rows need to be split if they partially overlap with the
    194     * input range (only possible for the first and last rows)
    195     * and if their value differs from the input value.
    196     */
    197    splitFirstRow = start != static_cast<UChar32>(firstRow[0]) && value != (firstRow[column] & mask);
    198    splitLastRow = limit != static_cast<UChar32>(lastRow[1]) && value != (lastRow[column] & mask);
    199 
    200    /* split first/last rows if necessary */
    201    if(splitFirstRow || splitLastRow) {
    202        int32_t count, rows;
    203 
    204        rows=pv->rows;
    205        if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
    206            uint32_t *newVectors;
    207            int32_t newMaxRows;
    208 
    209            if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
    210                newMaxRows=UPVEC_MEDIUM_ROWS;
    211            } else if(pv->maxRows<UPVEC_MAX_ROWS) {
    212                newMaxRows=UPVEC_MAX_ROWS;
    213            } else {
    214                /* Implementation bug, or UPVEC_MAX_ROWS too low. */
    215                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    216                return;
    217            }
    218            newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
    219            if(newVectors==nullptr) {
    220                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    221                return;
    222            }
    223            uprv_memcpy(newVectors, pv->v, (size_t)rows*columns*4);
    224            firstRow=newVectors+(firstRow-pv->v);
    225            lastRow=newVectors+(lastRow-pv->v);
    226            uprv_free(pv->v);
    227            pv->v=newVectors;
    228            pv->maxRows=newMaxRows;
    229        }
    230 
    231        /* count the number of row cells to move after the last row, and move them */
    232        count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
    233        if(count>0) {
    234            uprv_memmove(
    235                lastRow+(1+splitFirstRow+splitLastRow)*columns,
    236                lastRow+columns,
    237                count*4);
    238        }
    239        pv->rows=rows+splitFirstRow+splitLastRow;
    240 
    241        /* split the first row, and move the firstRow pointer to the second part */
    242        if(splitFirstRow) {
    243            /* copy all affected rows up one and move the lastRow pointer */
    244            count = (int32_t)((lastRow-firstRow)+columns);
    245            uprv_memmove(firstRow+columns, firstRow, (size_t)count*4);
    246            lastRow+=columns;
    247 
    248            /* split the range and move the firstRow pointer */
    249            firstRow[1]=firstRow[columns]=(uint32_t)start;
    250            firstRow+=columns;
    251        }
    252 
    253        /* split the last row */
    254        if(splitLastRow) {
    255            /* copy the last row data */
    256            uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4);
    257 
    258            /* split the range and move the firstRow pointer */
    259            lastRow[1]=lastRow[columns]=(uint32_t)limit;
    260        }
    261    }
    262 
    263    /* set the "row last seen" to the last row for the range */
    264    pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
    265 
    266    /* set the input value in all remaining rows */
    267    firstRow+=column;
    268    lastRow+=column;
    269    mask=~mask;
    270    for(;;) {
    271        *firstRow=(*firstRow&mask)|value;
    272        if(firstRow==lastRow) {
    273            break;
    274        }
    275        firstRow+=columns;
    276    }
    277 }
    278 
    279 U_CAPI uint32_t U_EXPORT2
    280 upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) {
    281    uint32_t *row;
    282    UPropsVectors *ncpv;
    283 
    284    if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) {
    285        return 0;
    286    }
    287    ncpv=(UPropsVectors *)pv;
    288    row=_findRow(ncpv, c);
    289    return row[2+column];
    290 }
    291 
    292 U_CAPI uint32_t * U_EXPORT2
    293 upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
    294             UChar32 *pRangeStart, UChar32 *pRangeEnd) {
    295    uint32_t *row;
    296    int32_t columns;
    297 
    298    if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
    299        return nullptr;
    300    }
    301 
    302    columns=pv->columns;
    303    row=pv->v+rowIndex*columns;
    304    if(pRangeStart!=nullptr) {
    305        *pRangeStart=(UChar32)row[0];
    306    }
    307    if(pRangeEnd!=nullptr) {
    308        *pRangeEnd=(UChar32)row[1]-1;
    309    }
    310    return row+2;
    311 }
    312 
    313 static int32_t U_CALLCONV
    314 upvec_compareRows(const void *context, const void *l, const void *r) {
    315    const uint32_t* left = static_cast<const uint32_t*>(l), *right = static_cast<const uint32_t*>(r);
    316    const UPropsVectors* pv = static_cast<const UPropsVectors*>(context);
    317    int32_t i, count, columns;
    318 
    319    count=columns=pv->columns; /* includes start/limit columns */
    320 
    321    /* start comparing after start/limit but wrap around to them */
    322    i=2;
    323    do {
    324        if(left[i]!=right[i]) {
    325            return left[i]<right[i] ? -1 : 1;
    326        }
    327        if(++i==columns) {
    328            i=0;
    329        }
    330    } while(--count>0);
    331 
    332    return 0;
    333 }
    334 
    335 U_CAPI void U_EXPORT2
    336 upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
    337    uint32_t *row;
    338    int32_t i, columns, valueColumns, rows, count;
    339    UChar32 start, limit;
    340 
    341    /* argument checking */
    342    if(U_FAILURE(*pErrorCode)) {
    343        return;
    344    }
    345    if(handler==nullptr) {
    346        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    347        return;
    348    }
    349    if(pv->isCompacted) {
    350        return;
    351    }
    352 
    353    /* Set the flag now: Sorting and compacting destroys the builder data structure. */
    354    pv->isCompacted=true;
    355 
    356    rows=pv->rows;
    357    columns=pv->columns;
    358    U_ASSERT(columns>=3); /* upvec_open asserts this */
    359    valueColumns=columns-2; /* not counting start & limit */
    360 
    361    /* sort the properties vectors to find unique vector values */
    362    uprv_sortArray(pv->v, rows, columns*4,
    363                   upvec_compareRows, pv, false, pErrorCode);
    364    if(U_FAILURE(*pErrorCode)) {
    365        return;
    366    }
    367 
    368    /*
    369     * Find and set the special values.
    370     * This has to do almost the same work as the compaction below,
    371     * to find the indexes where the special-value rows will move.
    372     */
    373    row=pv->v;
    374    count=-valueColumns;
    375    for(i=0; i<rows; ++i) {
    376        start=(UChar32)row[0];
    377 
    378        /* count a new values vector if it is different from the current one */
    379        if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) {
    380            count+=valueColumns;
    381        }
    382 
    383        if(start>=UPVEC_FIRST_SPECIAL_CP) {
    384            handler(context, start, start, count, row+2, valueColumns, pErrorCode);
    385            if(U_FAILURE(*pErrorCode)) {
    386                return;
    387            }
    388        }
    389 
    390        row+=columns;
    391    }
    392 
    393    /* count is at the beginning of the last vector, add valueColumns to include that last vector */
    394    count+=valueColumns;
    395 
    396    /* Call the handler once more to signal the start of delivering real values. */
    397    handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
    398            count, row-valueColumns, valueColumns, pErrorCode);
    399    if(U_FAILURE(*pErrorCode)) {
    400        return;
    401    }
    402 
    403    /*
    404     * Move vector contents up to a contiguous array with only unique
    405     * vector values, and call the handler function for each vector.
    406     *
    407     * This destroys the Properties Vector structure and replaces it
    408     * with an array of just vector values.
    409     */
    410    row=pv->v;
    411    count=-valueColumns;
    412    for(i=0; i<rows; ++i) {
    413        /* fetch these first before memmove() may overwrite them */
    414        start=(UChar32)row[0];
    415        limit=(UChar32)row[1];
    416 
    417        /* add a new values vector if it is different from the current one */
    418        if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
    419            count+=valueColumns;
    420            uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4);
    421        }
    422 
    423        if(start<UPVEC_FIRST_SPECIAL_CP) {
    424            handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
    425            if(U_FAILURE(*pErrorCode)) {
    426                return;
    427            }
    428        }
    429 
    430        row+=columns;
    431    }
    432 
    433    /* count is at the beginning of the last vector, add one to include that last vector */
    434    pv->rows=count/valueColumns+1;
    435 }
    436 
    437 U_CAPI const uint32_t * U_EXPORT2
    438 upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
    439    if(!pv->isCompacted) {
    440        return nullptr;
    441    }
    442    if(pRows!=nullptr) {
    443        *pRows=pv->rows;
    444    }
    445    if(pColumns!=nullptr) {
    446        *pColumns=pv->columns-2;
    447    }
    448    return pv->v;
    449 }
    450 
    451 U_CAPI uint32_t * U_EXPORT2
    452 upvec_cloneArray(const UPropsVectors *pv,
    453                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) {
    454    uint32_t *clonedArray;
    455    int32_t byteLength;
    456 
    457    if(U_FAILURE(*pErrorCode)) {
    458        return nullptr;
    459    }
    460    if(!pv->isCompacted) {
    461        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    462        return nullptr;
    463    }
    464    byteLength=pv->rows*(pv->columns-2)*4;
    465    clonedArray=(uint32_t *)uprv_malloc(byteLength);
    466    if(clonedArray==nullptr) {
    467        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    468        return nullptr;
    469    }
    470    uprv_memcpy(clonedArray, pv->v, byteLength);
    471    if(pRows!=nullptr) {
    472        *pRows=pv->rows;
    473    }
    474    if(pColumns!=nullptr) {
    475        *pColumns=pv->columns-2;
    476    }
    477    return clonedArray;
    478 }
    479 
    480 U_CAPI UTrie2 * U_EXPORT2
    481 upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
    482    UPVecToUTrie2Context toUTrie2={ nullptr, 0, 0, 0 };
    483    upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode);
    484    utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode);
    485    if(U_FAILURE(*pErrorCode)) {
    486        utrie2_close(toUTrie2.trie);
    487        toUTrie2.trie=nullptr;
    488    }
    489    return toUTrie2.trie;
    490 }
    491 
    492 /*
    493 * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
    494 * some 16-bit field and builds and returns a UTrie2.
    495 */
    496 
    497 U_CAPI void U_CALLCONV
    498 upvec_compactToUTrie2Handler(void *context,
    499                             UChar32 start, UChar32 end,
    500                             int32_t rowIndex, uint32_t *row, int32_t columns,
    501                             UErrorCode *pErrorCode) {
    502    (void)row;
    503    (void)columns;
    504    UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context;
    505    if(start<UPVEC_FIRST_SPECIAL_CP) {
    506        utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, true, pErrorCode);
    507    } else {
    508        switch(start) {
    509        case UPVEC_INITIAL_VALUE_CP:
    510            toUTrie2->initialValue=rowIndex;
    511            break;
    512        case UPVEC_ERROR_VALUE_CP:
    513            toUTrie2->errorValue=rowIndex;
    514            break;
    515        case UPVEC_START_REAL_VALUES_CP:
    516            toUTrie2->maxValue=rowIndex;
    517            if(rowIndex>0xffff) {
    518                /* too many rows for a 16-bit trie */
    519                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    520            } else {
    521                toUTrie2->trie=utrie2_open(toUTrie2->initialValue,
    522                                           toUTrie2->errorValue, pErrorCode);
    523            }
    524            break;
    525        default:
    526            break;
    527        }
    528    }
    529 }