tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rle.c (12561B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2000-2003, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *
     11 * File writejava.c
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   01/11/02    Ram        Creation.
     17 *******************************************************************************
     18 */
     19 #include <stdbool.h>
     20 #include "rle.h"
     21 /**
     22 * The ESCAPE character is used during run-length encoding.  It signals
     23 * a run of identical chars.
     24 */
     25 static const uint16_t ESCAPE = 0xA5A5;
     26 
     27 /**
     28 * The ESCAPE_BYTE character is used during run-length encoding.  It signals
     29 * a run of identical bytes.
     30 */
     31 static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
     32 
     33 /**
     34 * Append a byte to the given StringBuffer, packing two bytes into each
     35 * character.  The state parameter maintains intermediary data between
     36 * calls.
     37 * @param state A two-element array, with state[0] == 0 if this is the
     38 * first byte of a pair, or state[0] != 0 if this is the second byte
     39 * of a pair, in which case state[1] is the first byte.
     40 */
     41 static uint16_t*
     42 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
     43    if(!status || U_FAILURE(*status)){
     44        return NULL;
     45    }
     46    if (state[0] != 0) {
     47        uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
     48        if(buffer < buffLimit){
     49            *buffer++ = c;
     50        }else{
     51            *status = U_BUFFER_OVERFLOW_ERROR;
     52        }
     53        state[0] = 0;
     54        return buffer;
     55    }
     56    else {
     57        state[0] = 1;
     58        state[1] = value;
     59        return buffer;
     60    }
     61 }
     62 /**
     63 * Encode a run, possibly a degenerate run (of < 4 values).
     64 * @param length The length of the run; must be > 0 && <= 0xFF.
     65 */
     66 static uint16_t*
     67 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
     68    if(!status || U_FAILURE(*status)){
     69        return NULL;
     70    }
     71    if (length < 4) {
     72        int32_t j=0;
     73        for (; j<length; ++j) {
     74            if (value == ESCAPE_BYTE) {
     75                buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
     76            }
     77            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
     78        }
     79    }
     80    else {
     81        if (length == ESCAPE_BYTE) {
     82            if (value == ESCAPE_BYTE){
     83               buffer =  appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
     84            }
     85            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
     86            --length;
     87        }
     88        buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
     89        buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
     90        buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
     91    }
     92    return buffer;
     93 }
     94 
     95 #define APPEND( buffer, bufLimit, value, status) UPRV_BLOCK_MACRO_BEGIN { \
     96    if(buffer<bufLimit){                    \
     97        *buffer++=(value);                  \
     98    }else{                                  \
     99        *status = U_BUFFER_OVERFLOW_ERROR;  \
    100    }                                       \
    101 } UPRV_BLOCK_MACRO_END
    102 
    103 /**
    104 * Encode a run, possibly a degenerate run (of < 4 values).
    105 * @param length The length of the run; must be > 0 && <= 0xFFFF.
    106 */
    107 static uint16_t*
    108 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
    109    if (length < 4) {
    110        int j=0;
    111        for (; j<length; ++j) {
    112            if (value == (int32_t) ESCAPE){
    113                APPEND(buffer,bufLimit,ESCAPE, status);
    114 
    115            }
    116            APPEND(buffer,bufLimit,value,status);
    117        }
    118    }
    119    else {
    120        if (length == (int32_t) ESCAPE) {
    121            if (value == (int32_t) ESCAPE){
    122                APPEND(buffer,bufLimit,ESCAPE,status);
    123 
    124            }
    125            APPEND(buffer,bufLimit,value,status);
    126            --length;
    127        }
    128        APPEND(buffer,bufLimit,ESCAPE,status);
    129        APPEND(buffer,bufLimit,(uint16_t) length,status);
    130        APPEND(buffer,bufLimit,(uint16_t)value, status); /* Don't need to escape this value */
    131    }
    132    return buffer;
    133 }
    134 
    135 /**
    136 * Construct a string representing a char array.  Use run-length encoding.
    137 * A character represents itself, unless it is the ESCAPE character.  Then
    138 * the following notations are possible:
    139 *   ESCAPE ESCAPE   ESCAPE literal
    140 *   ESCAPE n c      n instances of character c
    141 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
    142 * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
    143 * If we encounter a run where n == ESCAPE, we represent this as:
    144 *   c ESCAPE n-1 c
    145 * The ESCAPE value is chosen so as not to collide with commonly
    146 * seen values.
    147 */
    148 int32_t
    149 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
    150    uint16_t* bufLimit =  buffer+bufLen;
    151    uint16_t* saveBuffer = buffer;
    152    if(buffer < bufLimit){
    153        *buffer++ =  (uint16_t)(srcLen>>16);
    154        if(buffer<bufLimit){
    155            uint16_t runValue = src[0];
    156            int32_t runLength = 1;
    157            int i=1;
    158            *buffer++ = (uint16_t) srcLen;
    159 
    160            for (; i<srcLen; ++i) {
    161                uint16_t s = src[i];
    162                if (s == runValue && runLength < 0xFFFF){
    163                    ++runLength;
    164                }else {
    165                    buffer = encodeRunShort(buffer, bufLimit, runValue, runLength, status);
    166                    runValue = s;
    167                    runLength = 1;
    168                }
    169            }
    170            buffer = encodeRunShort(buffer, bufLimit, runValue, runLength, status);
    171        }else{
    172            *status = U_BUFFER_OVERFLOW_ERROR;
    173        }
    174    }else{
    175        *status = U_BUFFER_OVERFLOW_ERROR;
    176    }
    177    return (int32_t)(buffer - saveBuffer);
    178 }
    179 
    180 /**
    181 * Construct a string representing a byte array.  Use run-length encoding.
    182 * Two bytes are packed into a single char, with a single extra zero byte at
    183 * the end if needed.  A byte represents itself, unless it is the
    184 * ESCAPE_BYTE.  Then the following notations are possible:
    185 *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
    186 *   ESCAPE_BYTE n b           n instances of byte b
    187 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
    188 * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
    189 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
    190 *   b ESCAPE_BYTE n-1 b
    191 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
    192 * seen values.
    193 */
    194 int32_t
    195 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
    196    const uint16_t* saveBuf = buffer;
    197    uint16_t* bufLimit =  buffer+bufLen;
    198    if(buffer < bufLimit){
    199        *buffer++ = ((uint16_t) (srcLen >> 16));
    200 
    201        if(buffer<bufLimit){
    202            uint8_t runValue = src[0];
    203            int runLength = 1;
    204            uint8_t state[2]= {0};
    205            int i=1;
    206            *buffer++=((uint16_t) srcLen);
    207            for (; i<srcLen; ++i) {
    208                uint8_t b = src[i];
    209                if (b == runValue && runLength < 0xFF){
    210                    ++runLength;
    211                }
    212                else {
    213                    buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
    214                    runValue = b;
    215                    runLength = 1;
    216                }
    217            }
    218            buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
    219 
    220            /* We must save the final byte, if there is one, by padding
    221             * an extra zero.
    222             */
    223            if (state[0] != 0) {
    224                buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
    225            }
    226        }else{
    227            *status = U_BUFFER_OVERFLOW_ERROR;
    228        }
    229    }else{
    230        *status = U_BUFFER_OVERFLOW_ERROR;
    231    }
    232    return (int32_t) (buffer - saveBuf);
    233 }
    234 
    235 
    236 /**
    237 * Construct an array of shorts from a run-length encoded string.
    238 */
    239 int32_t
    240 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
    241    int32_t length = 0;
    242    int32_t ai = 0;
    243    int i=2;
    244 
    245    if(!status || U_FAILURE(*status)){
    246        return 0;
    247    }
    248    /* the source is null terminated */
    249    if(srcLen == -1){
    250        srcLen = u_strlen(src);
    251    }
    252    if(srcLen <= 2){
    253        return 2;
    254    }
    255    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
    256 
    257    if(target == NULL){
    258        return length;
    259    }
    260    if(tgtLen < length){
    261        *status = U_BUFFER_OVERFLOW_ERROR;
    262        return length;
    263    }
    264 
    265    for (; i<srcLen; ++i) {
    266        uint16_t c = src[i];
    267        if (c == ESCAPE) {
    268            c = src[++i];
    269            if (c == ESCAPE) {
    270                target[ai++] = c;
    271            } else {
    272                int32_t runLength = (int32_t) c;
    273                uint16_t runValue = src[++i];
    274                int j=0;
    275                for (; j<runLength; ++j) {
    276                    target[ai++] = runValue;
    277                }
    278            }
    279        }
    280        else {
    281            target[ai++] = c;
    282        }
    283    }
    284 
    285    if (ai != length){
    286        *status = U_INTERNAL_PROGRAM_ERROR;
    287    }
    288 
    289    return length;
    290 }
    291 
    292 /**
    293 * Construct an array of bytes from a run-length encoded string.
    294 */
    295 int32_t
    296 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
    297 
    298    int32_t length = 0;
    299    UBool nextChar = true;
    300    uint16_t c = 0;
    301    int32_t node = 0;
    302    int32_t runLength = 0;
    303    int32_t i = 2;
    304    int32_t ai=0;
    305 
    306    if(!status || U_FAILURE(*status)){
    307        return 0;
    308    }
    309    /* the source is null terminated */
    310    if(srcLen == -1){
    311        srcLen = u_strlen(src);
    312    }
    313    if(srcLen <= 2){
    314        return 2;
    315    }
    316    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
    317 
    318    if(target == NULL){
    319        return length;
    320    }
    321    if(tgtLen < length){
    322        *status = U_BUFFER_OVERFLOW_ERROR;
    323        return length;
    324    }
    325 
    326    for (; ai<tgtLen; ) {
    327       /* This part of the loop places the next byte into the local
    328        * variable 'b' each time through the loop.  It keeps the
    329        * current character in 'c' and uses the boolean 'nextChar'
    330        * to see if we've taken both bytes out of 'c' yet.
    331        */
    332        uint8_t b;
    333        if (nextChar) {
    334            c = src[i++];
    335            b = (uint8_t) (c >> 8);
    336            nextChar = false;
    337        }
    338        else {
    339            b = (uint8_t) (c & 0xFF);
    340            nextChar = true;
    341        }
    342 
    343       /* This part of the loop is a tiny state machine which handles
    344        * the parsing of the run-length encoding.  This would be simpler
    345        * if we could look ahead, but we can't, so we use 'node' to
    346        * move between three nodes in the state machine.
    347        */
    348        switch (node) {
    349        case 0:
    350            /* Normal idle node */
    351            if (b == ESCAPE_BYTE) {
    352                node = 1;
    353            }
    354            else {
    355                target[ai++] = b;
    356            }
    357            break;
    358        case 1:
    359           /* We have seen one ESCAPE_BYTE; we expect either a second
    360            * one, or a run length and value.
    361            */
    362            if (b == ESCAPE_BYTE) {
    363                target[ai++] = ESCAPE_BYTE;
    364                node = 0;
    365            }
    366            else {
    367                runLength = b;
    368                node = 2;
    369            }
    370            break;
    371        case 2:
    372            {
    373                int j=0;
    374               /* We have seen an ESCAPE_BYTE and length byte.  We interpret
    375                * the next byte as the value to be repeated.
    376                */
    377                for (; j<runLength; ++j){
    378                    if(ai<tgtLen){
    379                        target[ai++] = b;
    380                    }else{
    381                        *status = U_BUFFER_OVERFLOW_ERROR;
    382                        return ai;
    383                    }
    384                }
    385                node = 0;
    386                break;
    387            }
    388        }
    389    }
    390 
    391    if (node != 0){
    392        *status = U_INTERNAL_PROGRAM_ERROR;
    393        /*("Bad run-length encoded byte array")*/
    394        return 0;
    395    }
    396 
    397 
    398    if (i != srcLen){
    399        /*("Excess data in RLE byte array string");*/
    400        *status = U_INTERNAL_PROGRAM_ERROR;
    401        return ai;
    402    }
    403 
    404    return ai;
    405 }