tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucnv_cb.cpp (8223B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2000-2006, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *  ucnv_cb.c:
      9 *  External APIs for the ICU's codeset conversion library
     10 *  Helena Shih
     11 *
     12 * Modification History:
     13 *
     14 *   Date        Name        Description
     15 *   7/28/2000   srl         Implementation
     16 */
     17 
     18 /**
     19 * @name Character Conversion C API
     20 *
     21 */
     22 
     23 #include "unicode/utypes.h"
     24 
     25 #if !UCONFIG_NO_CONVERSION
     26 
     27 #include "unicode/ucnv_cb.h"
     28 #include "ucnv_bld.h"
     29 #include "ucnv_cnv.h"
     30 #include "cmemory.h"
     31 
     32 /* need to update the offsets when the target moves. */
     33 /* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
     34 if you don't use ucnv_cbXXX functions.  Make sure you don't use the same callback within
     35 the same call stack if the complexity arises. */
     36 U_CAPI void  U_EXPORT2
     37 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
     38                       const char* source,
     39                       int32_t length,
     40                       int32_t offsetIndex,
     41                       UErrorCode * err)
     42 {
     43    if(U_FAILURE(*err)) {
     44        return;
     45    }
     46 
     47    ucnv_fromUWriteBytes(
     48        args->converter,
     49        source, length,
     50        &args->target, args->targetLimit,
     51        &args->offsets, offsetIndex,
     52        err);
     53 }
     54 
     55 U_CAPI void  U_EXPORT2
     56 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
     57                             const char16_t** source,
     58                             const char16_t*  sourceLimit,
     59                             int32_t offsetIndex,
     60                             UErrorCode * err)
     61 {
     62    /*
     63    This is a fun one.  Recursion can occur - we're basically going to
     64    just retry shoving data through the same converter. Note, if you got
     65    here through some kind of invalid sequence, you maybe should emit a
     66    reset sequence of some kind and/or call ucnv_reset().  Since this
     67    IS an actual conversion, take care that you've changed the callback
     68    or the data, or you'll get an infinite loop.
     69 
     70    Please set the err value to something reasonable before calling
     71    into this.
     72    */
     73 
     74    char *oldTarget;
     75 
     76    if(U_FAILURE(*err))
     77    {
     78        return;
     79    }
     80 
     81    oldTarget = args->target;
     82 
     83    ucnv_fromUnicode(args->converter,
     84        &args->target,
     85        args->targetLimit,
     86        source,
     87        sourceLimit,
     88        nullptr, /* no offsets */
     89        false, /* no flush */
     90        err);
     91 
     92    if(args->offsets)
     93    {
     94        while (args->target != oldTarget)  /* if it moved at all.. */
     95        {
     96            *(args->offsets)++ = offsetIndex;
     97            oldTarget++;
     98        }
     99    }
    100 
    101    /*
    102    Note, if you did something like used a Stop subcallback, things would get interesting.
    103    In fact, here's where we want to return the partially consumed in-source!
    104    */
    105    if(*err == U_BUFFER_OVERFLOW_ERROR)
    106    /* && (*source < sourceLimit && args->target >= args->targetLimit)
    107    -- S. Hrcek */
    108    {
    109        /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
    110        It's a fixed size. If we overflow it... Hmm */
    111        char *newTarget;
    112        const char *newTargetLimit;
    113        UErrorCode err2 = U_ZERO_ERROR;
    114 
    115        int8_t errBuffLen;
    116 
    117        errBuffLen  = args->converter->charErrorBufferLength;
    118 
    119        /* start the new target at the first free slot in the errbuff.. */
    120        newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
    121 
    122        newTargetLimit = (char *)(args->converter->charErrorBuffer +
    123            sizeof(args->converter->charErrorBuffer));
    124 
    125        if(newTarget >= newTargetLimit)
    126        {
    127            *err = U_INTERNAL_PROGRAM_ERROR;
    128            return;
    129        }
    130 
    131        /* We're going to tell the converter that the errbuff len is empty.
    132        This prevents the existing errbuff from being 'flushed' out onto
    133        itself.  If the errbuff is needed by the converter this time,
    134        we're hosed - we're out of space! */
    135 
    136        args->converter->charErrorBufferLength = 0;
    137 
    138        ucnv_fromUnicode(args->converter,
    139                         &newTarget,
    140                         newTargetLimit,
    141                         source,
    142                         sourceLimit,
    143                         nullptr,
    144                         false,
    145                         &err2);
    146 
    147        /* We can go ahead and overwrite the  length here. We know just how
    148        to recalculate it. */
    149 
    150        args->converter->charErrorBufferLength = (int8_t)(
    151            newTarget - (char*)args->converter->charErrorBuffer);
    152 
    153        if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
    154        {
    155            /* now we're REALLY in trouble.
    156            Internal program error - callback shouldn't have written this much
    157            data!
    158            */
    159            *err = U_INTERNAL_PROGRAM_ERROR;
    160            return;
    161        }
    162        /*else {*/
    163            /* sub errs could be invalid/truncated/illegal chars or w/e.
    164            These might want to be passed on up.. But the problem is, we already
    165            need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
    166            other errs.. */
    167 
    168            /*
    169            if(U_FAILURE(err2))
    170            ??
    171            */
    172        /*}*/
    173    }
    174 }
    175 
    176 U_CAPI void  U_EXPORT2
    177 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
    178                           int32_t offsetIndex,
    179                           UErrorCode * err)
    180 {
    181    UConverter *converter;
    182    int32_t length;
    183 
    184    if(U_FAILURE(*err)) {
    185        return;
    186    }
    187    converter = args->converter;
    188    length = converter->subCharLen;
    189 
    190    if(length == 0) {
    191        return;
    192    }
    193 
    194    if(length < 0) {
    195        /*
    196         * Write/convert the substitution string. Its real length is -length.
    197         * Unlike the escape callback, we need not change the converter's
    198         * callback function because ucnv_setSubstString() verified that
    199         * the string can be converted, so we will not get a conversion error
    200         * and will not recurse.
    201         * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
    202         */
    203        const char16_t *source = (const char16_t *)converter->subChars;
    204        ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
    205        return;
    206    }
    207 
    208    if(converter->sharedData->impl->writeSub!=nullptr) {
    209        converter->sharedData->impl->writeSub(args, offsetIndex, err);
    210    }
    211    else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
    212        /*
    213        TODO: Is this untestable because the MBCS converter has a writeSub function to call
    214        and the other converters don't use subChar1?
    215        */
    216        ucnv_cbFromUWriteBytes(args,
    217                               (const char *)&converter->subChar1, 1,
    218                               offsetIndex, err);
    219    }
    220    else {
    221        ucnv_cbFromUWriteBytes(args,
    222                               (const char *)converter->subChars, length,
    223                               offsetIndex, err);
    224    }
    225 }
    226 
    227 U_CAPI void  U_EXPORT2
    228 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
    229                            const char16_t* source,
    230                            int32_t length,
    231                            int32_t offsetIndex,
    232                            UErrorCode * err)
    233 {
    234    if(U_FAILURE(*err)) {
    235        return;
    236    }
    237 
    238    ucnv_toUWriteUChars(
    239        args->converter,
    240        source, length,
    241        &args->target, args->targetLimit,
    242        &args->offsets, offsetIndex,
    243        err);
    244 }
    245 
    246 U_CAPI void  U_EXPORT2
    247 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
    248                         int32_t offsetIndex,
    249                       UErrorCode * err)
    250 {
    251    static const char16_t kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
    252 
    253    /* could optimize this case, just one uchar */
    254    if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
    255        ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
    256    } else {
    257        ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
    258    }
    259 }
    260 
    261 #endif