[ tor-browser ].git.dasho

ucnvlat1.cpp (22301B)
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /* 
      4 **********************************************************************
      5 *   Copyright (C) 2000-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   file name:  ucnvlat1.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2000feb07
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_CONVERSION
     20 
     21 #include "unicode/ucnv.h"
     22 #include "unicode/uset.h"
     23 #include "unicode/utf8.h"
     24 #include "ucnv_bld.h"
     25 #include "ucnv_cnv.h"
     26 #include "ustr_imp.h"
     27 
     28 /* control optimizations according to the platform */
     29 #define LATIN1_UNROLL_FROM_UNICODE 1
     30 
     31 /* ISO 8859-1 --------------------------------------------------------------- */
     32 
     33 /* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
     34 U_CDECL_BEGIN
     35 static void U_CALLCONV
     36 _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
     37                            UErrorCode *pErrorCode) {
     38    const uint8_t *source;
     39    char16_t *target;
     40    int32_t targetCapacity, length;
     41    int32_t *offsets;
     42 
     43    int32_t sourceIndex;
     44 
     45    /* set up the local pointers */
     46    source=(const uint8_t *)pArgs->source;
     47    target=pArgs->target;
     48    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
     49    offsets=pArgs->offsets;
     50 
     51    sourceIndex=0;
     52 
     53    /*
     54     * since the conversion here is 1:1 char16_t:uint8_t, we need only one counter
     55     * for the minimum of the sourceLength and targetCapacity
     56     */
     57    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
     58    if(length<=targetCapacity) {
     59        targetCapacity=length;
     60    } else {
     61        /* target will be full */
     62        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     63        length=targetCapacity;
     64    }
     65 
     66    if(targetCapacity>=8) {
     67        /* This loop is unrolled for speed and improved pipelining. */
     68        int32_t count, loops;
     69 
     70        loops=count=targetCapacity>>3;
     71        length=targetCapacity&=0x7;
     72        do {
     73            target[0]=source[0];
     74            target[1]=source[1];
     75            target[2]=source[2];
     76            target[3]=source[3];
     77            target[4]=source[4];
     78            target[5]=source[5];
     79            target[6]=source[6];
     80            target[7]=source[7];
     81            target+=8;
     82            source+=8;
     83        } while(--count>0);
     84 
     85        if(offsets!=nullptr) {
     86            do {
     87                offsets[0]=sourceIndex++;
     88                offsets[1]=sourceIndex++;
     89                offsets[2]=sourceIndex++;
     90                offsets[3]=sourceIndex++;
     91                offsets[4]=sourceIndex++;
     92                offsets[5]=sourceIndex++;
     93                offsets[6]=sourceIndex++;
     94                offsets[7]=sourceIndex++;
     95                offsets+=8;
     96            } while(--loops>0);
     97        }
     98    }
     99 
    100    /* conversion loop */
    101    while(targetCapacity>0) {
    102        *target++=*source++;
    103        --targetCapacity;
    104    }
    105 
    106    /* write back the updated pointers */
    107    pArgs->source=(const char *)source;
    108    pArgs->target=target;
    109 
    110    /* set offsets */
    111    if(offsets!=nullptr) {
    112        while(length>0) {
    113            *offsets++=sourceIndex++;
    114            --length;
    115        }
    116        pArgs->offsets=offsets;
    117    }
    118 }
    119 
    120 /* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
    121 static UChar32 U_CALLCONV
    122 _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
    123                    UErrorCode *pErrorCode) {
    124    const uint8_t *source=(const uint8_t *)pArgs->source;
    125    if(source<(const uint8_t *)pArgs->sourceLimit) {
    126        pArgs->source=(const char *)(source+1);
    127        return *source;
    128    }
    129 
    130    /* no output because of empty input */
    131    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    132    return 0xffff;
    133 }
    134 
    135 /* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
    136 static void U_CALLCONV
    137 _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
    138                              UErrorCode *pErrorCode) {
    139    UConverter *cnv;
    140    const char16_t *source, *sourceLimit;
    141    uint8_t *target, *oldTarget;
    142    int32_t targetCapacity, length;
    143    int32_t *offsets;
    144 
    145    UChar32 cp;
    146    char16_t c, max;
    147 
    148    int32_t sourceIndex;
    149 
    150    /* set up the local pointers */
    151    cnv=pArgs->converter;
    152    source=pArgs->source;
    153    sourceLimit=pArgs->sourceLimit;
    154    target=oldTarget=(uint8_t *)pArgs->target;
    155    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
    156    offsets=pArgs->offsets;
    157 
    158    if(cnv->sharedData==&_Latin1Data) {
    159        max=0xff; /* Latin-1 */
    160    } else {
    161        max=0x7f; /* US-ASCII */
    162    }
    163 
    164    /* get the converter state from UConverter */
    165    cp=cnv->fromUChar32;
    166 
    167    /* sourceIndex=-1 if the current character began in the previous buffer */
    168    sourceIndex= cp==0 ? 0 : -1;
    169 
    170    /*
    171     * since the conversion here is 1:1 char16_t:uint8_t, we need only one counter
    172     * for the minimum of the sourceLength and targetCapacity
    173     */
    174    length=(int32_t)(sourceLimit-source);
    175    if(length<targetCapacity) {
    176        targetCapacity=length;
    177    }
    178 
    179    /* conversion loop */
    180    if(cp!=0 && targetCapacity>0) {
    181        goto getTrail;
    182    }
    183 
    184 #if LATIN1_UNROLL_FROM_UNICODE
    185    /* unroll the loop with the most common case */
    186    if(targetCapacity>=16) {
    187        int32_t count, loops;
    188        char16_t u, oredChars;
    189 
    190        loops=count=targetCapacity>>4;
    191        do {
    192            oredChars=u=*source++;
    193            *target++=(uint8_t)u;
    194            oredChars|=u=*source++;
    195            *target++=(uint8_t)u;
    196            oredChars|=u=*source++;
    197            *target++=(uint8_t)u;
    198            oredChars|=u=*source++;
    199            *target++=(uint8_t)u;
    200            oredChars|=u=*source++;
    201            *target++=(uint8_t)u;
    202            oredChars|=u=*source++;
    203            *target++=(uint8_t)u;
    204            oredChars|=u=*source++;
    205            *target++=(uint8_t)u;
    206            oredChars|=u=*source++;
    207            *target++=(uint8_t)u;
    208            oredChars|=u=*source++;
    209            *target++=(uint8_t)u;
    210            oredChars|=u=*source++;
    211            *target++=(uint8_t)u;
    212            oredChars|=u=*source++;
    213            *target++=(uint8_t)u;
    214            oredChars|=u=*source++;
    215            *target++=(uint8_t)u;
    216            oredChars|=u=*source++;
    217            *target++=(uint8_t)u;
    218            oredChars|=u=*source++;
    219            *target++=(uint8_t)u;
    220            oredChars|=u=*source++;
    221            *target++=(uint8_t)u;
    222            oredChars|=u=*source++;
    223            *target++=(uint8_t)u;
    224 
    225            /* were all 16 entries really valid? */
    226            if(oredChars>max) {
    227                /* no, return to the first of these 16 */
    228                source-=16;
    229                target-=16;
    230                break;
    231            }
    232        } while(--count>0);
    233        count=loops-count;
    234        targetCapacity-=16*count;
    235 
    236        if(offsets!=nullptr) {
    237            oldTarget+=16*count;
    238            while(count>0) {
    239                *offsets++=sourceIndex++;
    240                *offsets++=sourceIndex++;
    241                *offsets++=sourceIndex++;
    242                *offsets++=sourceIndex++;
    243                *offsets++=sourceIndex++;
    244                *offsets++=sourceIndex++;
    245                *offsets++=sourceIndex++;
    246                *offsets++=sourceIndex++;
    247                *offsets++=sourceIndex++;
    248                *offsets++=sourceIndex++;
    249                *offsets++=sourceIndex++;
    250                *offsets++=sourceIndex++;
    251                *offsets++=sourceIndex++;
    252                *offsets++=sourceIndex++;
    253                *offsets++=sourceIndex++;
    254                *offsets++=sourceIndex++;
    255                --count;
    256            }
    257        }
    258    }
    259 #endif
    260 
    261    /* conversion loop */
    262    c=0;
    263    while(targetCapacity>0 && (c=*source++)<=max) {
    264        /* convert the Unicode code point */
    265        *target++=(uint8_t)c;
    266        --targetCapacity;
    267    }
    268 
    269    if(c>max) {
    270        cp=c;
    271        if(!U_IS_SURROGATE(cp)) {
    272            /* callback(unassigned) */
    273        } else if(U_IS_SURROGATE_LEAD(cp)) {
    274 getTrail:
    275            if(source<sourceLimit) {
    276                /* test the following code unit */
    277                char16_t trail=*source;
    278                if(U16_IS_TRAIL(trail)) {
    279                    ++source;
    280                    cp=U16_GET_SUPPLEMENTARY(cp, trail);
    281                    /* this codepage does not map supplementary code points */
    282                    /* callback(unassigned) */
    283                } else {
    284                    /* this is an unmatched lead code unit (1st surrogate) */
    285                    /* callback(illegal) */
    286                }
    287            } else {
    288                /* no more input */
    289                cnv->fromUChar32=cp;
    290                goto noMoreInput;
    291            }
    292        } else {
    293            /* this is an unmatched trail code unit (2nd surrogate) */
    294            /* callback(illegal) */
    295        }
    296 
    297        *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
    298        cnv->fromUChar32=cp;
    299    }
    300 noMoreInput:
    301 
    302    /* set offsets since the start */
    303    if(offsets!=nullptr) {
    304        size_t count=target-oldTarget;
    305        while(count>0) {
    306            *offsets++=sourceIndex++;
    307            --count;
    308        }
    309    }
    310 
    311    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
    312        /* target is full */
    313        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    314    }
    315 
    316    /* write back the updated pointers */
    317    pArgs->source=source;
    318    pArgs->target=(char *)target;
    319    pArgs->offsets=offsets;
    320 }
    321 
    322 /* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
    323 static void U_CALLCONV
    324 ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
    325                    UConverterToUnicodeArgs *pToUArgs,
    326                    UErrorCode *pErrorCode) {
    327    UConverter *utf8;
    328    const uint8_t *source, *sourceLimit;
    329    uint8_t *target;
    330    int32_t targetCapacity;
    331 
    332    UChar32 c;
    333    uint8_t b, t1;
    334 
    335    /* set up the local pointers */
    336    utf8=pToUArgs->converter;
    337    source=(uint8_t *)pToUArgs->source;
    338    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
    339    target=(uint8_t *)pFromUArgs->target;
    340    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
    341 
    342    /* get the converter state from the UTF-8 UConverter */
    343    if (utf8->toULength > 0) {
    344        c=(UChar32)utf8->toUnicodeStatus;
    345    } else {
    346        c = 0;
    347    }
    348    if(c!=0 && source<sourceLimit) {
    349        if(targetCapacity==0) {
    350            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    351            return;
    352        } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
    353            ++source;
    354            *target++=(uint8_t)(((c&3)<<6)|t1);
    355            --targetCapacity;
    356 
    357            utf8->toUnicodeStatus=0;
    358            utf8->toULength=0;
    359        } else {
    360            /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
    361            *pErrorCode=U_USING_DEFAULT_WARNING;
    362            return;
    363        }
    364    }
    365 
    366    /*
    367     * Make sure that the last byte sequence before sourceLimit is complete
    368     * or runs into a lead byte.
    369     * In the conversion loop compare source with sourceLimit only once
    370     * per multi-byte character.
    371     * For Latin-1, adjust sourceLimit only for 1 trail byte because
    372     * the conversion loop handles at most 2-byte sequences.
    373     */
    374    if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
    375        --sourceLimit;
    376    }
    377 
    378    /* conversion loop */
    379    while(source<sourceLimit) {
    380        if(targetCapacity>0) {
    381            b=*source++;
    382            if(U8_IS_SINGLE(b)) {
    383                /* convert ASCII */
    384                *target++ = b;
    385                --targetCapacity;
    386            } else if( /* handle U+0080..U+00FF inline */
    387                       b>=0xc2 && b<=0xc3 &&
    388                       (t1=(uint8_t)(*source-0x80)) <= 0x3f
    389            ) {
    390                ++source;
    391                *target++=(uint8_t)(((b&3)<<6)|t1);
    392                --targetCapacity;
    393            } else {
    394                /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
    395                pToUArgs->source=(char *)(source-1);
    396                pFromUArgs->target=(char *)target;
    397                *pErrorCode=U_USING_DEFAULT_WARNING;
    398                return;
    399            }
    400        } else {
    401            /* target is full */
    402            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    403            break;
    404        }
    405    }
    406 
    407    /*
    408     * The sourceLimit may have been adjusted before the conversion loop
    409     * to stop before a truncated sequence.
    410     * If so, then collect the truncated sequence now.
    411     * For Latin-1, there is at most exactly one lead byte because of the
    412     * smaller sourceLimit adjustment logic.
    413     */
    414    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
    415        utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
    416        utf8->toULength=1;
    417        utf8->mode=U8_COUNT_BYTES(b);
    418    }
    419 
    420    /* write back the updated pointers */
    421    pToUArgs->source=(char *)source;
    422    pFromUArgs->target=(char *)target;
    423 }
    424 
    425 static void U_CALLCONV
    426 _Latin1GetUnicodeSet(const UConverter *cnv,
    427                     const USetAdder *sa,
    428                     UConverterUnicodeSet which,
    429                     UErrorCode *pErrorCode) {
    430    (void)cnv;
    431    (void)which;
    432    (void)pErrorCode;
    433    sa->addRange(sa->set, 0, 0xff);
    434 }
    435 U_CDECL_END
    436 
    437 
    438 static const UConverterImpl _Latin1Impl={
    439    UCNV_LATIN_1,
    440 
    441    nullptr,
    442    nullptr,
    443 
    444    nullptr,
    445    nullptr,
    446    nullptr,
    447 
    448    _Latin1ToUnicodeWithOffsets,
    449    _Latin1ToUnicodeWithOffsets,
    450    _Latin1FromUnicodeWithOffsets,
    451    _Latin1FromUnicodeWithOffsets,
    452    _Latin1GetNextUChar,
    453 
    454    nullptr,
    455    nullptr,
    456    nullptr,
    457    nullptr,
    458    _Latin1GetUnicodeSet,
    459 
    460    nullptr,
    461    ucnv_Latin1FromUTF8
    462 };
    463 
    464 static const UConverterStaticData _Latin1StaticData={
    465    sizeof(UConverterStaticData),
    466    "ISO-8859-1",
    467    819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
    468    { 0x1a, 0, 0, 0 }, 1, false, false,
    469    0,
    470    0,
    471    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    472 };
    473 
    474 const UConverterSharedData _Latin1Data=
    475        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
    476 
    477 /* US-ASCII ----------------------------------------------------------------- */
    478 
    479 U_CDECL_BEGIN
    480 /* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
    481 static void U_CALLCONV
    482 _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
    483                           UErrorCode *pErrorCode) {
    484    const uint8_t *source, *sourceLimit;
    485    char16_t *target, *oldTarget;
    486    int32_t targetCapacity, length;
    487    int32_t *offsets;
    488 
    489    int32_t sourceIndex;
    490 
    491    uint8_t c;
    492 
    493    /* set up the local pointers */
    494    source=(const uint8_t *)pArgs->source;
    495    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    496    target=oldTarget=pArgs->target;
    497    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
    498    offsets=pArgs->offsets;
    499 
    500    /* sourceIndex=-1 if the current character began in the previous buffer */
    501    sourceIndex=0;
    502 
    503    /*
    504     * since the conversion here is 1:1 char16_t:uint8_t, we need only one counter
    505     * for the minimum of the sourceLength and targetCapacity
    506     */
    507    length=(int32_t)(sourceLimit-source);
    508    if(length<targetCapacity) {
    509        targetCapacity=length;
    510    }
    511 
    512    if(targetCapacity>=8) {
    513        /* This loop is unrolled for speed and improved pipelining. */
    514        int32_t count, loops;
    515        char16_t oredChars;
    516 
    517        loops=count=targetCapacity>>3;
    518        do {
    519            oredChars=target[0]=source[0];
    520            oredChars|=target[1]=source[1];
    521            oredChars|=target[2]=source[2];
    522            oredChars|=target[3]=source[3];
    523            oredChars|=target[4]=source[4];
    524            oredChars|=target[5]=source[5];
    525            oredChars|=target[6]=source[6];
    526            oredChars|=target[7]=source[7];
    527 
    528            /* were all 16 entries really valid? */
    529            if(oredChars>0x7f) {
    530                /* no, return to the first of these 16 */
    531                break;
    532            }
    533            source+=8;
    534            target+=8;
    535        } while(--count>0);
    536        count=loops-count;
    537        targetCapacity-=count*8;
    538 
    539        if(offsets!=nullptr) {
    540            oldTarget+=count*8;
    541            while(count>0) {
    542                offsets[0]=sourceIndex++;
    543                offsets[1]=sourceIndex++;
    544                offsets[2]=sourceIndex++;
    545                offsets[3]=sourceIndex++;
    546                offsets[4]=sourceIndex++;
    547                offsets[5]=sourceIndex++;
    548                offsets[6]=sourceIndex++;
    549                offsets[7]=sourceIndex++;
    550                offsets+=8;
    551                --count;
    552            }
    553        }
    554    }
    555 
    556    /* conversion loop */
    557    c=0;
    558    while(targetCapacity>0 && (c=*source++)<=0x7f) {
    559        *target++=c;
    560        --targetCapacity;
    561    }
    562 
    563    if(c>0x7f) {
    564        /* callback(illegal); copy the current bytes to toUBytes[] */
    565        UConverter *cnv=pArgs->converter;
    566        cnv->toUBytes[0]=c;
    567        cnv->toULength=1;
    568        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    569    } else if(source<sourceLimit && target>=pArgs->targetLimit) {
    570        /* target is full */
    571        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    572    }
    573 
    574    /* set offsets since the start */
    575    if(offsets!=nullptr) {
    576        size_t count=target-oldTarget;
    577        while(count>0) {
    578            *offsets++=sourceIndex++;
    579            --count;
    580        }
    581    }
    582 
    583    /* write back the updated pointers */
    584    pArgs->source=(const char *)source;
    585    pArgs->target=target;
    586    pArgs->offsets=offsets;
    587 }
    588 
    589 /* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
    590 static UChar32 U_CALLCONV
    591 _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
    592                   UErrorCode *pErrorCode) {
    593    const uint8_t *source;
    594    uint8_t b;
    595 
    596    source=(const uint8_t *)pArgs->source;
    597    if(source<(const uint8_t *)pArgs->sourceLimit) {
    598        b=*source++;
    599        pArgs->source=(const char *)source;
    600        if(b<=0x7f) {
    601            return b;
    602        } else {
    603            UConverter *cnv=pArgs->converter;
    604            cnv->toUBytes[0]=b;
    605            cnv->toULength=1;
    606            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    607            return 0xffff;
    608        }
    609    }
    610 
    611    /* no output because of empty input */
    612    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    613    return 0xffff;
    614 }
    615 
    616 /* "Convert" UTF-8 to US-ASCII: Validate and copy. */
    617 static void U_CALLCONV
    618 ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
    619                   UConverterToUnicodeArgs *pToUArgs,
    620                   UErrorCode *pErrorCode) {
    621    const uint8_t *source, *sourceLimit;
    622    uint8_t *target;
    623    int32_t targetCapacity, length;
    624 
    625    uint8_t c;
    626 
    627    if(pToUArgs->converter->toULength > 0) {
    628        /* no handling of partial UTF-8 characters here, fall back to pivoting */
    629        *pErrorCode=U_USING_DEFAULT_WARNING;
    630        return;
    631    }
    632 
    633    /* set up the local pointers */
    634    source=(const uint8_t *)pToUArgs->source;
    635    sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
    636    target=(uint8_t *)pFromUArgs->target;
    637    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
    638 
    639    /*
    640     * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
    641     * for the minimum of the sourceLength and targetCapacity
    642     */
    643    length=(int32_t)(sourceLimit-source);
    644    if(length<targetCapacity) {
    645        targetCapacity=length;
    646    }
    647 
    648    /* unroll the loop with the most common case */
    649    if(targetCapacity>=16) {
    650        int32_t count, loops;
    651        uint8_t oredChars;
    652 
    653        loops=count=targetCapacity>>4;
    654        do {
    655            oredChars=*target++=*source++;
    656            oredChars|=*target++=*source++;
    657            oredChars|=*target++=*source++;
    658            oredChars|=*target++=*source++;
    659            oredChars|=*target++=*source++;
    660            oredChars|=*target++=*source++;
    661            oredChars|=*target++=*source++;
    662            oredChars|=*target++=*source++;
    663            oredChars|=*target++=*source++;
    664            oredChars|=*target++=*source++;
    665            oredChars|=*target++=*source++;
    666            oredChars|=*target++=*source++;
    667            oredChars|=*target++=*source++;
    668            oredChars|=*target++=*source++;
    669            oredChars|=*target++=*source++;
    670            oredChars|=*target++=*source++;
    671 
    672            /* were all 16 entries really valid? */
    673            if(oredChars>0x7f) {
    674                /* no, return to the first of these 16 */
    675                source-=16;
    676                target-=16;
    677                break;
    678            }
    679        } while(--count>0);
    680        count=loops-count;
    681        targetCapacity-=16*count;
    682    }
    683 
    684    /* conversion loop */
    685    c=0;
    686    while(targetCapacity>0 && (c=*source)<=0x7f) {
    687        ++source;
    688        *target++=c;
    689        --targetCapacity;
    690    }
    691 
    692    if(c>0x7f) {
    693        /* non-ASCII character, handle in standard converter */
    694        *pErrorCode=U_USING_DEFAULT_WARNING;
    695    } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
    696        /* target is full */
    697        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    698    }
    699 
    700    /* write back the updated pointers */
    701    pToUArgs->source=(const char *)source;
    702    pFromUArgs->target=(char *)target;
    703 }
    704 
    705 static void U_CALLCONV
    706 _ASCIIGetUnicodeSet(const UConverter *cnv,
    707                    const USetAdder *sa,
    708                    UConverterUnicodeSet which,
    709                    UErrorCode *pErrorCode) {
    710    (void)cnv;
    711    (void)which;
    712    (void)pErrorCode;
    713    sa->addRange(sa->set, 0, 0x7f);
    714 }
    715 U_CDECL_END
    716 
    717 static const UConverterImpl _ASCIIImpl={
    718    UCNV_US_ASCII,
    719 
    720    nullptr,
    721    nullptr,
    722 
    723    nullptr,
    724    nullptr,
    725    nullptr,
    726 
    727    _ASCIIToUnicodeWithOffsets,
    728    _ASCIIToUnicodeWithOffsets,
    729    _Latin1FromUnicodeWithOffsets,
    730    _Latin1FromUnicodeWithOffsets,
    731    _ASCIIGetNextUChar,
    732 
    733    nullptr,
    734    nullptr,
    735    nullptr,
    736    nullptr,
    737    _ASCIIGetUnicodeSet,
    738 
    739    nullptr,
    740    ucnv_ASCIIFromUTF8
    741 };
    742 
    743 static const UConverterStaticData _ASCIIStaticData={
    744    sizeof(UConverterStaticData),
    745    "US-ASCII",
    746    367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
    747    { 0x1a, 0, 0, 0 }, 1, false, false,
    748    0,
    749    0,
    750    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    751 };
    752 
    753 const UConverterSharedData _ASCIIData=
    754        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
    755 
    756 #endif
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE