[ tor-browser ].git.dasho

ucnv.cpp (96344B)
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1998-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *
     11 *  ucnv.c:
     12 *  Implements APIs for the ICU's codeset conversion library;
     13 *  mostly calls through internal functions;
     14 *  created by Bertrand A. Damiba
     15 *
     16 * Modification History:
     17 *
     18 *   Date        Name        Description
     19 *   04/04/99    helena      Fixed internal header inclusion.
     20 *   05/09/00    helena      Added implementation to handle fallback mappings.
     21 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
     22 */
     23 
     24 #include "unicode/utypes.h"
     25 
     26 #if !UCONFIG_NO_CONVERSION
     27 
     28 #include <memory>
     29 
     30 #include "unicode/ustring.h"
     31 #include "unicode/ucnv.h"
     32 #include "unicode/ucnv_err.h"
     33 #include "unicode/uset.h"
     34 #include "unicode/utf.h"
     35 #include "unicode/utf16.h"
     36 #include "putilimp.h"
     37 #include "cmemory.h"
     38 #include "cstring.h"
     39 #include "uassert.h"
     40 #include "utracimp.h"
     41 #include "ustr_imp.h"
     42 #include "ucnv_imp.h"
     43 #include "ucnv_cnv.h"
     44 #include "ucnv_bld.h"
     45 
     46 /* size of intermediate and preflighting buffers in ucnv_convert() */
     47 #define CHUNK_SIZE 1024
     48 
     49 typedef struct UAmbiguousConverter {
     50    const char *name;
     51    const char16_t variant5c;
     52 } UAmbiguousConverter;
     53 
     54 static const UAmbiguousConverter ambiguousConverters[]={
     55    { "ibm-897_P100-1995", 0xa5 },
     56    { "ibm-942_P120-1999", 0xa5 },
     57    { "ibm-943_P130-1999", 0xa5 },
     58    { "ibm-946_P100-1995", 0xa5 },
     59    { "ibm-33722_P120-1999", 0xa5 },
     60    { "ibm-1041_P100-1995", 0xa5 },
     61    /*{ "ibm-54191_P100-2006", 0xa5 },*/
     62    /*{ "ibm-62383_P100-2007", 0xa5 },*/
     63    /*{ "ibm-891_P100-1995", 0x20a9 },*/
     64    { "ibm-944_P100-1995", 0x20a9 },
     65    { "ibm-949_P110-1999", 0x20a9 },
     66    { "ibm-1363_P110-1997", 0x20a9 },
     67    { "ISO_2022,locale=ko,version=0", 0x20a9 },
     68    { "ibm-1088_P100-1995", 0x20a9 }
     69 };
     70 
     71 /*Calls through createConverter */
     72 U_CAPI UConverter* U_EXPORT2
     73 ucnv_open (const char *name,
     74                       UErrorCode * err)
     75 {
     76    UConverter *r;
     77 
     78    if (err == nullptr || U_FAILURE (*err)) {
     79        return nullptr;
     80    }
     81 
     82    r =  ucnv_createConverter(nullptr, name, err);
     83    return r;
     84 }
     85 
     86 U_CAPI UConverter* U_EXPORT2 
     87 ucnv_openPackage   (const char *packageName, const char *converterName, UErrorCode * err)
     88 {
     89    return ucnv_createConverterFromPackage(packageName, converterName,  err);
     90 }
     91 
     92 /*Extracts the char16_t* to a char* and calls through createConverter */
     93 U_CAPI UConverter*   U_EXPORT2
     94 ucnv_openU (const char16_t * name,
     95                         UErrorCode * err)
     96 {
     97    char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
     98 
     99    if (err == nullptr || U_FAILURE(*err))
    100        return nullptr;
    101    if (name == nullptr)
    102        return ucnv_open (nullptr, err);
    103    if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
    104    {
    105        *err = U_ILLEGAL_ARGUMENT_ERROR;
    106        return nullptr;
    107    }
    108    return ucnv_open(u_austrcpy(asciiName, name), err);
    109 }
    110 
    111 /* Copy the string that is represented by the UConverterPlatform enum
    112 * @param platformString An output buffer
    113 * @param platform An enum representing a platform
    114 * @return the length of the copied string.
    115 */
    116 static int32_t
    117 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
    118 {
    119    switch (pltfrm)
    120    {
    121    case UCNV_IBM:
    122        uprv_strcpy(platformString, "ibm-");
    123        return 4;
    124    case UCNV_UNKNOWN:
    125        break;
    126    }
    127 
    128    /* default to empty string */
    129    *platformString = 0;
    130    return 0;
    131 }
    132 
    133 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
    134 *through createConverter*/
    135 U_CAPI UConverter*   U_EXPORT2
    136 ucnv_openCCSID (int32_t codepage,
    137                UConverterPlatform platform,
    138                UErrorCode * err)
    139 {
    140    char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    141    int32_t myNameLen;
    142 
    143    if (err == nullptr || U_FAILURE (*err))
    144        return nullptr;
    145 
    146    /* ucnv_copyPlatformString could return "ibm-" or "cp" */
    147    myNameLen = ucnv_copyPlatformString(myName, platform);
    148    T_CString_integerToString(myName + myNameLen, codepage, 10);
    149 
    150    return ucnv_createConverter(nullptr, myName, err);
    151 }
    152 
    153 /* Creating a temporary stack-based object that can be used in one thread, 
    154 and created from a converter that is shared across threads.
    155 */
    156 
    157 U_CAPI UConverter* U_EXPORT2
    158 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
    159 {
    160    UConverter *localConverter, *allocatedConverter;
    161    int32_t stackBufferSize;
    162    int32_t bufferSizeNeeded;
    163    UErrorCode cbErr;
    164    UConverterToUnicodeArgs toUArgs = {
    165        sizeof(UConverterToUnicodeArgs),
    166            true,
    167            nullptr,
    168            nullptr,
    169            nullptr,
    170            nullptr,
    171            nullptr,
    172            nullptr
    173    };
    174    UConverterFromUnicodeArgs fromUArgs = {
    175        sizeof(UConverterFromUnicodeArgs),
    176            true,
    177            nullptr,
    178            nullptr,
    179            nullptr,
    180            nullptr,
    181            nullptr,
    182            nullptr
    183    };
    184 
    185    UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
    186 
    187    if (status == nullptr || U_FAILURE(*status)){
    188        UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
    189        return nullptr;
    190    }
    191 
    192    if (cnv == nullptr) {
    193        *status = U_ILLEGAL_ARGUMENT_ERROR;
    194        UTRACE_EXIT_STATUS(*status);
    195        return nullptr;
    196    }
    197 
    198    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
    199                                    ucnv_getName(cnv, status), cnv, stackBuffer);
    200 
    201    if (cnv->sharedData->impl->safeClone != nullptr) {
    202        /* call the custom safeClone function for sizing */
    203        bufferSizeNeeded = 0;
    204        cnv->sharedData->impl->safeClone(cnv, nullptr, &bufferSizeNeeded, status);
    205        if (U_FAILURE(*status)) {
    206            UTRACE_EXIT_STATUS(*status);
    207            return nullptr;
    208        }
    209    }
    210    else
    211    {
    212        /* inherent sizing */
    213        bufferSizeNeeded = sizeof(UConverter);
    214    }
    215 
    216    if (pBufferSize == nullptr) {
    217        stackBufferSize = 1;
    218        pBufferSize = &stackBufferSize;
    219    } else {
    220        stackBufferSize = *pBufferSize;
    221        if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
    222            *pBufferSize = bufferSizeNeeded;
    223            UTRACE_EXIT_VALUE(bufferSizeNeeded);
    224            return nullptr;
    225        }
    226    }
    227 
    228    /* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter.
    229     * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed.
    230     */
    231    if (stackBuffer) {
    232        uintptr_t p = reinterpret_cast<uintptr_t>(stackBuffer);
    233        uintptr_t aligned_p = (p + alignof(UConverter) - 1) & ~(alignof(UConverter) - 1);
    234        ptrdiff_t pointerAdjustment = aligned_p - p;
    235        if (bufferSizeNeeded + pointerAdjustment <= stackBufferSize) {
    236            stackBuffer = reinterpret_cast<void *>(aligned_p);
    237            stackBufferSize -= static_cast<int32_t>(pointerAdjustment);
    238        } else {
    239            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
    240            stackBufferSize = 1;
    241        }
    242    }
    243 
    244    /* Now, see if we must allocate any memory */
    245    if (stackBufferSize < bufferSizeNeeded || stackBuffer == nullptr)
    246    {
    247        /* allocate one here...*/
    248        localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
    249 
    250        if(localConverter == nullptr) {
    251            *status = U_MEMORY_ALLOCATION_ERROR;
    252            UTRACE_EXIT_STATUS(*status);
    253            return nullptr;
    254        }
    255        // If pBufferSize was nullptr as the input, pBufferSize is set to &stackBufferSize in this function.
    256        if (pBufferSize != &stackBufferSize) {
    257            *status = U_SAFECLONE_ALLOCATED_WARNING;
    258        }
    259 
    260        /* record the fact that memory was allocated */
    261        *pBufferSize = bufferSizeNeeded;
    262    } else {
    263        /* just use the stack buffer */
    264        localConverter = (UConverter*) stackBuffer;
    265        allocatedConverter = nullptr;
    266    }
    267 
    268    uprv_memset(localConverter, 0, bufferSizeNeeded);
    269 
    270    /* Copy initial state */
    271    uprv_memcpy(localConverter, cnv, sizeof(UConverter));
    272    localConverter->isCopyLocal = localConverter->isExtraLocal = false;
    273 
    274    /* copy the substitution string */
    275    if (cnv->subChars == (uint8_t *)cnv->subUChars) {
    276        localConverter->subChars = (uint8_t *)localConverter->subUChars;
    277    } else {
    278        localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
    279        if (localConverter->subChars == nullptr) {
    280            uprv_free(allocatedConverter);
    281            UTRACE_EXIT_STATUS(*status);
    282            return nullptr;
    283        }
    284        uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
    285    }
    286 
    287    /* now either call the safeclone fcn or not */
    288    if (cnv->sharedData->impl->safeClone != nullptr) {
    289        /* call the custom safeClone function */
    290        localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
    291    }
    292 
    293    if(localConverter==nullptr || U_FAILURE(*status)) {
    294        if (allocatedConverter != nullptr && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
    295            uprv_free(allocatedConverter->subChars);
    296        }
    297        uprv_free(allocatedConverter);
    298        UTRACE_EXIT_STATUS(*status);
    299        return nullptr;
    300    }
    301 
    302    /* increment refcount of shared data if needed */
    303    if (cnv->sharedData->isReferenceCounted) {
    304        ucnv_incrementRefCount(cnv->sharedData);
    305    }
    306 
    307    if(localConverter == (UConverter*)stackBuffer) {
    308        /* we're using user provided data - set to not destroy */
    309        localConverter->isCopyLocal = true;
    310    }
    311 
    312    /* allow callback functions to handle any memory allocation */
    313    toUArgs.converter = fromUArgs.converter = localConverter;
    314    cbErr = U_ZERO_ERROR;
    315    cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, nullptr, 0, UCNV_CLONE, &cbErr);
    316    cbErr = U_ZERO_ERROR;
    317    cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, nullptr, 0, 0, UCNV_CLONE, &cbErr);
    318 
    319    UTRACE_EXIT_PTR_STATUS(localConverter, *status);
    320    return localConverter;
    321 }
    322 
    323 U_CAPI UConverter* U_EXPORT2
    324 ucnv_clone(const UConverter* cnv, UErrorCode *status)
    325 {
    326    return ucnv_safeClone(cnv, nullptr, nullptr, status);
    327 }
    328 
    329 /*Decreases the reference counter in the shared immutable section of the object
    330 *and frees the mutable part*/
    331 
    332 U_CAPI void  U_EXPORT2
    333 ucnv_close (UConverter * converter)
    334 {
    335    UErrorCode errorCode = U_ZERO_ERROR;
    336 
    337    UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
    338 
    339    if (converter == nullptr)
    340    {
    341        UTRACE_EXIT();
    342        return;
    343    }
    344 
    345    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
    346        ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
    347 
    348    /* In order to speed up the close, only call the callbacks when they have been changed.
    349    This performance check will only work when the callbacks are set within a shared library
    350    or from user code that statically links this code. */
    351    /* first, notify the callback functions that the converter is closed */
    352    if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
    353        UConverterToUnicodeArgs toUArgs = {
    354            sizeof(UConverterToUnicodeArgs),
    355                true,
    356                nullptr,
    357                nullptr,
    358                nullptr,
    359                nullptr,
    360                nullptr,
    361                nullptr
    362        };
    363 
    364        toUArgs.converter = converter;
    365        errorCode = U_ZERO_ERROR;
    366        converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, nullptr, 0, UCNV_CLOSE, &errorCode);
    367    }
    368    if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
    369        UConverterFromUnicodeArgs fromUArgs = {
    370            sizeof(UConverterFromUnicodeArgs),
    371                true,
    372                nullptr,
    373                nullptr,
    374                nullptr,
    375                nullptr,
    376                nullptr,
    377                nullptr
    378        };
    379        fromUArgs.converter = converter;
    380        errorCode = U_ZERO_ERROR;
    381        converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, nullptr, 0, 0, UCNV_CLOSE, &errorCode);
    382    }
    383 
    384    if (converter->sharedData->impl->close != nullptr) {
    385        converter->sharedData->impl->close(converter);
    386    }
    387 
    388    if (converter->subChars != (uint8_t *)converter->subUChars) {
    389        uprv_free(converter->subChars);
    390    }
    391 
    392    if (converter->sharedData->isReferenceCounted) {
    393        ucnv_unloadSharedDataIfReady(converter->sharedData);
    394    }
    395 
    396    if(!converter->isCopyLocal){
    397        uprv_free(converter);
    398    }
    399 
    400    UTRACE_EXIT();
    401 }
    402 
    403 /*returns a single Name from the list, will return nullptr if out of bounds
    404 */
    405 U_CAPI const char*   U_EXPORT2
    406 ucnv_getAvailableName (int32_t n)
    407 {
    408    if (0 <= n && n <= 0xffff) {
    409        UErrorCode err = U_ZERO_ERROR;
    410        const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
    411        if (U_SUCCESS(err)) {
    412            return name;
    413        }
    414    }
    415    return nullptr;
    416 }
    417 
    418 U_CAPI int32_t   U_EXPORT2
    419 ucnv_countAvailable ()
    420 {
    421    UErrorCode err = U_ZERO_ERROR;
    422    return ucnv_bld_countAvailableConverters(&err);
    423 }
    424 
    425 U_CAPI void    U_EXPORT2
    426 ucnv_getSubstChars (const UConverter * converter,
    427                    char *mySubChar,
    428                    int8_t * len,
    429                    UErrorCode * err)
    430 {
    431    if (U_FAILURE (*err))
    432        return;
    433 
    434    if (converter->subCharLen <= 0) {
    435        /* Unicode string or empty string from ucnv_setSubstString(). */
    436        *len = 0;
    437        return;
    438    }
    439 
    440    if (*len < converter->subCharLen) /*not enough space in subChars */
    441    {
    442        *err = U_INDEX_OUTOFBOUNDS_ERROR;
    443        return;
    444    }
    445 
    446    uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen);   /*fills in the subchars */
    447    *len = converter->subCharLen; /*store # of bytes copied to buffer */
    448 }
    449 
    450 U_CAPI void    U_EXPORT2
    451 ucnv_setSubstChars (UConverter * converter,
    452                    const char *mySubChar,
    453                    int8_t len,
    454                    UErrorCode * err)
    455 {
    456    if (U_FAILURE (*err))
    457        return;
    458    
    459    /*Makes sure that the subChar is within the codepages char length boundaries */
    460    if ((len > converter->sharedData->staticData->maxBytesPerChar)
    461     || (len < converter->sharedData->staticData->minBytesPerChar))
    462    {
    463        *err = U_ILLEGAL_ARGUMENT_ERROR;
    464        return;
    465    }
    466    
    467    uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
    468    converter->subCharLen = len;  /*sets the new len */
    469 
    470    /*
    471    * There is currently (2001Feb) no separate API to set/get subChar1.
    472    * In order to always have subChar written after it is explicitly set,
    473    * we set subChar1 to 0.
    474    */
    475    converter->subChar1 = 0;
    476 }
    477 
    478 U_CAPI void U_EXPORT2
    479 ucnv_setSubstString(UConverter *cnv,
    480                    const char16_t *s,
    481                    int32_t length,
    482                    UErrorCode *err) {
    483    alignas(UConverter) char cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE];
    484    char chars[UCNV_ERROR_BUFFER_LENGTH];
    485 
    486    UConverter *clone;
    487    uint8_t *subChars;
    488    int32_t cloneSize, length8;
    489 
    490    /* Let the following functions check all arguments. */
    491    cloneSize = sizeof(cloneBuffer);
    492    clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
    493    ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, nullptr, nullptr, nullptr, err);
    494    length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
    495    ucnv_close(clone);
    496    if (U_FAILURE(*err)) {
    497        return;
    498    }
    499 
    500    if (cnv->sharedData->impl->writeSub == nullptr
    501 #if !UCONFIG_NO_LEGACY_CONVERSION
    502        || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
    503         ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
    504 #endif
    505    ) {
    506        /* The converter is not stateful. Store the charset bytes as a fixed string. */
    507        subChars = (uint8_t *)chars;
    508    } else {
    509        /*
    510         * The converter has a non-default writeSub() function, indicating
    511         * that it is stateful.
    512         * Store the Unicode string for on-the-fly conversion for correct
    513         * state handling.
    514         */
    515        if (length > UCNV_ERROR_BUFFER_LENGTH) {
    516            /*
    517             * Should not occur. The converter should output at least one byte
    518             * per char16_t, which means that ucnv_fromUChars() should catch all
    519             * overflows.
    520             */
    521            *err = U_BUFFER_OVERFLOW_ERROR;
    522            return;
    523        }
    524        subChars = (uint8_t *)s;
    525        if (length < 0) {
    526            length = u_strlen(s);
    527        }
    528        length8 = length * U_SIZEOF_UCHAR;
    529    }
    530 
    531    /*
    532     * For storing the substitution string, select either the small buffer inside
    533     * UConverter or allocate a subChars buffer.
    534     */
    535    if (length8 > UCNV_MAX_SUBCHAR_LEN) {
    536        /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
    537        if (cnv->subChars == (uint8_t *)cnv->subUChars) {
    538            /* Allocate a new buffer for the string. */
    539            cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
    540            if (cnv->subChars == nullptr) {
    541                cnv->subChars = (uint8_t *)cnv->subUChars;
    542                *err = U_MEMORY_ALLOCATION_ERROR;
    543                return;
    544            }
    545            uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
    546        }
    547    }
    548 
    549    /* Copy the substitution string into the UConverter or its subChars buffer. */
    550    if (length8 == 0) {
    551        cnv->subCharLen = 0;
    552    } else {
    553        uprv_memcpy(cnv->subChars, subChars, length8);
    554        if (subChars == (uint8_t *)chars) {
    555            cnv->subCharLen = (int8_t)length8;
    556        } else /* subChars == s */ {
    557            cnv->subCharLen = (int8_t)-length;
    558        }
    559    }
    560 
    561    /* See comment in ucnv_setSubstChars(). */
    562    cnv->subChar1 = 0;
    563 }
    564 
    565 /*resets the internal states of a converter
    566 *goal : have the same behaviour than a freshly created converter
    567 */
    568 static void _reset(UConverter *converter, UConverterResetChoice choice,
    569                   UBool callCallback) {
    570    if(converter == nullptr) {
    571        return;
    572    }
    573 
    574    if(callCallback) {
    575        /* first, notify the callback functions that the converter is reset */
    576        UErrorCode errorCode;
    577 
    578        if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
    579            UConverterToUnicodeArgs toUArgs = {
    580                sizeof(UConverterToUnicodeArgs),
    581                true,
    582                nullptr,
    583                nullptr,
    584                nullptr,
    585                nullptr,
    586                nullptr,
    587                nullptr
    588            };
    589            toUArgs.converter = converter;
    590            errorCode = U_ZERO_ERROR;
    591            converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, nullptr, 0, UCNV_RESET, &errorCode);
    592        }
    593        if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
    594            UConverterFromUnicodeArgs fromUArgs = {
    595                sizeof(UConverterFromUnicodeArgs),
    596                true,
    597                nullptr,
    598                nullptr,
    599                nullptr,
    600                nullptr,
    601                nullptr,
    602                nullptr
    603            };
    604            fromUArgs.converter = converter;
    605            errorCode = U_ZERO_ERROR;
    606            converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, nullptr, 0, 0, UCNV_RESET, &errorCode);
    607        }
    608    }
    609 
    610    /* now reset the converter itself */
    611    if(choice<=UCNV_RESET_TO_UNICODE) {
    612        converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
    613        converter->mode = 0;
    614        converter->toULength = 0;
    615        converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
    616        converter->preToULength = 0;
    617    }
    618    if(choice!=UCNV_RESET_TO_UNICODE) {
    619        converter->fromUnicodeStatus = 0;
    620        converter->fromUChar32 = 0;
    621        converter->invalidUCharLength = converter->charErrorBufferLength = 0;
    622        converter->preFromUFirstCP = U_SENTINEL;
    623        converter->preFromULength = 0;
    624    }
    625 
    626    if (converter->sharedData->impl->reset != nullptr) {
    627        /* call the custom reset function */
    628        converter->sharedData->impl->reset(converter, choice);
    629    }
    630 }
    631 
    632 U_CAPI void  U_EXPORT2
    633 ucnv_reset(UConverter *converter)
    634 {
    635    _reset(converter, UCNV_RESET_BOTH, true);
    636 }
    637 
    638 U_CAPI void  U_EXPORT2
    639 ucnv_resetToUnicode(UConverter *converter)
    640 {
    641    _reset(converter, UCNV_RESET_TO_UNICODE, true);
    642 }
    643 
    644 U_CAPI void  U_EXPORT2
    645 ucnv_resetFromUnicode(UConverter *converter)
    646 {
    647    _reset(converter, UCNV_RESET_FROM_UNICODE, true);
    648 }
    649 
    650 U_CAPI int8_t   U_EXPORT2
    651 ucnv_getMaxCharSize (const UConverter * converter)
    652 {
    653    return converter->maxBytesPerUChar;
    654 }
    655 
    656 
    657 U_CAPI int8_t   U_EXPORT2
    658 ucnv_getMinCharSize (const UConverter * converter)
    659 {
    660    return converter->sharedData->staticData->minBytesPerChar;
    661 }
    662 
    663 U_CAPI const char*   U_EXPORT2
    664 ucnv_getName (const UConverter * converter, UErrorCode * err)
    665     
    666 {
    667    if (U_FAILURE (*err))
    668        return nullptr;
    669    if(converter->sharedData->impl->getName){
    670        const char* temp= converter->sharedData->impl->getName(converter);
    671        if(temp)
    672            return temp;
    673    }
    674    return converter->sharedData->staticData->name;
    675 }
    676 
    677 U_CAPI int32_t U_EXPORT2
    678 ucnv_getCCSID(const UConverter * converter,
    679              UErrorCode * err)
    680 {
    681    int32_t ccsid;
    682    if (U_FAILURE (*err))
    683        return -1;
    684 
    685    ccsid = converter->sharedData->staticData->codepage;
    686    if (ccsid == 0) {
    687        /* Rare case. This is for cases like gb18030,
    688        which doesn't have an IBM canonical name, but does have an IBM alias. */
    689        const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
    690        if (U_SUCCESS(*err) && standardName) {
    691            const char *ccsidStr = uprv_strchr(standardName, '-');
    692            if (ccsidStr) {
    693                ccsid = (int32_t)atol(ccsidStr+1);  /* +1 to skip '-' */
    694            }
    695        }
    696    }
    697    return ccsid;
    698 }
    699 
    700 
    701 U_CAPI UConverterPlatform   U_EXPORT2
    702 ucnv_getPlatform (const UConverter * converter,
    703                                      UErrorCode * err)
    704 {
    705    if (U_FAILURE (*err))
    706        return UCNV_UNKNOWN;
    707 
    708    return (UConverterPlatform)converter->sharedData->staticData->platform;
    709 }
    710 
    711 U_CAPI void U_EXPORT2
    712    ucnv_getToUCallBack (const UConverter * converter,
    713                         UConverterToUCallback *action,
    714                         const void **context)
    715 {
    716    *action = converter->fromCharErrorBehaviour;
    717    *context = converter->toUContext;
    718 }
    719 
    720 U_CAPI void U_EXPORT2
    721    ucnv_getFromUCallBack (const UConverter * converter,
    722                           UConverterFromUCallback *action,
    723                           const void **context)
    724 {
    725    *action = converter->fromUCharErrorBehaviour;
    726    *context = converter->fromUContext;
    727 }
    728 
    729 U_CAPI void    U_EXPORT2
    730 ucnv_setToUCallBack (UConverter * converter,
    731                            UConverterToUCallback newAction,
    732                            const void* newContext,
    733                            UConverterToUCallback *oldAction,
    734                            const void** oldContext,
    735                            UErrorCode * err)
    736 {
    737    if (U_FAILURE (*err))
    738        return;
    739    if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
    740    converter->fromCharErrorBehaviour = newAction;
    741    if (oldContext) *oldContext = converter->toUContext;
    742    converter->toUContext = newContext;
    743 }
    744 
    745 U_CAPI void  U_EXPORT2
    746 ucnv_setFromUCallBack (UConverter * converter,
    747                            UConverterFromUCallback newAction,
    748                            const void* newContext,
    749                            UConverterFromUCallback *oldAction,
    750                            const void** oldContext,
    751                            UErrorCode * err)
    752 {
    753    if (U_FAILURE (*err))
    754        return;
    755    if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
    756    converter->fromUCharErrorBehaviour = newAction;
    757    if (oldContext) *oldContext = converter->fromUContext;
    758    converter->fromUContext = newContext;
    759 }
    760 
    761 static void
    762 _updateOffsets(int32_t *offsets, int32_t length,
    763               int32_t sourceIndex, int32_t errorInputLength) {
    764    int32_t *limit;
    765    int32_t delta, offset;
    766 
    767    if(sourceIndex>=0) {
    768        /*
    769         * adjust each offset by adding the previous sourceIndex
    770         * minus the length of the input sequence that caused an
    771         * error, if any
    772         */
    773        delta=sourceIndex-errorInputLength;
    774    } else {
    775        /*
    776         * set each offset to -1 because this conversion function
    777         * does not handle offsets
    778         */
    779        delta=-1;
    780    }
    781 
    782    limit=offsets+length;
    783    if(delta==0) {
    784        /* most common case, nothing to do */
    785    } else if(delta>0) {
    786        /* add the delta to each offset (but not if the offset is <0) */
    787        while(offsets<limit) {
    788            offset=*offsets;
    789            if(offset>=0) {
    790                *offsets=offset+delta;
    791            }
    792            ++offsets;
    793        }
    794    } else /* delta<0 */ {
    795        /*
    796         * set each offset to -1 because this conversion function
    797         * does not handle offsets
    798         * or the error input sequence started in a previous buffer
    799         */
    800        while(offsets<limit) {
    801            *offsets++=-1;
    802        }
    803    }
    804 }
    805 
    806 /* ucnv_fromUnicode --------------------------------------------------------- */
    807 
    808 /*
    809 * Implementation note for m:n conversions
    810 *
    811 * While collecting source units to find the longest match for m:n conversion,
    812 * some source units may need to be stored for a partial match.
    813 * When a second buffer does not yield a match on all of the previously stored
    814 * source units, then they must be "replayed", i.e., fed back into the converter.
    815 *
    816 * The code relies on the fact that replaying will not nest -
    817 * converting a replay buffer will not result in a replay.
    818 * This is because a replay is necessary only after the _continuation_ of a
    819 * partial match failed, but a replay buffer is converted as a whole.
    820 * It may result in some of its units being stored again for a partial match,
    821 * but there will not be a continuation _during_ the replay which could fail.
    822 *
    823 * It is conceivable that a callback function could call the converter
    824 * recursively in a way that causes another replay to be stored, but that
    825 * would be an error in the callback function.
    826 * Such violations will cause assertion failures in a debug build,
    827 * and wrong output, but they will not cause a crash.
    828 */
    829 
        /*
        * Run the converter's fromUnicode implementation over pArgs and handle its
        * results: offsets fix-up, replay of stored m:n source units,
        * end-of-input processing, and the from-Unicode error callback.
        *
        * pArgs: conversion arguments. pArgs->offsets is advanced here; while a
        *        replay is in progress pArgs->source/sourceLimit/flush point at a
        *        local buffer and are restored before this function returns.
        * err:   in/out error code. If it is already a failure on entry, the
        *        implementation function is skipped and the error is handled
        *        directly (callback or return).
        *
        * The callback is consulted for U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND
        * and U_TRUNCATED_CHAR_FOUND; any other failure, or a failure that is
        * still set after the callback ran, ends the function with *err set.
        */
    830 static void
    831 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
    832    UConverterFromUnicode fromUnicode;
    833    UConverter *cnv;
    834    const char16_t *s;
    835    char *t;
    836    int32_t *offsets;
    837    int32_t sourceIndex;
    838    int32_t errorInputLength;
    839    UBool converterSawEndOfInput, calledCallback;
    840 
    841    /* variables for m:n conversion */
    842    char16_t replay[UCNV_EXT_MAX_UCHARS];
    843    const char16_t *realSource, *realSourceLimit;
    844    int32_t realSourceIndex;
    845    UBool realFlush;
    846 
    847    cnv=pArgs->converter;
    848    s=pArgs->source;
    849    t=pArgs->target;
    850    offsets=pArgs->offsets;
    851 
    852    /* get the converter implementation function */
    853    sourceIndex=0;
    854    if(offsets==nullptr) {
    855        fromUnicode=cnv->sharedData->impl->fromUnicode;
    856    } else {
    857        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
    858        if(fromUnicode==nullptr) {
    859            /* there is no WithOffsets implementation */
    860            fromUnicode=cnv->sharedData->impl->fromUnicode;
    861            /* we will write -1 for each offset */
    862            sourceIndex=-1;
    863        }
    864    }
    865 
    866    if(cnv->preFromULength>=0) {
    867        /* normal mode */
    868        realSource=nullptr;
    869 
    870        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
    871        realSourceLimit=nullptr;
    872        realFlush=false;
    873        realSourceIndex=0;
    874    } else {
    875        /*
    876         * Previous m:n conversion stored source units from a partial match
    877         * and failed to consume all of them.
    878         * We need to "replay" them from a temporary buffer and convert them first.
    879         */
    880        realSource=pArgs->source;
    881        realSourceLimit=pArgs->sourceLimit;
    882        realFlush=pArgs->flush;
    883        realSourceIndex=sourceIndex;
    884 
               /* preFromULength is negative here; its negation is the number of stored units */
    885        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
    886        pArgs->source=replay;
    887        pArgs->sourceLimit=replay-cnv->preFromULength;
    888        pArgs->flush=false;
               /* replayed units have no index in the current source buffer: their offsets become -1 */
    889        sourceIndex=-1;
    890 
    891        cnv->preFromULength=0;
    892    }
    893 
    894    /*
    895     * loop for conversion and error handling
    896     *
    897     * loop {
    898     *   convert
    899     *   loop {
    900     *     update offsets
    901     *     handle end of input
    902     *     handle errors/call callback
    903     *   }
    904     * }
    905     */
    906    for(;;) {
    907        if(U_SUCCESS(*err)) {
    908            /* convert */
    909            fromUnicode(pArgs, err);
    910 
    911            /*
    912             * set a flag for whether the converter
    913             * successfully processed the end of the input
    914             *
    915             * need not check cnv->preFromULength==0 because a replay (<0) will cause
    916             * s<sourceLimit before converterSawEndOfInput is checked
    917             */
    918            converterSawEndOfInput=
    919                static_cast<UBool>(U_SUCCESS(*err) &&
    920                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
    921                        cnv->fromUChar32==0);
    922        } else {
    923            /* handle error from ucnv_convertEx() */
    924            converterSawEndOfInput=false;
    925        }
    926 
    927        /* no callback called yet for this iteration */
    928        calledCallback=false;
    929 
    930        /* no sourceIndex adjustment for conversion, only for callback output */
    931        errorInputLength=0;
    932 
    933        /*
    934         * loop for offsets and error handling
    935         *
    936         * iterates at most 3 times:
    937         * 1. to clean up after the conversion function
    938         * 2. after the callback
    939         * 3. after the callback again if there was truncated input
    940         */
    941        for(;;) {
    942            /* update offsets if we write any */
    943            if(offsets!=nullptr) {
    944                int32_t length = static_cast<int32_t>(pArgs->target - t);
    945                if(length>0) {
    946                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
    947 
    948                    /*
    949                     * if a converter handles offsets and updates the offsets
    950                     * pointer at the end, then pArgs->offset should not change
    951                     * here;
    952                     * however, some converters do not handle offsets at all
    953                     * (sourceIndex<0) or may not update the offsets pointer
    954                     */
    955                    pArgs->offsets=offsets+=length;
    956                }
    957 
    958                if(sourceIndex>=0) {
    959                    sourceIndex += static_cast<int32_t>(pArgs->source - s);
    960                }
    961            }
    962 
    963            if(cnv->preFromULength<0) {
    964                /*
    965                 * switch the source to new replay units (cannot occur while replaying)
    966                 * after offset handling and before end-of-input and callback handling
    967                 */
    968                if(realSource==nullptr) {
    969                    realSource=pArgs->source;
    970                    realSourceLimit=pArgs->sourceLimit;
    971                    realFlush=pArgs->flush;
    972                    realSourceIndex=sourceIndex;
    973 
    974                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
    975                    pArgs->source=replay;
    976                    pArgs->sourceLimit=replay-cnv->preFromULength;
    977                    pArgs->flush=false;
                           /*
                            * preFromULength<0: step sourceIndex back by the number of
                            * replayed units; if that falls before the current buffer,
                            * stop tracking offsets for the replay
                            */
    978                    if((sourceIndex+=cnv->preFromULength)<0) {
    979                        sourceIndex=-1;
    980                    }
    981 
    982                    cnv->preFromULength=0;
    983                } else {
    984                    /* see implementation note before _fromUnicodeWithCallback() */
    985                    U_ASSERT(realSource==nullptr);
                           /* a replay was stored while already replaying: fail with an internal error */
    986                    *err=U_INTERNAL_PROGRAM_ERROR;
    987                }
    988            }
    989 
    990            /* update pointers */
    991            s=pArgs->source;
    992            t=pArgs->target;
    993 
    994            if(U_SUCCESS(*err)) {
    995                if(s<pArgs->sourceLimit) {
    996                    /*
    997                     * continue with the conversion loop while there is still input left
    998                     * (continue converting by breaking out of only the inner loop)
    999                     */
   1000                    break;
   1001                } else if(realSource!=nullptr) {
   1002                    /* switch back from replaying to the real source and continue */
   1003                    pArgs->source=realSource;
   1004                    pArgs->sourceLimit=realSourceLimit;
   1005                    pArgs->flush=realFlush;
   1006                    sourceIndex=realSourceIndex;
   1007 
   1008                    realSource=nullptr;
   1009                    break;
   1010                } else if(pArgs->flush && cnv->fromUChar32!=0) {
   1011                    /*
   1012                     * the entire input stream is consumed
   1013                     * and there is a partial, truncated input sequence left
   1014                     */
   1015 
   1016                    /* inject an error and continue with callback handling */
   1017                    *err=U_TRUNCATED_CHAR_FOUND;
   1018                    calledCallback=false; /* new error condition */
   1019                } else {
   1020                    /* input consumed */
   1021                    if(pArgs->flush) {
   1022                        /*
   1023                         * return to the conversion loop once more if the flush
   1024                         * flag is set and the conversion function has not
   1025                         * successfully processed the end of the input yet
   1026                         *
   1027                         * (continue converting by breaking out of only the inner loop)
   1028                         */
   1029                        if(!converterSawEndOfInput) {
   1030                            break;
   1031                        }
   1032 
   1033                        /* reset the converter without calling the callback function */
   1034                        _reset(cnv, UCNV_RESET_FROM_UNICODE, false);
   1035                    }
   1036 
   1037                    /* done successfully */
   1038                    return;
   1039                }
   1040            }
   1041 
   1042            /* U_FAILURE(*err) */
   1043            {
   1044                UErrorCode e;
   1045 
   1046                if( calledCallback ||
   1047                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
   1048                    (e!=U_INVALID_CHAR_FOUND &&
   1049                     e!=U_ILLEGAL_CHAR_FOUND &&
   1050                     e!=U_TRUNCATED_CHAR_FOUND)
   1051                ) {
   1052                    /*
   1053                     * the callback did not or cannot resolve the error:
   1054                     * set output pointers and return
   1055                     *
   1056                     * the check for buffer overflow is redundant but it is
   1057                     * a high-runner case and hopefully documents the intent
   1058                     * well
   1059                     *
   1060                     * if we were replaying, then the replay buffer must be
   1061                     * copied back into the UConverter
   1062                     * and the real arguments must be restored
   1063                     */
   1064                    if(realSource!=nullptr) {
   1065                        int32_t length;
   1066 
   1067                        U_ASSERT(cnv->preFromULength==0);
   1068 
   1069                        length = static_cast<int32_t>(pArgs->sourceLimit - pArgs->source);
   1070                        if(length>0) {
                                   /* keep the unconsumed replay units; the negative length marks them for replay */
   1071                            u_memcpy(cnv->preFromU, pArgs->source, length);
   1072                            cnv->preFromULength = static_cast<int8_t>(-length);
   1073                        }
   1074 
   1075                        pArgs->source=realSource;
   1076                        pArgs->sourceLimit=realSourceLimit;
   1077                        pArgs->flush=realFlush;
   1078                    }
   1079 
   1080                    return;
   1081                }
   1082            }
   1083 
   1084            /* callback handling */
   1085            {
   1086                UChar32 codePoint;
   1087 
   1088                /* get and write the code point */
   1089                codePoint=cnv->fromUChar32;
   1090                errorInputLength=0;
                       /* errorInputLength is the write index into invalidUCharBuffer and ends up as the stored length */
   1091                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
   1092                cnv->invalidUCharLength = static_cast<int8_t>(errorInputLength);
   1093 
   1094                /* set the converter state to deal with the next character */
   1095                cnv->fromUChar32=0;
   1096 
   1097                /* call the callback function */
                       /* only U_INVALID_CHAR_FOUND is reported as UCNV_UNASSIGNED; other errors as UCNV_ILLEGAL */
   1098                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
   1099                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
   1100                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
   1101                    err);
   1102            }
   1103 
   1104            /*
   1105             * loop back to the offset handling
   1106             *
   1107             * this flag will indicate after offset handling
   1108             * that a callback was called;
   1109             * if the callback did not resolve the error, then we return
   1110             */
   1111            calledCallback=true;
   1112        }
   1113    }
   1114 }
   1115 
   1116 /*
   1117 * Flush the fromUnicode overflow buffer into the target.
   1118 * Bytes pending in cnv->charErrorBuffer are moved to *target until either
   1119 * the pending bytes run out or *target reaches targetLimit. For every byte
   1120 * written, -1 is stored as its offset (when offsets are requested) because
   1121 * no source index is available for old output.
   1122 * Call this function if(cnv->charErrorBufferLength>0).
   1123 *
   1124 * @param cnv         converter whose overflow buffer is drained
   1125 * @param target      in/out write position in the output buffer
   1126 * @param targetLimit end of the output buffer
   1127 * @param pOffsets    in/out offsets write position; may be nullptr, and may
   1128 *                    point to nullptr, when no offsets are wanted
   1129 * @param err         set to U_BUFFER_OVERFLOW_ERROR if bytes remain pending
   1130 * @return true if the target filled up before the buffer was emptied
   1131 */
   1132 static UBool
   1133 ucnv_outputOverflowFromUnicode(UConverter *cnv,
   1134                               char **target, const char *targetLimit,
   1135                               int32_t **pOffsets,
   1136                               UErrorCode *err) {
   1137    char *const pending = reinterpret_cast<char*>(cnv->charErrorBuffer);
   1138    const int32_t pendingLength = cnv->charErrorBufferLength;
   1139    int32_t *offsetOut = (pOffsets != nullptr) ? *pOffsets : nullptr;
   1140    char *out = *target;
   1141    int32_t taken = 0;
   1142    UBool overflowed = false;
   1143 
   1144    /* move pending bytes while there are some and the target has room */
   1145    while(taken < pendingLength && out != targetLimit) {
   1146        *out++ = pending[taken++];
   1147        if(offsetOut != nullptr) {
   1148            *offsetOut++ = -1; /* no source index available for old output */
   1149        }
   1150    }
   1151 
   1152    if(taken < pendingLength) {
   1153        /* the target is full: slide the unwritten tail to the buffer start */
   1154        int32_t kept = 0;
   1155        while(taken < pendingLength) {
   1156            pending[kept++] = pending[taken++];
   1157        }
   1158        cnv->charErrorBufferLength = static_cast<int8_t>(kept);
   1159        *err = U_BUFFER_OVERFLOW_ERROR;
   1160        overflowed = true;
   1161    } else {
   1162        /* the overflow buffer is completely copied to the target */
   1163        cnv->charErrorBufferLength = 0;
   1164    }
   1165 
   1166    /* hand the advanced output positions back to the caller */
   1167    *target = out;
   1168    if(offsetOut != nullptr) {
   1169        *pOffsets = offsetOut;
   1170    }
   1171    return overflowed;
   1172 }
   1173 
   1174 U_CAPI void U_EXPORT2
   1175 ucnv_fromUnicode(UConverter *cnv,
   1176                 char **target, const char *targetLimit,
   1177                 const char16_t **source, const char16_t *sourceLimit,
   1178                 int32_t *offsets,
   1179                 UBool flush,
   1180                 UErrorCode *err) {
   1181    /* nothing to do if there is no error code or it already holds a failure */
   1182    if(err==nullptr || U_FAILURE(*err)) {
   1183        return;
   1184    }
   1185 
   1186    /* the converter and both in/out pointers are required */
   1187    if(cnv==nullptr || target==nullptr || source==nullptr) {
   1188        *err=U_ILLEGAL_ARGUMENT_ERROR;
   1189        return;
   1190    }
   1191 
   1192    const char16_t *const srcStart=*source;
   1193    char *const dstStart=*target;
   1194 
   1195    /*
   1196     * If sourceLimit equals U_MAX_PTR(sourceLimit), pull it back by one byte.
   1197     * This prevents an infinite loop in case that limit is ever hit,
   1198     * and it prevents the following argument check from failing.
   1199     * The limit pointer is expected to be on a char16_t * boundary.
   1200     */
   1201    if((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
   1202        sourceLimit=reinterpret_cast<const char16_t *>(reinterpret_cast<const char *>(sourceLimit)-1);
   1203    }
   1204 
   1205    /*
   1206     * Argument sanity checks; none of these conditions should ever be true.
   1207     *
   1208     * - A limit must not lie below its start pointer.
   1209     * - The source must not exceed 0x3fffffff units and the target must not
   1210     *   exceed 0x7fffffff bytes, because some functions use sizes
   1211     *   (in units or bytes) rather than comparing pointers, and because
   1212     *   offsets are int32_t values.
   1213     * - The source must span an even number of bytes; an odd count means
   1214     *   that a char * was incorrectly cast to char16_t * and the last
   1215     *   code unit is incomplete.
   1216     *
   1217     * The call is rejected rather than the limits adjusted (for example to
   1218     * targetLimit=target+0x7fffffff), since an adjustment would give up the
   1219     * semantics that either the source is consumed or the target is filled
   1220     * unless an error occurs.
   1221     */
   1222    if(sourceLimit<srcStart || targetLimit<dstStart) {
   1223        *err=U_ILLEGAL_ARGUMENT_ERROR;
   1224        return;
   1225    }
   1226    const size_t sourceUnits=static_cast<size_t>(sourceLimit-srcStart);
   1227    const size_t targetBytes=static_cast<size_t>(targetLimit-dstStart);
   1228    const bool oddSourceBytes=
   1229        ((reinterpret_cast<const char *>(sourceLimit)-reinterpret_cast<const char *>(srcStart)) & 1) != 0;
   1230    if((sourceUnits>static_cast<size_t>(0x3fffffff) && sourceLimit>srcStart) ||
   1231        (targetBytes>static_cast<size_t>(0x7fffffff) && targetLimit>dstStart) ||
   1232        oddSourceBytes) {
   1233        *err=U_ILLEGAL_ARGUMENT_ERROR;
   1234        return;
   1235    }
   1236 
   1237    /* first deliver bytes that are still waiting in the overflow buffer */
   1238    if(cnv->charErrorBufferLength>0) {
   1239        if(ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)) {
   1240            /* U_BUFFER_OVERFLOW_ERROR */
   1241            return;
   1242        }
   1243    }
   1244    /* *target may have moved, so dstStart must not be used from here on */
   1245 
   1246    /* overflow buffer emptied, no new input, nothing stored for replay: done */
   1247    if(!flush && srcStart==sourceLimit && cnv->preFromULength>=0) {
   1248        return;
   1249    }
   1250 
   1251    /*
   1252     * A full target with !flush is deliberately not treated as a buffer
   1253     * overflow here: the source may not generate any output, for example
   1254     * when the skip callback is called, which does not output anything.
   1255     */
   1256 
   1257    /* prepare the converter arguments for the conversion loop */
   1258    UConverterFromUnicodeArgs args;
   1259    args.size=sizeof(args);
   1260    args.converter=cnv;
   1261    args.flush=flush;
   1262    args.offsets=offsets;
   1263    args.source=srcStart;
   1264    args.sourceLimit=sourceLimit;
   1265    args.target=*target;
   1266    args.targetLimit=targetLimit;
   1267 
   1268    _fromUnicodeWithCallback(&args, err);
   1269    *source=args.source;
   1270    *target=args.target;
   1271 }
   1272 
   1273 /* ucnv_toUnicode() --------------------------------------------------------- */
   1274 
        /*
        * Run the converter's toUnicode implementation over pArgs and handle its
        * results: offsets fix-up, replay of stored m:n source bytes,
        * end-of-input processing, and the to-Unicode error callback.
        *
        * pArgs: conversion arguments. pArgs->offsets is advanced here; while a
        *        replay is in progress pArgs->source/sourceLimit/flush point at a
        *        local buffer and are restored before this function returns.
        * err:   in/out error code. If it is already a failure on entry, the
        *        implementation function is skipped and the error is handled
        *        directly (callback or return).
        *
        * The callback is consulted for U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND,
        * U_TRUNCATED_CHAR_FOUND, U_ILLEGAL_ESCAPE_SEQUENCE and
        * U_UNSUPPORTED_ESCAPE_SEQUENCE; any other failure, or a failure that is
        * still set after the callback ran, ends the function with *err set.
        */
   1275 static void
   1276 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
   1277    UConverterToUnicode toUnicode;
   1278    UConverter *cnv;
   1279    const char *s;
   1280    char16_t *t;
   1281    int32_t *offsets;
   1282    int32_t sourceIndex;
   1283    int32_t errorInputLength;
   1284    UBool converterSawEndOfInput, calledCallback;
   1285 
   1286    /* variables for m:n conversion */
   1287    char replay[UCNV_EXT_MAX_BYTES];
   1288    const char *realSource, *realSourceLimit;
   1289    int32_t realSourceIndex;
   1290    UBool realFlush;
   1291 
   1292    cnv=pArgs->converter;
   1293    s=pArgs->source;
   1294    t=pArgs->target;
   1295    offsets=pArgs->offsets;
   1296 
   1297    /* get the converter implementation function */
   1298    sourceIndex=0;
   1299    if(offsets==nullptr) {
   1300        toUnicode=cnv->sharedData->impl->toUnicode;
   1301    } else {
   1302        toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
   1303        if(toUnicode==nullptr) {
   1304            /* there is no WithOffsets implementation */
   1305            toUnicode=cnv->sharedData->impl->toUnicode;
   1306            /* we will write -1 for each offset */
   1307            sourceIndex=-1;
   1308        }
   1309    }
   1310 
   1311    if(cnv->preToULength>=0) {
   1312        /* normal mode */
   1313        realSource=nullptr;
   1314 
   1315        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
   1316        realSourceLimit=nullptr;
   1317        realFlush=false;
   1318        realSourceIndex=0;
   1319    } else {
   1320        /*
   1321         * Previous m:n conversion stored source units from a partial match
   1322         * and failed to consume all of them.
   1323         * We need to "replay" them from a temporary buffer and convert them first.
   1324         */
   1325        realSource=pArgs->source;
   1326        realSourceLimit=pArgs->sourceLimit;
   1327        realFlush=pArgs->flush;
   1328        realSourceIndex=sourceIndex;
   1329 
               /* preToULength is negative here; its negation is the number of stored bytes */
   1330        uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
   1331        pArgs->source=replay;
   1332        pArgs->sourceLimit=replay-cnv->preToULength;
   1333        pArgs->flush=false;
               /* replayed bytes have no index in the current source buffer: their offsets become -1 */
   1334        sourceIndex=-1;
   1335 
   1336        cnv->preToULength=0;
   1337    }
   1338 
   1339    /*
   1340     * loop for conversion and error handling
   1341     *
   1342     * loop {
   1343     *   convert
   1344     *   loop {
   1345     *     update offsets
   1346     *     handle end of input
   1347     *     handle errors/call callback
   1348     *   }
   1349     * }
   1350     */
   1351    for(;;) {
   1352        if(U_SUCCESS(*err)) {
   1353            /* convert */
   1354            toUnicode(pArgs, err);
   1355 
   1356            /*
   1357             * set a flag for whether the converter
   1358             * successfully processed the end of the input
   1359             *
   1360             * need not check cnv->preToULength==0 because a replay (<0) will cause
   1361             * s<sourceLimit before converterSawEndOfInput is checked
   1362             */
   1363            converterSawEndOfInput=
   1364                static_cast<UBool>(U_SUCCESS(*err) &&
   1365                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
   1366                        cnv->toULength==0);
   1367        } else {
   1368            /* handle error from getNextUChar() or ucnv_convertEx() */
   1369            converterSawEndOfInput=false;
   1370        }
   1371 
   1372        /* no callback called yet for this iteration */
   1373        calledCallback=false;
   1374 
   1375        /* no sourceIndex adjustment for conversion, only for callback output */
   1376        errorInputLength=0;
   1377 
   1378        /*
   1379         * loop for offsets and error handling
   1380         *
   1381         * iterates at most 3 times:
   1382         * 1. to clean up after the conversion function
   1383         * 2. after the callback
   1384         * 3. after the callback again if there was truncated input
   1385         */
   1386        for(;;) {
   1387            /* update offsets if we write any */
   1388            if(offsets!=nullptr) {
   1389                int32_t length = static_cast<int32_t>(pArgs->target - t);
   1390                if(length>0) {
   1391                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
   1392 
   1393                    /*
   1394                     * if a converter handles offsets and updates the offsets
   1395                     * pointer at the end, then pArgs->offset should not change
   1396                     * here;
   1397                     * however, some converters do not handle offsets at all
   1398                     * (sourceIndex<0) or may not update the offsets pointer
   1399                     */
   1400                    pArgs->offsets=offsets+=length;
   1401                }
   1402 
   1403                if(sourceIndex>=0) {
   1404                    sourceIndex += static_cast<int32_t>(pArgs->source - s);
   1405                }
   1406            }
   1407 
   1408            if(cnv->preToULength<0) {
   1409                /*
   1410                 * switch the source to new replay units (cannot occur while replaying)
   1411                 * after offset handling and before end-of-input and callback handling
   1412                 */
   1413                if(realSource==nullptr) {
   1414                    realSource=pArgs->source;
   1415                    realSourceLimit=pArgs->sourceLimit;
   1416                    realFlush=pArgs->flush;
   1417                    realSourceIndex=sourceIndex;
   1418 
   1419                    uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
   1420                    pArgs->source=replay;
   1421                    pArgs->sourceLimit=replay-cnv->preToULength;
   1422                    pArgs->flush=false;
                           /*
                            * preToULength<0: step sourceIndex back by the number of
                            * replayed bytes; if that falls before the current buffer,
                            * stop tracking offsets for the replay
                            */
   1423                    if((sourceIndex+=cnv->preToULength)<0) {
   1424                        sourceIndex=-1;
   1425                    }
   1426 
   1427                    cnv->preToULength=0;
   1428                } else {
   1429                    /* see implementation note before _fromUnicodeWithCallback() */
   1430                    U_ASSERT(realSource==nullptr);
                           /* a replay was stored while already replaying: fail with an internal error */
   1431                    *err=U_INTERNAL_PROGRAM_ERROR;
   1432                }
   1433            }
   1434 
   1435            /* update pointers */
   1436            s=pArgs->source;
   1437            t=pArgs->target;
   1438 
   1439            if(U_SUCCESS(*err)) {
   1440                if(s<pArgs->sourceLimit) {
   1441                    /*
   1442                     * continue with the conversion loop while there is still input left
   1443                     * (continue converting by breaking out of only the inner loop)
   1444                     */
   1445                    break;
   1446                } else if(realSource!=nullptr) {
   1447                    /* switch back from replaying to the real source and continue */
   1448                    pArgs->source=realSource;
   1449                    pArgs->sourceLimit=realSourceLimit;
   1450                    pArgs->flush=realFlush;
   1451                    sourceIndex=realSourceIndex;
   1452 
   1453                    realSource=nullptr;
   1454                    break;
   1455                } else if(pArgs->flush && cnv->toULength>0) {
   1456                    /*
   1457                     * the entire input stream is consumed
   1458                     * and there is a partial, truncated input sequence left
   1459                     */
   1460 
   1461                    /* inject an error and continue with callback handling */
   1462                    *err=U_TRUNCATED_CHAR_FOUND;
   1463                    calledCallback=false; /* new error condition */
   1464                } else {
   1465                    /* input consumed */
   1466                    if(pArgs->flush) {
   1467                        /*
   1468                         * return to the conversion loop once more if the flush
   1469                         * flag is set and the conversion function has not
   1470                         * successfully processed the end of the input yet
   1471                         *
   1472                         * (continue converting by breaking out of only the inner loop)
   1473                         */
   1474                        if(!converterSawEndOfInput) {
   1475                            break;
   1476                        }
   1477 
   1478                        /* reset the converter without calling the callback function */
   1479                        _reset(cnv, UCNV_RESET_TO_UNICODE, false);
   1480                    }
   1481 
   1482                    /* done successfully */
   1483                    return;
   1484                }
   1485            }
   1486 
   1487            /* U_FAILURE(*err) */
   1488            {
   1489                UErrorCode e;
   1490 
   1491                if( calledCallback ||
   1492                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
   1493                    (e!=U_INVALID_CHAR_FOUND &&
   1494                     e!=U_ILLEGAL_CHAR_FOUND &&
   1495                     e!=U_TRUNCATED_CHAR_FOUND &&
   1496                     e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
   1497                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
   1498                ) {
   1499                    /*
   1500                     * the callback did not or cannot resolve the error:
   1501                     * set output pointers and return
   1502                     *
   1503                     * the check for buffer overflow is redundant but it is
   1504                     * a high-runner case and hopefully documents the intent
   1505                     * well
   1506                     *
   1507                     * if we were replaying, then the replay buffer must be
   1508                     * copied back into the UConverter
   1509                     * and the real arguments must be restored
   1510                     */
   1511                    if(realSource!=nullptr) {
   1512                        int32_t length;
   1513 
   1514                        U_ASSERT(cnv->preToULength==0);
   1515 
   1516                        length = static_cast<int32_t>(pArgs->sourceLimit - pArgs->source);
   1517                        if(length>0) {
                                   /* keep the unconsumed replay bytes; the negative length marks them for replay */
   1518                            uprv_memcpy(cnv->preToU, pArgs->source, length);
   1519                            cnv->preToULength = static_cast<int8_t>(-length);
   1520                        }
   1521 
   1522                        pArgs->source=realSource;
   1523                        pArgs->sourceLimit=realSourceLimit;
   1524                        pArgs->flush=realFlush;
   1525                    }
   1526 
   1527                    return;
   1528                }
   1529            }
   1530 
   1531            /* copy toUBytes[] to invalidCharBuffer[] */
   1532            errorInputLength=cnv->invalidCharLength=cnv->toULength;
   1533            if(errorInputLength>0) {
   1534                uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
   1535            }
   1536 
   1537            /* set the converter state to deal with the next character */
   1538            cnv->toULength=0;
   1539 
   1540            /* call the callback function */
                   /*
                    * if the reason is still UCNV_ILLEGAL (the value it is reset to below)
                    * but the error is U_INVALID_CHAR_FOUND, report UCNV_UNASSIGNED instead
                    */
   1541            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
   1542                cnv->toUCallbackReason = UCNV_UNASSIGNED;
   1543            }
   1544            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
   1545                cnv->invalidCharBuffer, errorInputLength,
   1546                cnv->toUCallbackReason,
   1547                err);
   1548            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
   1549 
   1550            /*
   1551             * loop back to the offset handling
   1552             *
   1553             * this flag will indicate after offset handling
   1554             * that a callback was called;
   1555             * if the callback did not resolve the error, then we return
   1556             */
   1557            calledCallback=true;
   1558        }
   1559    }
   1560 }
   1561 
   1562 /*
   1563 * Flush the toUnicode overflow buffer into the target.
   1564 * Units pending in cnv->UCharErrorBuffer are moved to *target until either
   1565 * the pending units run out or *target reaches targetLimit. For every unit
   1566 * written, -1 is stored as its offset (when offsets are requested) because
   1567 * no source index is available for old output.
   1568 * Call this function if(cnv->UCharErrorBufferLength>0).
   1569 *
   1570 * @param cnv         converter whose overflow buffer is drained
   1571 * @param target      in/out write position in the output buffer
   1572 * @param targetLimit end of the output buffer
   1573 * @param pOffsets    in/out offsets write position; may be nullptr, and may
   1574 *                    point to nullptr, when no offsets are wanted
   1575 * @param err         set to U_BUFFER_OVERFLOW_ERROR if units remain pending
   1576 * @return true if the target filled up before the buffer was emptied
   1577 */
   1578 static UBool
   1579 ucnv_outputOverflowToUnicode(UConverter *cnv,
   1580                             char16_t **target, const char16_t *targetLimit,
   1581                             int32_t **pOffsets,
   1582                             UErrorCode *err) {
   1583    char16_t *const pending = cnv->UCharErrorBuffer;
   1584    const int32_t pendingLength = cnv->UCharErrorBufferLength;
   1585    int32_t *offsetOut = (pOffsets != nullptr) ? *pOffsets : nullptr;
   1586    char16_t *out = *target;
   1587    int32_t taken = 0;
   1588    UBool overflowed = false;
   1589 
   1590    /* move pending units while there are some and the target has room */
   1591    while(taken < pendingLength && out != targetLimit) {
   1592        *out++ = pending[taken++];
   1593        if(offsetOut != nullptr) {
   1594            *offsetOut++ = -1; /* no source index available for old output */
   1595        }
   1596    }
   1597 
   1598    if(taken < pendingLength) {
   1599        /* the target is full: slide the unwritten tail to the buffer start */
   1600        int32_t kept = 0;
   1601        while(taken < pendingLength) {
   1602            pending[kept++] = pending[taken++];
   1603        }
   1604        cnv->UCharErrorBufferLength = static_cast<int8_t>(kept);
   1605        *err = U_BUFFER_OVERFLOW_ERROR;
   1606        overflowed = true;
   1607    } else {
   1608        /* the overflow buffer is completely copied to the target */
   1609        cnv->UCharErrorBufferLength = 0;
   1610    }
   1611 
   1612    /* hand the advanced output positions back to the caller */
   1613    *target = out;
   1614    if(offsetOut != nullptr) {
   1615        *pOffsets = offsetOut;
   1616    }
   1617    return overflowed;
   1618 }
   1619 
   1620 U_CAPI void U_EXPORT2
   1621 ucnv_toUnicode(UConverter *cnv,
   1622               char16_t **target, const char16_t *targetLimit,
   1623               const char **source, const char *sourceLimit,
   1624               int32_t *offsets,
   1625               UBool flush,
   1626               UErrorCode *err) {
   1627    UConverterToUnicodeArgs args;
   1628    const char *s;
   1629    char16_t *t;
   1630 
   1631    /* check parameters */
   1632    if(err==nullptr || U_FAILURE(*err)) {
   1633        return;
   1634    }
   1635 
   1636    if(cnv==nullptr || target==nullptr || source==nullptr) {
   1637        *err=U_ILLEGAL_ARGUMENT_ERROR;
   1638        return;
   1639    }
   1640 
   1641    s=*source;
   1642    t=*target;
   1643 
   1644    if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
   1645        /*
   1646        Prevent code from going into an infinite loop in case we do hit this
   1647        limit. The limit pointer is expected to be on a char16_t * boundary.
   1648        This also prevents the next argument check from failing.
   1649        */
   1650        targetLimit = (const char16_t *)(((const char *)targetLimit) - 1);
   1651    }
   1652 
   1653    /*
   1654     * All these conditions should never happen.
   1655     *
   1656     * 1) Make sure that the limits are >= to the address source or target
   1657     *
   1658     * 2) Make sure that the buffer sizes do not exceed the number range for
   1659     * int32_t because some functions use the size (in units or bytes)
   1660     * rather than comparing pointers, and because offsets are int32_t values.
   1661     *
   1662     * size_t is guaranteed to be unsigned and large enough for the job.
   1663     *
   1664     * Return with an error instead of adjusting the limits because we would
   1665     * not be able to maintain the semantics that either the source must be
   1666     * consumed or the target filled (unless an error occurs).
   1667     * An adjustment would be sourceLimit=t+0x7fffffff; for example.
   1668     *
   1669     * 3) Make sure that the user didn't incorrectly cast a char16_t * pointer
   1670     * to a char * pointer and provide an incomplete char16_t code unit.
   1671     */
   1672    if (sourceLimit<s || targetLimit<t ||
   1673        ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
   1674        ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
   1675        (((const char *)targetLimit-(const char *)t) & 1) != 0
   1676    ) {
   1677        *err=U_ILLEGAL_ARGUMENT_ERROR;
   1678        return;
   1679    }
   1680    
   1681    /* output the target overflow buffer */
   1682    if( cnv->UCharErrorBufferLength>0 &&
   1683        ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
   1684    ) {
   1685        /* U_BUFFER_OVERFLOW_ERROR */
   1686        return;
   1687    }
   1688    /* *target may have moved, therefore stop using t */
   1689 
   1690    if(!flush && s==sourceLimit && cnv->preToULength>=0) {
   1691        /* the overflow buffer is emptied and there is no new input: we are done */
   1692        return;
   1693    }
   1694 
   1695    /*
   1696     * Do not simply return with a buffer overflow error if
   1697     * !flush && t==targetLimit
   1698     * because it is possible that the source will not generate any output.
   1699     * For example, the skip callback may be called;
   1700     * it does not output anything.
   1701     */
   1702 
   1703    /* prepare the converter arguments */
   1704    args.converter=cnv;
   1705    args.flush=flush;
   1706    args.offsets=offsets;
   1707    args.source=s;
   1708    args.sourceLimit=sourceLimit;
   1709    args.target=*target;
   1710    args.targetLimit=targetLimit;
   1711    args.size=sizeof(args);
   1712 
   1713    _toUnicodeWithCallback(&args, err);
   1714 
   1715    *source=args.source;
   1716    *target=args.target;
   1717 }
   1718 
   1719 /* ucnv_to/fromUChars() ----------------------------------------------------- */
   1720 
   1721 U_CAPI int32_t U_EXPORT2
   1722 ucnv_fromUChars(UConverter *cnv,
   1723                char *dest, int32_t destCapacity,
   1724                const char16_t *src, int32_t srcLength,
   1725                UErrorCode *pErrorCode) {
   1726    const char16_t *srcLimit;
   1727    char *originalDest, *destLimit;
   1728    int32_t destLength;
   1729 
   1730    /* check arguments */
   1731    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
   1732        return 0;
   1733    }
   1734 
   1735    if( cnv==nullptr ||
   1736        destCapacity<0 || (destCapacity>0 && dest==nullptr) ||
   1737        srcLength<-1 || (srcLength!=0 && src==nullptr)
   1738    ) {
   1739        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1740        return 0;
   1741    }
   1742 
   1743    /* initialize */
   1744    ucnv_resetFromUnicode(cnv);
   1745    originalDest=dest;
   1746    if(srcLength==-1) {
   1747        srcLength=u_strlen(src);
   1748    }
   1749    if(srcLength>0) {
   1750        srcLimit=src+srcLength;
   1751        destCapacity=pinCapacity(dest, destCapacity);
   1752        destLimit=dest+destCapacity;
   1753 
   1754        /* perform the conversion */
   1755        UErrorCode bufferStatus = U_ZERO_ERROR;
   1756        ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
   1757        destLength=(int32_t)(dest-originalDest);
   1758 
   1759        /* if an overflow occurs, then get the preflighting length */
   1760        if(bufferStatus==U_BUFFER_OVERFLOW_ERROR) {
   1761            char buffer[1024];
   1762 
   1763            destLimit=buffer+sizeof(buffer);
   1764            do {
   1765                dest=buffer;
   1766                bufferStatus=U_ZERO_ERROR;
   1767                ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
   1768                destLength+=(int32_t)(dest-buffer);
   1769            } while(bufferStatus==U_BUFFER_OVERFLOW_ERROR);
   1770        }
   1771        if (U_FAILURE(bufferStatus)) {
   1772            *pErrorCode = bufferStatus;
   1773        }
   1774    } else {
   1775        destLength=0;
   1776    }
   1777 
   1778    return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
   1779 }
   1780 
   1781 U_CAPI int32_t U_EXPORT2
   1782 ucnv_toUChars(UConverter *cnv,
   1783              char16_t *dest, int32_t destCapacity,
   1784              const char *src, int32_t srcLength,
   1785              UErrorCode *pErrorCode) {
   1786    const char *srcLimit;
   1787    char16_t *originalDest, *destLimit;
   1788    int32_t destLength;
   1789 
   1790    /* check arguments */
   1791    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
   1792        return 0;
   1793    }
   1794 
   1795    if( cnv==nullptr ||
   1796        destCapacity<0 || (destCapacity>0 && dest==nullptr) ||
   1797        srcLength<-1 || (srcLength!=0 && src==nullptr))
   1798    {
   1799        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1800        return 0;
   1801    }
   1802 
   1803    /* initialize */
   1804    ucnv_resetToUnicode(cnv);
   1805    originalDest=dest;
   1806    if(srcLength==-1) {
   1807        srcLength=(int32_t)uprv_strlen(src);
   1808    }
   1809    if(srcLength>0) {
   1810        srcLimit=src+srcLength;
   1811        destCapacity=pinCapacity(dest, destCapacity);
   1812        destLimit=dest+destCapacity;
   1813 
   1814        /* perform the conversion */
   1815        UErrorCode bufferStatus = U_ZERO_ERROR;
   1816        ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
   1817        destLength=(int32_t)(dest-originalDest);
   1818 
   1819        /* if an overflow occurs, then get the preflighting length */
   1820        if(bufferStatus==U_BUFFER_OVERFLOW_ERROR)
   1821        {
   1822            char16_t buffer[1024];
   1823 
   1824            destLimit=buffer+UPRV_LENGTHOF(buffer);
   1825            do {
   1826                dest=buffer;
   1827                bufferStatus=U_ZERO_ERROR;
   1828                ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
   1829                destLength+=(int32_t)(dest-buffer);
   1830            }
   1831            while(bufferStatus==U_BUFFER_OVERFLOW_ERROR);
   1832        }
   1833        if (U_FAILURE(bufferStatus)) {
   1834            *pErrorCode = bufferStatus;
   1835        }
   1836    } else {
   1837        destLength=0;
   1838    }
   1839 
   1840    return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
   1841 }
   1842 
   1843 /* ucnv_getNextUChar() ------------------------------------------------------ */
   1844 
   1845 U_CAPI UChar32 U_EXPORT2
   1846 ucnv_getNextUChar(UConverter *cnv,
   1847                  const char **source, const char *sourceLimit,
   1848                  UErrorCode *err) {
   1849    UConverterToUnicodeArgs args;
   1850    char16_t buffer[U16_MAX_LENGTH];
   1851    const char *s;
   1852    UChar32 c;
   1853    int32_t i, length;
   1854 
   1855    /* check parameters */
   1856    if(err==nullptr || U_FAILURE(*err)) {
   1857        return 0xffff;
   1858    }
   1859 
   1860    if(cnv==nullptr || source==nullptr) {
   1861        *err=U_ILLEGAL_ARGUMENT_ERROR;
   1862        return 0xffff;
   1863    }
   1864 
   1865    s=*source;
   1866    if(sourceLimit<s) {
   1867        *err=U_ILLEGAL_ARGUMENT_ERROR;
   1868        return 0xffff;
   1869    }
   1870 
   1871    /*
   1872     * Make sure that the buffer sizes do not exceed the number range for
   1873     * int32_t because some functions use the size (in units or bytes)
   1874     * rather than comparing pointers, and because offsets are int32_t values.
   1875     *
   1876     * size_t is guaranteed to be unsigned and large enough for the job.
   1877     *
   1878     * Return with an error instead of adjusting the limits because we would
   1879     * not be able to maintain the semantics that either the source must be
   1880     * consumed or the target filled (unless an error occurs).
   1881     * An adjustment would be sourceLimit=t+0x7fffffff; for example.
   1882     */
   1883    if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
   1884        *err=U_ILLEGAL_ARGUMENT_ERROR;
   1885        return 0xffff;
   1886    }
   1887 
   1888    c=U_SENTINEL;
   1889 
   1890    /* flush the target overflow buffer */
   1891    if(cnv->UCharErrorBufferLength>0) {
   1892        char16_t *overflow;
   1893 
   1894        overflow=cnv->UCharErrorBuffer;
   1895        i=0;
   1896        length=cnv->UCharErrorBufferLength;
   1897        U16_NEXT(overflow, i, length, c);
   1898 
   1899        /* move the remaining overflow contents up to the beginning */
   1900        if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
   1901            uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
   1902                         cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
   1903        }
   1904 
   1905        if(!U16_IS_LEAD(c) || i<length) {
   1906            return c;
   1907        }
   1908        /*
   1909         * Continue if the overflow buffer contained only a lead surrogate,
   1910         * in case the converter outputs single surrogates from complete
   1911         * input sequences.
   1912         */
   1913    }
   1914 
   1915    /*
   1916     * flush==true is implied for ucnv_getNextUChar()
   1917     *
   1918     * do not simply return even if s==sourceLimit because the converter may
   1919     * not have seen flush==true before
   1920     */
   1921 
   1922    /* prepare the converter arguments */
   1923    args.converter=cnv;
   1924    args.flush=true;
   1925    args.offsets=nullptr;
   1926    args.source=s;
   1927    args.sourceLimit=sourceLimit;
   1928    args.target=buffer;
   1929    args.targetLimit=buffer+1;
   1930    args.size=sizeof(args);
   1931 
   1932    if(c<0) {
   1933        /*
   1934         * call the native getNextUChar() implementation if we are
   1935         * at a character boundary (toULength==0)
   1936         *
   1937         * unlike with _toUnicode(), getNextUChar() implementations must set
   1938         * U_TRUNCATED_CHAR_FOUND for truncated input,
   1939         * in addition to setting toULength/toUBytes[]
   1940         */
   1941        if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=nullptr) {
   1942            c=cnv->sharedData->impl->getNextUChar(&args, err);
   1943            *source=s=args.source;
   1944            if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
   1945                /* reset the converter without calling the callback function */
   1946                _reset(cnv, UCNV_RESET_TO_UNICODE, false);
   1947                return 0xffff; /* no output */
   1948            } else if(U_SUCCESS(*err) && c>=0) {
   1949                return c;
   1950            /*
   1951             * else fall through to use _toUnicode() because
   1952             *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
   1953             *   U_FAILURE: call _toUnicode() for callback handling (do not output c)
   1954             */
   1955            }
   1956        }
   1957 
   1958        /* convert to one char16_t in buffer[0], or handle getNextUChar() errors */
   1959        _toUnicodeWithCallback(&args, err);
   1960 
   1961        if(*err==U_BUFFER_OVERFLOW_ERROR) {
   1962            *err=U_ZERO_ERROR;
   1963        }
   1964 
   1965        i=0;
   1966        length=(int32_t)(args.target-buffer);
   1967    } else {
   1968        /* write the lead surrogate from the overflow buffer */
   1969        buffer[0]=(char16_t)c;
   1970        args.target=buffer+1;
   1971        i=0;
   1972        length=1;
   1973    }
   1974 
   1975    /* buffer contents starts at i and ends before length */
   1976 
   1977    if(U_FAILURE(*err)) {
   1978        c=0xffff; /* no output */
   1979    } else if(length==0) {
   1980        /* no input or only state changes */
   1981        *err=U_INDEX_OUTOFBOUNDS_ERROR;
   1982        /* no need to reset explicitly because _toUnicodeWithCallback() did it */
   1983        c=0xffff; /* no output */
   1984    } else {
   1985        c=buffer[0];
   1986        i=1;
   1987        if(!U16_IS_LEAD(c)) {
   1988            /* consume c=buffer[0], done */
   1989        } else {
   1990            /* got a lead surrogate, see if a trail surrogate follows */
   1991            char16_t c2;
   1992 
   1993            if(cnv->UCharErrorBufferLength>0) {
   1994                /* got overflow output from the conversion */
   1995                if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
   1996                    /* got a trail surrogate, too */
   1997                    c=U16_GET_SUPPLEMENTARY(c, c2);
   1998 
   1999                    /* move the remaining overflow contents up to the beginning */
   2000                    if((--cnv->UCharErrorBufferLength)>0) {
   2001                        uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
   2002                                     cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
   2003                    }
   2004                } else {
   2005                    /* c is an unpaired lead surrogate, just return it */
   2006                }
   2007            } else if(args.source<sourceLimit) {
   2008                /* convert once more, to buffer[1] */
   2009                args.targetLimit=buffer+2;
   2010                _toUnicodeWithCallback(&args, err);
   2011                if(*err==U_BUFFER_OVERFLOW_ERROR) {
   2012                    *err=U_ZERO_ERROR;
   2013                }
   2014 
   2015                length=(int32_t)(args.target-buffer);
   2016                if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
   2017                    /* got a trail surrogate, too */
   2018                    c=U16_GET_SUPPLEMENTARY(c, c2);
   2019                    i=2;
   2020                }
   2021            }
   2022        }
   2023    }
   2024 
   2025    /*
   2026     * move leftover output from buffer[i..length[
   2027     * into the beginning of the overflow buffer
   2028     */
   2029    if(i<length) {
   2030        /* move further overflow back */
   2031        int32_t delta=length-i;
   2032        if((length=cnv->UCharErrorBufferLength)>0) {
   2033            uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
   2034                         length*U_SIZEOF_UCHAR);
   2035        }
   2036        cnv->UCharErrorBufferLength=(int8_t)(length+delta);
   2037 
   2038        cnv->UCharErrorBuffer[0]=buffer[i++];
   2039        if(delta>1) {
   2040            cnv->UCharErrorBuffer[1]=buffer[i];
   2041        }
   2042    }
   2043 
   2044    *source=args.source;
   2045    return c;
   2046 }
   2047 
   2048 /* ucnv_convert() and siblings ---------------------------------------------- */
   2049 
   2050 U_CAPI void U_EXPORT2
   2051 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
   2052               char **target, const char *targetLimit,
   2053               const char **source, const char *sourceLimit,
   2054               char16_t *pivotStart, char16_t **pivotSource,
   2055               char16_t **pivotTarget, const char16_t *pivotLimit,
   2056               UBool reset, UBool flush,
   2057               UErrorCode *pErrorCode) {
   2058    char16_t pivotBuffer[CHUNK_SIZE];
   2059    const char16_t *myPivotSource;
   2060    char16_t *myPivotTarget;
   2061    const char *s;
   2062    char *t;
   2063 
   2064    UConverterToUnicodeArgs toUArgs;
   2065    UConverterFromUnicodeArgs fromUArgs;
   2066    UConverterConvert convert;
   2067 
   2068    /* error checking */
   2069    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
   2070        return;
   2071    }
   2072 
   2073    if( targetCnv==nullptr || sourceCnv==nullptr ||
   2074        source==nullptr || *source==nullptr ||
   2075        target==nullptr || *target==nullptr || targetLimit==nullptr
   2076    ) {
   2077        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2078        return;
   2079    }
   2080 
   2081    s=*source;
   2082    t=*target;
   2083    if((sourceLimit!=nullptr && sourceLimit<s) || targetLimit<t) {
   2084        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2085        return;
   2086    }
   2087 
   2088    /*
   2089     * Make sure that the buffer sizes do not exceed the number range for
   2090     * int32_t. See ucnv_toUnicode() for a more detailed comment.
   2091     */
   2092    if(
   2093        (sourceLimit!=nullptr && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
   2094        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
   2095    ) {
   2096        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2097        return;
   2098    }
   2099    
   2100    if(pivotStart==nullptr) {
   2101        if(!flush) {
   2102            /* streaming conversion requires an explicit pivot buffer */
   2103            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2104            return;
   2105        }
   2106 
   2107        /* use the stack pivot buffer */
   2108        myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
   2109        pivotSource=(char16_t **)&myPivotSource;
   2110        pivotTarget=&myPivotTarget;
   2111        pivotLimit=pivotBuffer+CHUNK_SIZE;
   2112    } else if(  pivotStart>=pivotLimit ||
   2113                pivotSource==nullptr || *pivotSource==nullptr ||
   2114                pivotTarget==nullptr || *pivotTarget==nullptr ||
   2115                pivotLimit==nullptr
   2116    ) {
   2117        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2118        return;
   2119    }
   2120 
   2121    if(sourceLimit==nullptr) {
   2122        /* get limit of single-byte-NUL-terminated source string */
   2123        sourceLimit=uprv_strchr(*source, 0);
   2124    }
   2125 
   2126    if(reset) {
   2127        ucnv_resetToUnicode(sourceCnv);
   2128        ucnv_resetFromUnicode(targetCnv);
   2129        *pivotSource=*pivotTarget=pivotStart;
   2130    } else if(targetCnv->charErrorBufferLength>0) {
   2131        /* output the targetCnv overflow buffer */
   2132        if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, nullptr, pErrorCode)) {
   2133            /* U_BUFFER_OVERFLOW_ERROR */
   2134            return;
   2135        }
   2136        /* *target has moved, therefore stop using t */
   2137 
   2138        if( !flush &&
   2139            targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
   2140            sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
   2141        ) {
   2142            /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
   2143            return;
   2144        }
   2145    }
   2146 
   2147    /* Is direct-UTF-8 conversion available? */
   2148    if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
   2149        targetCnv->sharedData->impl->fromUTF8!=nullptr
   2150    ) {
   2151        convert=targetCnv->sharedData->impl->fromUTF8;
   2152    } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
   2153               sourceCnv->sharedData->impl->toUTF8!=nullptr
   2154    ) {
   2155        convert=sourceCnv->sharedData->impl->toUTF8;
   2156    } else {
   2157        convert=nullptr;
   2158    }
   2159 
   2160    /*
   2161     * If direct-UTF-8 conversion is available, then we use a smaller
   2162     * pivot buffer for error handling and partial matches
   2163     * so that we quickly return to direct conversion.
   2164     *
   2165     * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
   2166     *
   2167     * We could reduce the pivot buffer size further, at the cost of
   2168     * buffer overflows from callbacks.
   2169     * The pivot buffer should not be smaller than the maximum number of
   2170     * fromUnicode extension table input UChars
   2171     * (for m:n conversion, see
   2172     * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
   2173     * or 2 for surrogate pairs.
   2174     *
   2175     * Too small a buffer can cause thrashing between pivoting and direct
   2176     * conversion, with function call overhead outweighing the benefits
   2177     * of direct conversion.
   2178     */
   2179    if(convert!=nullptr && (pivotLimit-pivotStart)>32) {
   2180        pivotLimit=pivotStart+32;
   2181    }
   2182 
   2183    /* prepare the converter arguments */
   2184    fromUArgs.converter=targetCnv;
   2185    fromUArgs.flush=false;
   2186    fromUArgs.offsets=nullptr;
   2187    fromUArgs.target=*target;
   2188    fromUArgs.targetLimit=targetLimit;
   2189    fromUArgs.size=sizeof(fromUArgs);
   2190 
   2191    toUArgs.converter=sourceCnv;
   2192    toUArgs.flush=flush;
   2193    toUArgs.offsets=nullptr;
   2194    toUArgs.source=s;
   2195    toUArgs.sourceLimit=sourceLimit;
   2196    toUArgs.targetLimit=pivotLimit;
   2197    toUArgs.size=sizeof(toUArgs);
   2198 
   2199    /*
   2200     * TODO: Consider separating this function into two functions,
   2201     * extracting exactly the conversion loop,
   2202     * for readability and to reduce the set of visible variables.
   2203     *
   2204     * Otherwise stop using s and t from here on.
   2205     */
   2206    s=t=nullptr;
   2207 
   2208    /*
   2209     * conversion loop
   2210     *
   2211     * The sequence of steps in the loop may appear backward,
   2212     * but the principle is simple:
   2213     * In the chain of
   2214     *   source - sourceCnv overflow - pivot - targetCnv overflow - target
   2215     * empty out later buffers before refilling them from earlier ones.
   2216     *
   2217     * The targetCnv overflow buffer is flushed out only once before the loop.
   2218     */
   2219    for(;;) {
   2220        /*
   2221         * if(pivot not empty or error or replay or flush fromUnicode) {
   2222         *   fromUnicode(pivot -> target);
   2223         * }
   2224         *
   2225         * For pivoting conversion; and for direct conversion for
   2226         * error callback handling and flushing the replay buffer.
   2227         */
   2228        if( *pivotSource<*pivotTarget ||
   2229            U_FAILURE(*pErrorCode) ||
   2230            targetCnv->preFromULength<0 ||
   2231            fromUArgs.flush
   2232        ) {
   2233            fromUArgs.source=*pivotSource;
   2234            fromUArgs.sourceLimit=*pivotTarget;
   2235            _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
   2236            if(U_FAILURE(*pErrorCode)) {
   2237                /* target overflow, or conversion error */
   2238                *pivotSource=(char16_t *)fromUArgs.source;
   2239                break;
   2240            }
   2241 
   2242            /*
   2243             * _fromUnicodeWithCallback() must have consumed the pivot contents
   2244             * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
   2245             */
   2246        }
   2247 
   2248        /* The pivot buffer is empty; reset it so we start at pivotStart. */
   2249        *pivotSource=*pivotTarget=pivotStart;
   2250 
   2251        /*
   2252         * if(sourceCnv overflow buffer not empty) {
   2253         *     move(sourceCnv overflow buffer -> pivot);
   2254         *     continue;
   2255         * }
   2256         */
   2257        /* output the sourceCnv overflow buffer */
   2258        if(sourceCnv->UCharErrorBufferLength>0) {
   2259            if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, nullptr, pErrorCode)) {
   2260                /* U_BUFFER_OVERFLOW_ERROR */
   2261                *pErrorCode=U_ZERO_ERROR;
   2262            }
   2263            continue;
   2264        }
   2265 
   2266        /*
   2267         * check for end of input and break if done
   2268         *
   2269         * Checking both flush and fromUArgs.flush ensures that the converters
   2270         * have been called with the flush flag set if the ucnv_convertEx()
   2271         * caller set it.
   2272         */
   2273        if( toUArgs.source==sourceLimit &&
   2274            sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
   2275            (!flush || fromUArgs.flush)
   2276        ) {
   2277            /* done successfully */
   2278            break;
   2279        }
   2280 
   2281        /*
   2282         * use direct conversion if available
   2283         * but not if continuing a partial match
   2284         * or flushing the toUnicode replay buffer
   2285         */
   2286        if(convert!=nullptr && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
   2287            if(*pErrorCode==U_USING_DEFAULT_WARNING) {
   2288                /* remove a warning that may be set by this function */
   2289                *pErrorCode=U_ZERO_ERROR;
   2290            }
   2291            convert(&fromUArgs, &toUArgs, pErrorCode);
   2292            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
   2293                break;
   2294            } else if(U_FAILURE(*pErrorCode)) {
   2295                if(sourceCnv->toULength>0) {
   2296                    /*
   2297                     * Fall through to calling _toUnicodeWithCallback()
   2298                     * for callback handling.
   2299                     *
   2300                     * The pivot buffer will be reset with
   2301                     *   *pivotSource=*pivotTarget=pivotStart;
   2302                     * which indicates a toUnicode error to the caller
   2303                     * (*pivotSource==pivotStart shows no pivot UChars consumed).
   2304                     */
   2305                } else {
   2306                    /*
   2307                     * Indicate a fromUnicode error to the caller
   2308                     * (*pivotSource>pivotStart shows some pivot UChars consumed).
   2309                     */
   2310                    *pivotSource=*pivotTarget=pivotStart+1;
   2311                    /*
   2312                     * Loop around to calling _fromUnicodeWithCallbacks()
   2313                     * for callback handling.
   2314                     */
   2315                    continue;
   2316                }
   2317            } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
   2318                /*
   2319                 * No error, but the implementation requested to temporarily
   2320                 * fall back to pivoting.
   2321                 */
   2322                *pErrorCode=U_ZERO_ERROR;
   2323            /*
   2324             * The following else branches are almost identical to the end-of-input
   2325             * handling in _toUnicodeWithCallback().
   2326             * Avoid calling it just for the end of input.
   2327             */
   2328            } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
   2329                /*
   2330                 * the entire input stream is consumed
   2331                 * and there is a partial, truncated input sequence left
   2332                 */
   2333 
   2334                /* inject an error and continue with callback handling */
   2335                *pErrorCode=U_TRUNCATED_CHAR_FOUND;
   2336            } else {
   2337                /* input consumed */
   2338                if(flush) {
   2339                    /* reset the converters without calling the callback functions */
   2340                    _reset(sourceCnv, UCNV_RESET_TO_UNICODE, false);
   2341                    _reset(targetCnv, UCNV_RESET_FROM_UNICODE, false);
   2342                }
   2343 
   2344                /* done successfully */
   2345                break;
   2346            }
   2347        }
   2348        
   2349        /*
   2350         * toUnicode(source -> pivot);
   2351         *
   2352         * For pivoting conversion; and for direct conversion for
   2353         * error callback handling, continuing partial matches
   2354         * and flushing the replay buffer.
   2355         *
   2356         * The pivot buffer is empty and reset.
   2357         */
   2358        toUArgs.target=pivotStart; /* ==*pivotTarget */
   2359        /* toUArgs.targetLimit=pivotLimit; already set before the loop */
   2360        _toUnicodeWithCallback(&toUArgs, pErrorCode);
   2361        *pivotTarget=toUArgs.target;
   2362        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
   2363            /* pivot overflow: continue with the conversion loop */
   2364            *pErrorCode=U_ZERO_ERROR;
   2365        } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
   2366            /* conversion error, or there was nothing left to convert */
   2367            break;
   2368        }
   2369        /*
   2370         * else:
   2371         * _toUnicodeWithCallback() wrote into the pivot buffer,
   2372         * continue with fromUnicode conversion.
   2373         *
   2374         * Set the fromUnicode flush flag if we flush and if toUnicode has
   2375         * processed the end of the input.
   2376         */
   2377        if( flush && toUArgs.source==sourceLimit &&
   2378            sourceCnv->preToULength>=0 &&
   2379            sourceCnv->UCharErrorBufferLength==0
   2380        ) {
   2381            fromUArgs.flush=true;
   2382        }
   2383    }
   2384 
   2385    /*
   2386     * The conversion loop is exited when one of the following is true:
   2387     * - the entire source text has been converted successfully to the target buffer
   2388     * - a target buffer overflow occurred
   2389     * - a conversion error occurred
   2390     */
   2391 
   2392    *source=toUArgs.source;
   2393    *target=fromUArgs.target;
   2394 
   2395    /* terminate the target buffer if possible */
   2396    if(flush && U_SUCCESS(*pErrorCode)) {
   2397        if(*target!=targetLimit) {
   2398            **target=0;
   2399            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
   2400                *pErrorCode=U_ZERO_ERROR;
   2401            }
   2402        } else {
   2403            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
   2404        }
   2405    }
   2406 }
   2407 
   2408 /* internal implementation of ucnv_convert() etc. with preflighting */
   2409 static int32_t
   2410 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
   2411                     char *target, int32_t targetCapacity,
   2412                     const char *source, int32_t sourceLength,
   2413                     UErrorCode *pErrorCode) {
   2414    char16_t pivotBuffer[CHUNK_SIZE];
   2415    char16_t *pivot, *pivot2;
   2416 
   2417    char *myTarget;
   2418    const char *sourceLimit;
   2419    const char *targetLimit;
   2420    int32_t targetLength=0;
   2421 
   2422    /* set up */
   2423    if(sourceLength<0) {
   2424        sourceLimit=uprv_strchr(source, 0);
   2425    } else {
   2426        sourceLimit=source+sourceLength;
   2427    }
   2428 
   2429    /* if there is no input data, we're done */
   2430    if(source==sourceLimit) {
   2431        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
   2432    }
   2433 
   2434    pivot=pivot2=pivotBuffer;
   2435    myTarget=target;
   2436    targetLength=0;
   2437 
   2438    if(targetCapacity>0) {
   2439        /* perform real conversion */
   2440        targetLimit=target+targetCapacity;
   2441        ucnv_convertEx(outConverter, inConverter,
   2442                       &myTarget, targetLimit,
   2443                       &source, sourceLimit,
   2444                       pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
   2445                       false,
   2446                       true,
   2447                       pErrorCode);
   2448        targetLength = static_cast<int32_t>(myTarget - target);
   2449    }
   2450 
   2451    /*
   2452     * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
   2453     * to it but continue the conversion in order to store in targetCapacity
   2454     * the number of bytes that was required.
   2455     */
   2456    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
   2457    {
   2458        char targetBuffer[CHUNK_SIZE];
   2459 
   2460        targetLimit=targetBuffer+CHUNK_SIZE;
   2461        do {
   2462            *pErrorCode=U_ZERO_ERROR;
   2463            myTarget=targetBuffer;
   2464            ucnv_convertEx(outConverter, inConverter,
   2465                           &myTarget, targetLimit,
   2466                           &source, sourceLimit,
   2467                           pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
   2468                           false,
   2469                           true,
   2470                           pErrorCode);
   2471            targetLength += static_cast<int32_t>(myTarget - targetBuffer);
   2472        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
   2473 
   2474        /* done with preflighting, set warnings and errors as appropriate */
   2475        return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
   2476    }
   2477 
   2478    /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
   2479    return targetLength;
   2480 }
   2481 
   2482 U_CAPI int32_t U_EXPORT2
   2483 ucnv_convert(const char *toConverterName, const char *fromConverterName,
   2484             char *target, int32_t targetCapacity,
   2485             const char *source, int32_t sourceLength,
   2486             UErrorCode *pErrorCode) {
   2487    UConverter in, out; /* stack-allocated */
   2488    UConverter *inConverter, *outConverter;
   2489    int32_t targetLength;
   2490 
   2491    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
   2492        return 0;
   2493    }
   2494 
   2495    if( source==nullptr || sourceLength<-1 ||
   2496        targetCapacity<0 || (targetCapacity>0 && target==nullptr)
   2497    ) {
   2498        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2499        return 0;
   2500    }
   2501 
   2502    /* if there is no input data, we're done */
   2503    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
   2504        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
   2505    }
   2506 
   2507    /* create the converters */
   2508    inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
   2509    if(U_FAILURE(*pErrorCode)) {
   2510        return 0;
   2511    }
   2512 
   2513    outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
   2514    if(U_FAILURE(*pErrorCode)) {
   2515        ucnv_close(inConverter);
   2516        return 0;
   2517    }
   2518 
   2519    targetLength=ucnv_internalConvert(outConverter, inConverter,
   2520                                      target, targetCapacity,
   2521                                      source, sourceLength,
   2522                                      pErrorCode);
   2523 
   2524    ucnv_close(inConverter);
   2525    ucnv_close(outConverter);
   2526 
   2527    return targetLength;
   2528 }
   2529 
   2530 /* @internal */
   2531 static int32_t
   2532 ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
   2533                        UConverterType algorithmicType,
   2534                        UConverter *cnv,
   2535                        char *target, int32_t targetCapacity,
   2536                        const char *source, int32_t sourceLength,
   2537                        UErrorCode *pErrorCode) {
   2538    UConverter algoConverterStatic; /* stack-allocated */
   2539    UConverter *algoConverter, *to, *from;
   2540    int32_t targetLength;
   2541 
   2542    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
   2543        return 0;
   2544    }
   2545 
   2546    if( cnv==nullptr || source==nullptr || sourceLength<-1 ||
   2547        targetCapacity<0 || (targetCapacity>0 && target==nullptr)
   2548    ) {
   2549        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2550        return 0;
   2551    }
   2552 
   2553    /* if there is no input data, we're done */
   2554    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
   2555        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
   2556    }
   2557 
   2558    /* create the algorithmic converter */
   2559    algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
   2560                                                  "", 0, pErrorCode);
   2561    if(U_FAILURE(*pErrorCode)) {
   2562        return 0;
   2563    }
   2564 
   2565    /* reset the other converter */
   2566    if(convertToAlgorithmic) {
   2567        /* cnv->Unicode->algo */
   2568        ucnv_resetToUnicode(cnv);
   2569        to=algoConverter;
   2570        from=cnv;
   2571    } else {
   2572        /* algo->Unicode->cnv */
   2573        ucnv_resetFromUnicode(cnv);
   2574        from=algoConverter;
   2575        to=cnv;
   2576    }
   2577 
   2578    targetLength=ucnv_internalConvert(to, from,
   2579                                      target, targetCapacity,
   2580                                      source, sourceLength,
   2581                                      pErrorCode);
   2582 
   2583    ucnv_close(algoConverter);
   2584 
   2585    return targetLength;
   2586 }
   2587 
   2588 U_CAPI int32_t U_EXPORT2
   2589 ucnv_toAlgorithmic(UConverterType algorithmicType,
   2590                   UConverter *cnv,
   2591                   char *target, int32_t targetCapacity,
   2592                   const char *source, int32_t sourceLength,
   2593                   UErrorCode *pErrorCode) {
   2594    return ucnv_convertAlgorithmic(true, algorithmicType, cnv,
   2595                                   target, targetCapacity,
   2596                                   source, sourceLength,
   2597                                   pErrorCode);
   2598 }
   2599 
   2600 U_CAPI int32_t U_EXPORT2
   2601 ucnv_fromAlgorithmic(UConverter *cnv,
   2602                     UConverterType algorithmicType,
   2603                     char *target, int32_t targetCapacity,
   2604                     const char *source, int32_t sourceLength,
   2605                     UErrorCode *pErrorCode) UPRV_NO_SANITIZE_UNDEFINED {
   2606 
   2607    if(algorithmicType<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=algorithmicType) {
   2608        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
   2609        return 0;
   2610    }
   2611    return ucnv_convertAlgorithmic(false, algorithmicType, cnv,
   2612                                   target, targetCapacity,
   2613                                   source, sourceLength,
   2614                                   pErrorCode);
   2615 }
   2616 
   2617 U_CAPI UConverterType  U_EXPORT2
   2618 ucnv_getType(const UConverter* converter)
   2619 {
   2620    int8_t type = converter->sharedData->staticData->conversionType;
   2621 #if !UCONFIG_NO_LEGACY_CONVERSION
   2622    if(type == UCNV_MBCS) {
   2623        return ucnv_MBCSGetType(converter);
   2624    }
   2625 #endif
   2626    return (UConverterType)type;
   2627 }
   2628 
   2629 U_CAPI void  U_EXPORT2
   2630 ucnv_getStarters(const UConverter* converter, 
   2631                 UBool starters[256],
   2632                 UErrorCode* err)
   2633 {
   2634    if (err == nullptr || U_FAILURE(*err)) {
   2635        return;
   2636    }
   2637 
   2638    if(converter->sharedData->impl->getStarters != nullptr) {
   2639        converter->sharedData->impl->getStarters(converter, starters, err);
   2640    } else {
   2641        *err = U_ILLEGAL_ARGUMENT_ERROR;
   2642    }
   2643 }
   2644 
   2645 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
   2646 {
   2647    UErrorCode errorCode;
   2648    const char *name;
   2649    int32_t i;
   2650 
   2651    if(cnv==nullptr) {
   2652        return nullptr;
   2653    }
   2654 
   2655    errorCode=U_ZERO_ERROR;
   2656    name=ucnv_getName(cnv, &errorCode);
   2657    if(U_FAILURE(errorCode)) {
   2658        return nullptr;
   2659    }
   2660 
   2661    for(i=0; i<UPRV_LENGTHOF(ambiguousConverters); ++i)
   2662    {
   2663        if(0==uprv_strcmp(name, ambiguousConverters[i].name))
   2664        {
   2665            return ambiguousConverters+i;
   2666        }
   2667    }
   2668 
   2669    return nullptr;
   2670 }
   2671 
   2672 U_CAPI void  U_EXPORT2
   2673 ucnv_fixFileSeparator(const UConverter *cnv, 
   2674                      char16_t* source,
   2675                      int32_t sourceLength) {
   2676    const UAmbiguousConverter *a;
   2677    int32_t i;
   2678    char16_t variant5c;
   2679 
   2680    if(cnv==nullptr || source==nullptr || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==nullptr)
   2681    {
   2682        return;
   2683    }
   2684 
   2685    variant5c=a->variant5c;
   2686    for(i=0; i<sourceLength; ++i) {
   2687        if(source[i]==variant5c) {
   2688            source[i]=0x5c;
   2689        }
   2690    }
   2691 }
   2692 
   2693 U_CAPI UBool  U_EXPORT2
   2694 ucnv_isAmbiguous(const UConverter *cnv) {
   2695    return ucnv_getAmbiguous(cnv)!=nullptr;
   2696 }
   2697 
   2698 U_CAPI void  U_EXPORT2
   2699 ucnv_setFallback(UConverter *cnv, UBool usesFallback)
   2700 {
   2701    cnv->useFallback = usesFallback;
   2702 }
   2703 
   2704 U_CAPI UBool  U_EXPORT2
   2705 ucnv_usesFallback(const UConverter *cnv)
   2706 {
   2707    return cnv->useFallback;
   2708 }
   2709 
   2710 U_CAPI void  U_EXPORT2
   2711 ucnv_getInvalidChars (const UConverter * converter,
   2712                      char *errBytes,
   2713                      int8_t * len,
   2714                      UErrorCode * err)
   2715 {
   2716    if (err == nullptr || U_FAILURE(*err))
   2717    {
   2718        return;
   2719    }
   2720    if (len == nullptr || errBytes == nullptr || converter == nullptr)
   2721    {
   2722        *err = U_ILLEGAL_ARGUMENT_ERROR;
   2723        return;
   2724    }
   2725    if (*len < converter->invalidCharLength)
   2726    {
   2727        *err = U_INDEX_OUTOFBOUNDS_ERROR;
   2728        return;
   2729    }
   2730    if ((*len = converter->invalidCharLength) > 0)
   2731    {
   2732        uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
   2733    }
   2734 }
   2735 
   2736 U_CAPI void  U_EXPORT2
   2737 ucnv_getInvalidUChars (const UConverter * converter,
   2738                       char16_t *errChars,
   2739                       int8_t * len,
   2740                       UErrorCode * err)
   2741 {
   2742    if (err == nullptr || U_FAILURE(*err))
   2743    {
   2744        return;
   2745    }
   2746    if (len == nullptr || errChars == nullptr || converter == nullptr)
   2747    {
   2748        *err = U_ILLEGAL_ARGUMENT_ERROR;
   2749        return;
   2750    }
   2751    if (*len < converter->invalidUCharLength)
   2752    {
   2753        *err = U_INDEX_OUTOFBOUNDS_ERROR;
   2754        return;
   2755    }
   2756    if ((*len = converter->invalidUCharLength) > 0)
   2757    {
   2758        u_memcpy (errChars, converter->invalidUCharBuffer, *len);
   2759    }
   2760 }
   2761 
   2762 #define SIG_MAX_LEN 5
   2763 
   2764 U_CAPI const char* U_EXPORT2
   2765 ucnv_detectUnicodeSignature( const char* source,
   2766                             int32_t sourceLength,
   2767                             int32_t* signatureLength,
   2768                             UErrorCode* pErrorCode) {
   2769    int32_t dummy;
   2770 
   2771    /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
   2772     * bytes we don't misdetect something 
   2773     */
   2774    char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
   2775    int i = 0;
   2776 
   2777    if((pErrorCode==nullptr) || U_FAILURE(*pErrorCode)){
   2778        return nullptr;
   2779    }
   2780    
   2781    if(source == nullptr || sourceLength < -1){
   2782        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
   2783        return nullptr;
   2784    }
   2785 
   2786    if(signatureLength == nullptr) {
   2787        signatureLength = &dummy;
   2788    }
   2789 
   2790    if(sourceLength==-1){
   2791        sourceLength=(int32_t)uprv_strlen(source);
   2792    }
   2793 
   2794    
   2795    while(i<sourceLength&& i<SIG_MAX_LEN){
   2796        start[i]=source[i];
   2797        i++;
   2798    }
   2799 
   2800    if(start[0] == '\xFE' && start[1] == '\xFF') {
   2801        *signatureLength=2;
   2802        return  "UTF-16BE";
   2803    } else if(start[0] == '\xFF' && start[1] == '\xFE') {
   2804        if(start[2] == '\x00' && start[3] =='\x00') {
   2805            *signatureLength=4;
   2806            return "UTF-32LE";
   2807        } else {
   2808            *signatureLength=2;
   2809            return  "UTF-16LE";
   2810        }
   2811    } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
   2812        *signatureLength=3;
   2813        return  "UTF-8";
   2814    } else if(start[0] == '\x00' && start[1] == '\x00' && 
   2815              start[2] == '\xFE' && start[3]=='\xFF') {
   2816        *signatureLength=4;
   2817        return  "UTF-32BE";
   2818    } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
   2819        *signatureLength=3;
   2820        return "SCSU";
   2821    } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
   2822        *signatureLength=3;
   2823        return "BOCU-1";
   2824    } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
   2825        /*
   2826         * UTF-7: Initial U+FEFF is encoded as +/v8  or  +/v9  or  +/v+  or  +/v/
   2827         * depending on the second UTF-16 code unit.
   2828         * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
   2829         * if it occurs.
   2830         *
   2831         * So far we have +/v
   2832         */
   2833        if(start[3] == '\x38' && start[4] == '\x2D') {
   2834            /* 5 bytes +/v8- */
   2835            *signatureLength=5;
   2836            return "UTF-7";
   2837        } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
   2838            /* 4 bytes +/v8  or  +/v9  or  +/v+  or  +/v/ */
   2839            *signatureLength=4;
   2840            return "UTF-7";
   2841        }
   2842    }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
   2843        *signatureLength=4;
   2844        return "UTF-EBCDIC";
   2845    }
   2846 
   2847 
   2848    /* no known Unicode signature byte sequence recognized */
   2849    *signatureLength=0;
   2850    return nullptr;
   2851 }
   2852 
   2853 U_CAPI int32_t U_EXPORT2
   2854 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
   2855 {
   2856    if(status == nullptr || U_FAILURE(*status)){
   2857        return -1;
   2858    }
   2859    if(cnv == nullptr){
   2860        *status = U_ILLEGAL_ARGUMENT_ERROR;
   2861        return -1;
   2862    }
   2863 
   2864    if(cnv->preFromUFirstCP >= 0){
   2865        return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
   2866    }else if(cnv->preFromULength < 0){
   2867        return -cnv->preFromULength ;
   2868    }else if(cnv->fromUChar32 > 0){
   2869        return 1;
   2870    }
   2871    return 0; 
   2872 
   2873 }
   2874 
   2875 U_CAPI int32_t U_EXPORT2
   2876 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
   2877 
   2878    if(status == nullptr || U_FAILURE(*status)){
   2879        return -1;
   2880    }
   2881    if(cnv == nullptr){
   2882        *status = U_ILLEGAL_ARGUMENT_ERROR;
   2883        return -1;
   2884    }
   2885 
   2886    if(cnv->preToULength > 0){
   2887        return cnv->preToULength ;
   2888    }else if(cnv->preToULength < 0){
   2889        return -cnv->preToULength;
   2890    }else if(cnv->toULength > 0){
   2891        return cnv->toULength;
   2892    }
   2893    return 0;
   2894 }
   2895 
   2896 U_CAPI UBool U_EXPORT2
   2897 ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
   2898    if (U_FAILURE(*status)) {
   2899        return false;
   2900    }
   2901 
   2902    if (cnv == nullptr) {
   2903        *status = U_ILLEGAL_ARGUMENT_ERROR;
   2904        return false;
   2905    }
   2906 
   2907    switch (ucnv_getType(cnv)) {
   2908        case UCNV_SBCS:
   2909        case UCNV_DBCS:
   2910        case UCNV_UTF32_BigEndian:
   2911        case UCNV_UTF32_LittleEndian:
   2912        case UCNV_UTF32:
   2913        case UCNV_US_ASCII:
   2914            return true;
   2915        default:
   2916            return false;
   2917    }
   2918 }
   2919 #endif
   2920 
   2921 /*
   2922 * Hey, Emacs, please set the following:
   2923 *
   2924 * Local Variables:
   2925 * indent-tabs-mode: nil
   2926 * End:
   2927 *
   2928 */
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE