tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ustr_wcs.cpp (16461B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2001-2012, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  ustr_wcs.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2004sep07
     16 *   created by: Markus W. Scherer
     17 *
     18 *   u_strToWCS() and u_strFromWCS() functions
     19 *   moved here from ustrtrns.c for better modularization.
     20 */
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/ustring.h"
     24 #include "cstring.h"
     25 #include "cwchar.h"
     26 #include "cmemory.h"
     27 #include "ustr_imp.h"
     28 #include "ustr_cnv.h"
     29 
     30 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
     31 
     32 #define _STACK_BUFFER_CAPACITY 1000
     33 #define _BUFFER_CAPACITY_MULTIPLIER 2
     34 
     35 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
     36 // TODO: We should use CharString for char buffers and UnicodeString for char16_t buffers.
     37 // Then we could change this to work only with wchar_t buffers.
     38 static inline UBool 
     39 u_growAnyBufferFromStatic(void *context,
     40                       void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
     41                       int32_t length, int32_t size) {
     42    // Use char* not void* to avoid the compiler's strict-aliasing assumptions
     43    // and related warnings.
     44    char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
     45    if(newBuffer!=nullptr) {
     46        if(length>0) {
     47            uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
     48        }
     49        *pCapacity=reqCapacity;
     50    } else {
     51        *pCapacity=0;
     52    }
     53 
     54    /* release the old pBuffer if it was not statically allocated */
     55    if(*pBuffer!=(char *)context) {
     56        uprv_free(*pBuffer);
     57    }
     58 
     59    *pBuffer=newBuffer;
     60    return newBuffer!=nullptr;
     61 }
     62 
     63 /* helper function */
     64 static wchar_t* 
     65 _strToWCS(wchar_t *dest, 
     66           int32_t destCapacity,
     67           int32_t *pDestLength,
     68           const char16_t *src,
     69           int32_t srcLength,
     70           UErrorCode *pErrorCode){
     71 
     72    char stackBuffer [_STACK_BUFFER_CAPACITY];
     73    char* tempBuf = stackBuffer;
     74    int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
     75    char* tempBufLimit = stackBuffer + tempBufCapacity;
     76    UConverter* conv = nullptr;
     77    char* saveBuf = tempBuf;
     78    wchar_t* intTarget=nullptr;
     79    int32_t intTargetCapacity=0;
     80    int count=0,retVal=0;
     81    
     82    const char16_t *pSrcLimit =nullptr;
     83    const char16_t *pSrc = src;
     84 
     85    conv = u_getDefaultConverter(pErrorCode);
     86    
     87    if(U_FAILURE(*pErrorCode)){
     88        return nullptr;
     89    }
     90    
     91    if(srcLength == -1){
     92        srcLength = u_strlen(pSrc);
     93    }
     94    
     95    pSrcLimit = pSrc + srcLength;
     96 
     97    for(;;) {
     98        UErrorCode bufferStatus = U_ZERO_ERROR;
     99 
    100        /* convert to chars using default converter */
    101        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,nullptr,(UBool)(pSrc==pSrcLimit),&bufferStatus);
    102        count =(tempBuf - saveBuf);
    103        
    104        /* This should rarely occur */
    105        if(bufferStatus==U_BUFFER_OVERFLOW_ERROR){
    106            tempBuf = saveBuf;
    107            
    108            /* we don't have enough room on the stack grow the buffer */
    109            int32_t newCapacity = 2 * srcLength;
    110            if(newCapacity <= tempBufCapacity) {
    111                newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
    112            }
    113            if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
    114                    newCapacity, count, 1)) {
    115                goto cleanup;
    116            }
    117          
    118           saveBuf = tempBuf;
    119           tempBufLimit = tempBuf + tempBufCapacity;
    120           tempBuf = tempBuf + count;
    121        } else {
    122            if (U_FAILURE(bufferStatus)) {
    123                *pErrorCode = bufferStatus;
    124                goto cleanup;
    125            }
    126            break;
    127        }
    128    }
    129 
    130    /* done with conversion null terminate the char buffer */
    131    if(count>=tempBufCapacity){
    132        tempBuf = saveBuf;
    133        /* we don't have enough room on the stack grow the buffer */
    134        if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 
    135                count+1, count, 1)) {
    136            goto cleanup;
    137        }              
    138       saveBuf = tempBuf;
    139    }
    140    
    141    saveBuf[count]=0;
    142      
    143 
    144    /* allocate more space than required 
    145     * here we assume that every char requires 
    146     * no more than 2 wchar_ts
    147     */
    148    intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
    149    intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
    150 
    151    if(intTarget){
    152 
    153        int32_t nulLen = 0;
    154        int32_t remaining = intTargetCapacity;
    155        wchar_t* pIntTarget=intTarget;
    156        tempBuf = saveBuf;
    157        
    158        /* now convert the mbs to wcs */
    159        for(;;){
    160            
    161            /* we can call the system API since we are sure that
    162             * there is atleast 1 null in the input
    163             */
    164            retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
    165            
    166            if(retVal==-1){
    167                *pErrorCode = U_INVALID_CHAR_FOUND;
    168                break;
    169            }else if(retVal== remaining){/* should never occur */
    170                int numWritten = (pIntTarget-intTarget);
    171                u_growAnyBufferFromStatic(nullptr,(void**) &intTarget,
    172                                          &intTargetCapacity,
    173                                          intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
    174                                          numWritten,
    175                                          sizeof(wchar_t));
    176                pIntTarget = intTarget;
    177                remaining=intTargetCapacity;
    178 
    179                if(nulLen!=count){ /*there are embedded nulls*/
    180                    pIntTarget+=numWritten;
    181                    remaining-=numWritten;
    182                }
    183 
    184            }else{
    185                int32_t nulVal;
    186                /*scan for nulls */
    187                /* we donot check for limit since tempBuf is null terminated */
    188                while(tempBuf[nulLen++] != 0){
    189                }
    190                nulVal = (nulLen < srcLength) ? 1 : 0; 
    191                pIntTarget = pIntTarget + retVal+nulVal;
    192                remaining -=(retVal+nulVal);
    193            
    194                /* check if we have reached the source limit*/
    195                if(nulLen>=(count)){
    196                    break;
    197                }
    198            }
    199        }
    200        count = (int32_t)(pIntTarget-intTarget);
    201       
    202        if(0 < count && count <= destCapacity){
    203            uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
    204        }  
    205 
    206        if(pDestLength){
    207            *pDestLength = count;
    208        }
    209 
    210        /* free the allocated memory */
    211        uprv_free(intTarget);
    212 
    213    }else{
    214        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    215    }
    216 cleanup:
    217    /* are we still using stack buffer */
    218    if(stackBuffer != saveBuf){
    219        uprv_free(saveBuf);
    220    }
    221    u_terminateWChars(dest,destCapacity,count,pErrorCode);
    222 
    223    u_releaseDefaultConverter(conv);
    224 
    225    return dest;
    226 }
    227 #endif
    228 
    229 U_CAPI wchar_t* U_EXPORT2
    230 u_strToWCS(wchar_t *dest, 
    231           int32_t destCapacity,
    232           int32_t *pDestLength,
    233           const char16_t *src,
    234           int32_t srcLength,
    235           UErrorCode *pErrorCode){
    236 
    237    /* args check */
    238    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){
    239        return nullptr;
    240    }
    241        
    242    if( (src==nullptr && srcLength!=0) || srcLength < -1 ||
    243        (destCapacity<0) || (dest == nullptr && destCapacity > 0)
    244    ) {
    245        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    246        return nullptr;
    247    }
    248    
    249 #ifdef U_WCHAR_IS_UTF16
    250    /* wchar_t is UTF-16 just do a memcpy */
    251    if(srcLength == -1){
    252        srcLength = u_strlen(src);
    253    }
    254    if(0 < srcLength && srcLength <= destCapacity){
    255        u_memcpy((char16_t *)dest, src, srcLength);
    256    }
    257    if(pDestLength){
    258       *pDestLength = srcLength;
    259    }
    260 
    261    u_terminateUChars((char16_t *)dest,destCapacity,srcLength,pErrorCode);
    262 
    263    return dest;
    264 
    265 #elif defined U_WCHAR_IS_UTF32
    266    
    267    return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
    268                                  src, srcLength, pErrorCode);
    269 
    270 #else
    271    
    272    return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
    273    
    274 #endif
    275 
    276 }
    277 
    278 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
    279 /* helper function */
    280 static char16_t*
    281 _strFromWCS( char16_t   *dest,
    282             int32_t destCapacity, 
    283             int32_t *pDestLength,
    284             const wchar_t *src,
    285             int32_t srcLength,
    286             UErrorCode *pErrorCode)
    287 {
    288    int32_t retVal =0, count =0 ;
    289    UConverter* conv = nullptr;
    290    char16_t* pTarget = nullptr;
    291    char16_t* pTargetLimit = nullptr;
    292    char16_t* target = nullptr;
    293    
    294    char16_t uStack [_STACK_BUFFER_CAPACITY];
    295 
    296    wchar_t wStack[_STACK_BUFFER_CAPACITY];
    297    wchar_t* pWStack = wStack;
    298 
    299 
    300    char cStack[_STACK_BUFFER_CAPACITY];
    301    int32_t cStackCap = _STACK_BUFFER_CAPACITY;
    302    char* pCSrc=cStack;
    303    char* pCSave=pCSrc;
    304    char* pCSrcLimit=nullptr;
    305 
    306    const wchar_t* pSrc = src;
    307    const wchar_t* pSrcLimit = nullptr;
    308 
    309    if(srcLength ==-1){
    310        /* if the wchar_t source is null terminated we can safely
    311         * assume that there are no embedded nulls, this is a fast
    312         * path for null terminated strings.
    313         */
    314        for(;;){
    315            /* convert wchars  to chars */
    316            retVal = uprv_wcstombs(pCSrc,src, cStackCap);
    317    
    318            if(retVal == -1){
    319                *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    320                goto cleanup;
    321            }else if(retVal >= (cStackCap-1)){
    322                /* Should rarely occur */
    323                u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    324                    cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
    325                pCSave = pCSrc;
    326            }else{
    327                /* converted every thing */
    328                pCSrc = pCSrc+retVal;
    329                break;
    330            }
    331        }
    332        
    333    }else{
    334        /* here the source is not null terminated 
    335         * so it may have nulls embedded and we need to
    336         * do some extra processing 
    337         */
    338        int32_t remaining =cStackCap;
    339        
    340        pSrcLimit = src + srcLength;
    341 
    342        for(;;){
    343            int32_t nulLen = 0;
    344 
    345            /* find nulls in the string */
    346            while(nulLen<srcLength && pSrc[nulLen++]!=0){
    347            }
    348 
    349            if((pSrc+nulLen) < pSrcLimit){
    350                /* check if we have enough room in pCSrc */
    351                if(remaining < (nulLen * MB_CUR_MAX)){
    352                    /* should rarely occur */
    353                    int32_t len = (pCSrc-pCSave);
    354                    pCSrc = pCSave;
    355                    /* we do not have enough room so grow the buffer*/
    356                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    357                           _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
    358 
    359                    pCSave = pCSrc;
    360                    pCSrc = pCSave+len;
    361                    remaining = cStackCap-(pCSrc - pCSave);
    362                }
    363 
    364                /* we have found a null  so convert the 
    365                 * chunk from beginning of non-null char to null
    366                 */
    367                retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
    368 
    369                if(retVal==-1){
    370                    /* an error occurred bail out */
    371                    *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    372                    goto cleanup;
    373                }
    374 
    375                pCSrc += retVal+1 /* already null terminated */;
    376 
    377                pSrc += nulLen; /* skip past the null */
    378                srcLength-=nulLen; /* decrement the srcLength */
    379                remaining -= (pCSrc-pCSave);
    380 
    381 
    382            }else{
    383                /* the source is not null terminated and we are 
    384                 * end of source so we copy the source to a temp buffer
    385                 * null terminate it and convert wchar_ts to chars
    386                 */
    387                if(nulLen >= _STACK_BUFFER_CAPACITY){
    388                    /* Should rarely occur */
    389                    /* allocate new buffer buffer */
    390                    pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
    391                    if(pWStack==nullptr){
    392                        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    393                        goto cleanup;
    394                    }
    395                }
    396                if(nulLen>0){
    397                    /* copy the contents to tempStack */
    398                    uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
    399                }
    400            
    401                /* null terminate the tempBuffer */
    402                pWStack[nulLen] =0 ;
    403            
    404                if(remaining < (nulLen * MB_CUR_MAX)){
    405                    /* Should rarely occur */
    406                    int32_t len = (pCSrc-pCSave);
    407                    pCSrc = pCSave;
    408                    /* we do not have enough room so grow the buffer*/
    409                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    410                           cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
    411 
    412                    pCSave = pCSrc;
    413                    pCSrc = pCSave+len;
    414                    remaining = cStackCap-(pCSrc - pCSave);
    415                }
    416                /* convert to chars */
    417                retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
    418            
    419                pCSrc += retVal;
    420                pSrc  += nulLen;
    421                srcLength-=nulLen; /* decrement the srcLength */
    422                break;
    423            }
    424        }
    425    }
    426 
    427    /* OK..now we have converted from wchar_ts to chars now 
    428     * convert chars to UChars 
    429     */
    430    pCSrcLimit = pCSrc;
    431    pCSrc = pCSave;
    432    pTarget = target= dest;
    433    pTargetLimit = dest + destCapacity;    
    434    
    435    conv= u_getDefaultConverter(pErrorCode);
    436    
    437    if(U_FAILURE(*pErrorCode)|| conv==nullptr){
    438        goto cleanup;
    439    }
    440    
    441    for(;;) {
    442        UErrorCode bufferStatus = U_ZERO_ERROR;
    443 
    444        /* convert to stack buffer*/
    445        ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,nullptr,(UBool)(pCSrc==pCSrcLimit),&bufferStatus);
    446 
    447        /* increment count to number written to stack */
    448        count+= pTarget - target;
    449 
    450        if(bufferStatus==U_BUFFER_OVERFLOW_ERROR){
    451            target = uStack;
    452            pTarget = uStack;
    453            pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
    454        } else {
    455            if (U_FAILURE(bufferStatus)) {
    456                *pErrorCode = bufferStatus;
    457            }
    458            break;
    459        }
    460        
    461    }
    462    
    463    if(pDestLength){
    464        *pDestLength =count;
    465    }
    466 
    467    u_terminateUChars(dest,destCapacity,count,pErrorCode);
    468    
    469 cleanup:
    470 
    471    if(cStack != pCSave){
    472        uprv_free(pCSave);
    473    }
    474 
    475    if(wStack != pWStack){
    476        uprv_free(pWStack);
    477    }
    478    
    479    u_releaseDefaultConverter(conv);
    480 
    481    return dest;
    482 }
    483 #endif
    484 
    485 U_CAPI char16_t* U_EXPORT2
    486 u_strFromWCS(char16_t   *dest,
    487             int32_t destCapacity, 
    488             int32_t *pDestLength,
    489             const wchar_t *src,
    490             int32_t srcLength,
    491             UErrorCode *pErrorCode)
    492 {
    493 
    494    /* args check */
    495    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){
    496        return nullptr;
    497    }
    498 
    499    if( (src==nullptr && srcLength!=0) || srcLength < -1 ||
    500        (destCapacity<0) || (dest == nullptr && destCapacity > 0)
    501    ) {
    502        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    503        return nullptr;
    504    }
    505 
    506 #ifdef U_WCHAR_IS_UTF16
    507    /* wchar_t is UTF-16 just do a memcpy */
    508    if(srcLength == -1){
    509        srcLength = u_strlen((const char16_t *)src);
    510    }
    511    if(0 < srcLength && srcLength <= destCapacity){
    512        u_memcpy(dest, (const char16_t *)src, srcLength);
    513    }
    514    if(pDestLength){
    515       *pDestLength = srcLength;
    516    }
    517 
    518    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
    519 
    520    return dest;
    521 
    522 #elif defined U_WCHAR_IS_UTF32
    523    
    524    return u_strFromUTF32(dest, destCapacity, pDestLength,
    525                          (UChar32*)src, srcLength, pErrorCode);
    526 
    527 #else
    528 
    529    return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);  
    530 
    531 #endif
    532 
    533 }
    534 
    535 #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */