// ustr_wcs.cpp (16461B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2001-2012, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: ustr_wcs.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2004sep07 16 * created by: Markus W. Scherer 17 * 18 * u_strToWCS() and u_strFromWCS() functions 19 * moved here from ustrtrns.c for better modularization. 20 */ 21 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "cstring.h" 25 #include "cwchar.h" 26 #include "cmemory.h" 27 #include "ustr_imp.h" 28 #include "ustr_cnv.h" 29 30 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION 31 32 #define _STACK_BUFFER_CAPACITY 1000 33 #define _BUFFER_CAPACITY_MULTIPLIER 2 34 35 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 36 // TODO: We should use CharString for char buffers and UnicodeString for char16_t buffers. 37 // Then we could change this to work only with wchar_t buffers. 38 static inline UBool 39 u_growAnyBufferFromStatic(void *context, 40 void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, 41 int32_t length, int32_t size) { 42 // Use char* not void* to avoid the compiler's strict-aliasing assumptions 43 // and related warnings. 
44 char *newBuffer=(char *)uprv_malloc(reqCapacity*size); 45 if(newBuffer!=nullptr) { 46 if(length>0) { 47 uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); 48 } 49 *pCapacity=reqCapacity; 50 } else { 51 *pCapacity=0; 52 } 53 54 /* release the old pBuffer if it was not statically allocated */ 55 if(*pBuffer!=(char *)context) { 56 uprv_free(*pBuffer); 57 } 58 59 *pBuffer=newBuffer; 60 return newBuffer!=nullptr; 61 } 62 63 /* helper function */ 64 static wchar_t* 65 _strToWCS(wchar_t *dest, 66 int32_t destCapacity, 67 int32_t *pDestLength, 68 const char16_t *src, 69 int32_t srcLength, 70 UErrorCode *pErrorCode){ 71 72 char stackBuffer [_STACK_BUFFER_CAPACITY]; 73 char* tempBuf = stackBuffer; 74 int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; 75 char* tempBufLimit = stackBuffer + tempBufCapacity; 76 UConverter* conv = nullptr; 77 char* saveBuf = tempBuf; 78 wchar_t* intTarget=nullptr; 79 int32_t intTargetCapacity=0; 80 int count=0,retVal=0; 81 82 const char16_t *pSrcLimit =nullptr; 83 const char16_t *pSrc = src; 84 85 conv = u_getDefaultConverter(pErrorCode); 86 87 if(U_FAILURE(*pErrorCode)){ 88 return nullptr; 89 } 90 91 if(srcLength == -1){ 92 srcLength = u_strlen(pSrc); 93 } 94 95 pSrcLimit = pSrc + srcLength; 96 97 for(;;) { 98 UErrorCode bufferStatus = U_ZERO_ERROR; 99 100 /* convert to chars using default converter */ 101 ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,nullptr,(UBool)(pSrc==pSrcLimit),&bufferStatus); 102 count =(tempBuf - saveBuf); 103 104 /* This should rarely occur */ 105 if(bufferStatus==U_BUFFER_OVERFLOW_ERROR){ 106 tempBuf = saveBuf; 107 108 /* we don't have enough room on the stack grow the buffer */ 109 int32_t newCapacity = 2 * srcLength; 110 if(newCapacity <= tempBufCapacity) { 111 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; 112 } 113 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 114 newCapacity, count, 1)) { 115 goto cleanup; 116 } 117 118 saveBuf = tempBuf; 119 
tempBufLimit = tempBuf + tempBufCapacity; 120 tempBuf = tempBuf + count; 121 } else { 122 if (U_FAILURE(bufferStatus)) { 123 *pErrorCode = bufferStatus; 124 goto cleanup; 125 } 126 break; 127 } 128 } 129 130 /* done with conversion null terminate the char buffer */ 131 if(count>=tempBufCapacity){ 132 tempBuf = saveBuf; 133 /* we don't have enough room on the stack grow the buffer */ 134 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 135 count+1, count, 1)) { 136 goto cleanup; 137 } 138 saveBuf = tempBuf; 139 } 140 141 saveBuf[count]=0; 142 143 144 /* allocate more space than required 145 * here we assume that every char requires 146 * no more than 2 wchar_ts 147 */ 148 intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; 149 intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); 150 151 if(intTarget){ 152 153 int32_t nulLen = 0; 154 int32_t remaining = intTargetCapacity; 155 wchar_t* pIntTarget=intTarget; 156 tempBuf = saveBuf; 157 158 /* now convert the mbs to wcs */ 159 for(;;){ 160 161 /* we can call the system API since we are sure that 162 * there is atleast 1 null in the input 163 */ 164 retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); 165 166 if(retVal==-1){ 167 *pErrorCode = U_INVALID_CHAR_FOUND; 168 break; 169 }else if(retVal== remaining){/* should never occur */ 170 int numWritten = (pIntTarget-intTarget); 171 u_growAnyBufferFromStatic(nullptr,(void**) &intTarget, 172 &intTargetCapacity, 173 intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, 174 numWritten, 175 sizeof(wchar_t)); 176 pIntTarget = intTarget; 177 remaining=intTargetCapacity; 178 179 if(nulLen!=count){ /*there are embedded nulls*/ 180 pIntTarget+=numWritten; 181 remaining-=numWritten; 182 } 183 184 }else{ 185 int32_t nulVal; 186 /*scan for nulls */ 187 /* we donot check for limit since tempBuf is null terminated */ 188 while(tempBuf[nulLen++] != 0){ 189 } 190 nulVal = (nulLen < srcLength) ? 
1 : 0; 191 pIntTarget = pIntTarget + retVal+nulVal; 192 remaining -=(retVal+nulVal); 193 194 /* check if we have reached the source limit*/ 195 if(nulLen>=(count)){ 196 break; 197 } 198 } 199 } 200 count = (int32_t)(pIntTarget-intTarget); 201 202 if(0 < count && count <= destCapacity){ 203 uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); 204 } 205 206 if(pDestLength){ 207 *pDestLength = count; 208 } 209 210 /* free the allocated memory */ 211 uprv_free(intTarget); 212 213 }else{ 214 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 215 } 216 cleanup: 217 /* are we still using stack buffer */ 218 if(stackBuffer != saveBuf){ 219 uprv_free(saveBuf); 220 } 221 u_terminateWChars(dest,destCapacity,count,pErrorCode); 222 223 u_releaseDefaultConverter(conv); 224 225 return dest; 226 } 227 #endif 228 229 U_CAPI wchar_t* U_EXPORT2 230 u_strToWCS(wchar_t *dest, 231 int32_t destCapacity, 232 int32_t *pDestLength, 233 const char16_t *src, 234 int32_t srcLength, 235 UErrorCode *pErrorCode){ 236 237 /* args check */ 238 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){ 239 return nullptr; 240 } 241 242 if( (src==nullptr && srcLength!=0) || srcLength < -1 || 243 (destCapacity<0) || (dest == nullptr && destCapacity > 0) 244 ) { 245 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 246 return nullptr; 247 } 248 249 #ifdef U_WCHAR_IS_UTF16 250 /* wchar_t is UTF-16 just do a memcpy */ 251 if(srcLength == -1){ 252 srcLength = u_strlen(src); 253 } 254 if(0 < srcLength && srcLength <= destCapacity){ 255 u_memcpy((char16_t *)dest, src, srcLength); 256 } 257 if(pDestLength){ 258 *pDestLength = srcLength; 259 } 260 261 u_terminateUChars((char16_t *)dest,destCapacity,srcLength,pErrorCode); 262 263 return dest; 264 265 #elif defined U_WCHAR_IS_UTF32 266 267 return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, 268 src, srcLength, pErrorCode); 269 270 #else 271 272 return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); 273 274 #endif 275 276 } 277 278 #if 
!defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 279 /* helper function */ 280 static char16_t* 281 _strFromWCS( char16_t *dest, 282 int32_t destCapacity, 283 int32_t *pDestLength, 284 const wchar_t *src, 285 int32_t srcLength, 286 UErrorCode *pErrorCode) 287 { 288 int32_t retVal =0, count =0 ; 289 UConverter* conv = nullptr; 290 char16_t* pTarget = nullptr; 291 char16_t* pTargetLimit = nullptr; 292 char16_t* target = nullptr; 293 294 char16_t uStack [_STACK_BUFFER_CAPACITY]; 295 296 wchar_t wStack[_STACK_BUFFER_CAPACITY]; 297 wchar_t* pWStack = wStack; 298 299 300 char cStack[_STACK_BUFFER_CAPACITY]; 301 int32_t cStackCap = _STACK_BUFFER_CAPACITY; 302 char* pCSrc=cStack; 303 char* pCSave=pCSrc; 304 char* pCSrcLimit=nullptr; 305 306 const wchar_t* pSrc = src; 307 const wchar_t* pSrcLimit = nullptr; 308 309 if(srcLength ==-1){ 310 /* if the wchar_t source is null terminated we can safely 311 * assume that there are no embedded nulls, this is a fast 312 * path for null terminated strings. 
313 */ 314 for(;;){ 315 /* convert wchars to chars */ 316 retVal = uprv_wcstombs(pCSrc,src, cStackCap); 317 318 if(retVal == -1){ 319 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 320 goto cleanup; 321 }else if(retVal >= (cStackCap-1)){ 322 /* Should rarely occur */ 323 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 324 cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); 325 pCSave = pCSrc; 326 }else{ 327 /* converted every thing */ 328 pCSrc = pCSrc+retVal; 329 break; 330 } 331 } 332 333 }else{ 334 /* here the source is not null terminated 335 * so it may have nulls embedded and we need to 336 * do some extra processing 337 */ 338 int32_t remaining =cStackCap; 339 340 pSrcLimit = src + srcLength; 341 342 for(;;){ 343 int32_t nulLen = 0; 344 345 /* find nulls in the string */ 346 while(nulLen<srcLength && pSrc[nulLen++]!=0){ 347 } 348 349 if((pSrc+nulLen) < pSrcLimit){ 350 /* check if we have enough room in pCSrc */ 351 if(remaining < (nulLen * MB_CUR_MAX)){ 352 /* should rarely occur */ 353 int32_t len = (pCSrc-pCSave); 354 pCSrc = pCSave; 355 /* we do not have enough room so grow the buffer*/ 356 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 357 _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); 358 359 pCSave = pCSrc; 360 pCSrc = pCSave+len; 361 remaining = cStackCap-(pCSrc - pCSave); 362 } 363 364 /* we have found a null so convert the 365 * chunk from beginning of non-null char to null 366 */ 367 retVal = uprv_wcstombs(pCSrc,pSrc,remaining); 368 369 if(retVal==-1){ 370 /* an error occurred bail out */ 371 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 372 goto cleanup; 373 } 374 375 pCSrc += retVal+1 /* already null terminated */; 376 377 pSrc += nulLen; /* skip past the null */ 378 srcLength-=nulLen; /* decrement the srcLength */ 379 remaining -= (pCSrc-pCSave); 380 381 382 }else{ 383 /* the source is not null terminated and we are 384 * end of source so we copy the source to a temp buffer 385 * null terminate it and 
convert wchar_ts to chars 386 */ 387 if(nulLen >= _STACK_BUFFER_CAPACITY){ 388 /* Should rarely occur */ 389 /* allocate new buffer buffer */ 390 pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); 391 if(pWStack==nullptr){ 392 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 393 goto cleanup; 394 } 395 } 396 if(nulLen>0){ 397 /* copy the contents to tempStack */ 398 uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); 399 } 400 401 /* null terminate the tempBuffer */ 402 pWStack[nulLen] =0 ; 403 404 if(remaining < (nulLen * MB_CUR_MAX)){ 405 /* Should rarely occur */ 406 int32_t len = (pCSrc-pCSave); 407 pCSrc = pCSave; 408 /* we do not have enough room so grow the buffer*/ 409 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 410 cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); 411 412 pCSave = pCSrc; 413 pCSrc = pCSave+len; 414 remaining = cStackCap-(pCSrc - pCSave); 415 } 416 /* convert to chars */ 417 retVal = uprv_wcstombs(pCSrc,pWStack,remaining); 418 419 pCSrc += retVal; 420 pSrc += nulLen; 421 srcLength-=nulLen; /* decrement the srcLength */ 422 break; 423 } 424 } 425 } 426 427 /* OK..now we have converted from wchar_ts to chars now 428 * convert chars to UChars 429 */ 430 pCSrcLimit = pCSrc; 431 pCSrc = pCSave; 432 pTarget = target= dest; 433 pTargetLimit = dest + destCapacity; 434 435 conv= u_getDefaultConverter(pErrorCode); 436 437 if(U_FAILURE(*pErrorCode)|| conv==nullptr){ 438 goto cleanup; 439 } 440 441 for(;;) { 442 UErrorCode bufferStatus = U_ZERO_ERROR; 443 444 /* convert to stack buffer*/ 445 ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,nullptr,(UBool)(pCSrc==pCSrcLimit),&bufferStatus); 446 447 /* increment count to number written to stack */ 448 count+= pTarget - target; 449 450 if(bufferStatus==U_BUFFER_OVERFLOW_ERROR){ 451 target = uStack; 452 pTarget = uStack; 453 pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; 454 } else { 455 if (U_FAILURE(bufferStatus)) { 456 *pErrorCode = bufferStatus; 
457 } 458 break; 459 } 460 461 } 462 463 if(pDestLength){ 464 *pDestLength =count; 465 } 466 467 u_terminateUChars(dest,destCapacity,count,pErrorCode); 468 469 cleanup: 470 471 if(cStack != pCSave){ 472 uprv_free(pCSave); 473 } 474 475 if(wStack != pWStack){ 476 uprv_free(pWStack); 477 } 478 479 u_releaseDefaultConverter(conv); 480 481 return dest; 482 } 483 #endif 484 485 U_CAPI char16_t* U_EXPORT2 486 u_strFromWCS(char16_t *dest, 487 int32_t destCapacity, 488 int32_t *pDestLength, 489 const wchar_t *src, 490 int32_t srcLength, 491 UErrorCode *pErrorCode) 492 { 493 494 /* args check */ 495 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){ 496 return nullptr; 497 } 498 499 if( (src==nullptr && srcLength!=0) || srcLength < -1 || 500 (destCapacity<0) || (dest == nullptr && destCapacity > 0) 501 ) { 502 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 503 return nullptr; 504 } 505 506 #ifdef U_WCHAR_IS_UTF16 507 /* wchar_t is UTF-16 just do a memcpy */ 508 if(srcLength == -1){ 509 srcLength = u_strlen((const char16_t *)src); 510 } 511 if(0 < srcLength && srcLength <= destCapacity){ 512 u_memcpy(dest, (const char16_t *)src, srcLength); 513 } 514 if(pDestLength){ 515 *pDestLength = srcLength; 516 } 517 518 u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); 519 520 return dest; 521 522 #elif defined U_WCHAR_IS_UTF32 523 524 return u_strFromUTF32(dest, destCapacity, pDestLength, 525 (UChar32*)src, srcLength, pErrorCode); 526 527 #else 528 529 return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); 530 531 #endif 532 533 } 534 535 #endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */