ucnv.cpp (96344B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1998-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * 11 * ucnv.c: 12 * Implements APIs for the ICU's codeset conversion library; 13 * mostly calls through internal functions; 14 * created by Bertrand A. Damiba 15 * 16 * Modification History: 17 * 18 * Date Name Description 19 * 04/04/99 helena Fixed internal header inclusion. 20 * 05/09/00 helena Added implementation to handle fallback mappings. 21 * 06/20/2000 helena OS/400 port changes; mostly typecast. 22 */ 23 24 #include "unicode/utypes.h" 25 26 #if !UCONFIG_NO_CONVERSION 27 28 #include <memory> 29 30 #include "unicode/ustring.h" 31 #include "unicode/ucnv.h" 32 #include "unicode/ucnv_err.h" 33 #include "unicode/uset.h" 34 #include "unicode/utf.h" 35 #include "unicode/utf16.h" 36 #include "putilimp.h" 37 #include "cmemory.h" 38 #include "cstring.h" 39 #include "uassert.h" 40 #include "utracimp.h" 41 #include "ustr_imp.h" 42 #include "ucnv_imp.h" 43 #include "ucnv_cnv.h" 44 #include "ucnv_bld.h" 45 46 /* size of intermediate and preflighting buffers in ucnv_convert() */ 47 #define CHUNK_SIZE 1024 48 49 typedef struct UAmbiguousConverter { 50 const char *name; 51 const char16_t variant5c; 52 } UAmbiguousConverter; 53 54 static const UAmbiguousConverter ambiguousConverters[]={ 55 { "ibm-897_P100-1995", 0xa5 }, 56 { "ibm-942_P120-1999", 0xa5 }, 57 { "ibm-943_P130-1999", 0xa5 }, 58 { "ibm-946_P100-1995", 0xa5 }, 59 { "ibm-33722_P120-1999", 0xa5 }, 60 { "ibm-1041_P100-1995", 0xa5 }, 61 /*{ "ibm-54191_P100-2006", 0xa5 },*/ 62 /*{ "ibm-62383_P100-2007", 0xa5 },*/ 63 /*{ "ibm-891_P100-1995", 0x20a9 },*/ 64 { "ibm-944_P100-1995", 0x20a9 }, 65 { "ibm-949_P110-1999", 0x20a9 }, 66 { "ibm-1363_P110-1997", 0x20a9 }, 67 { "ISO_2022,locale=ko,version=0", 0x20a9 }, 68 { "ibm-1088_P100-1995", 0x20a9 } 69 }; 70 71 /*Calls through createConverter */ 72 U_CAPI UConverter* U_EXPORT2 73 ucnv_open (const char *name, 74 UErrorCode * err) 75 { 76 UConverter *r; 77 78 if (err == nullptr || U_FAILURE (*err)) { 79 return nullptr; 80 } 81 82 r = ucnv_createConverter(nullptr, name, err); 83 return r; 84 } 85 86 U_CAPI UConverter* U_EXPORT2 87 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) 88 { 89 return ucnv_createConverterFromPackage(packageName, converterName, err); 90 } 91 92 /*Extracts the char16_t* to a char* and calls through createConverter */ 93 U_CAPI UConverter* U_EXPORT2 94 ucnv_openU (const char16_t * name, 95 UErrorCode * err) 96 { 97 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 98 99 if (err == nullptr || U_FAILURE(*err)) 100 return nullptr; 101 if (name == nullptr) 102 return ucnv_open (nullptr, err); 103 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) 104 { 105 *err = U_ILLEGAL_ARGUMENT_ERROR; 106 return nullptr; 107 } 108 return ucnv_open(u_austrcpy(asciiName, name), err); 109 } 110 111 /* Copy the string that is represented by the UConverterPlatform enum 112 * @param platformString An output buffer 113 * @param platform An enum representing a platform 114 * @return the length of the copied string. 115 */ 116 static int32_t 117 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) 118 { 119 switch (pltfrm) 120 { 121 case UCNV_IBM: 122 uprv_strcpy(platformString, "ibm-"); 123 return 4; 124 case UCNV_UNKNOWN: 125 break; 126 } 127 128 /* default to empty string */ 129 *platformString = 0; 130 return 0; 131 } 132 133 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls 134 *through createConverter*/ 135 U_CAPI UConverter* U_EXPORT2 136 ucnv_openCCSID (int32_t codepage, 137 UConverterPlatform platform, 138 UErrorCode * err) 139 { 140 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 141 int32_t myNameLen; 142 143 if (err == nullptr || U_FAILURE (*err)) 144 return nullptr; 145 146 /* ucnv_copyPlatformString could return "ibm-" or "cp" */ 147 myNameLen = ucnv_copyPlatformString(myName, platform); 148 T_CString_integerToString(myName + myNameLen, codepage, 10); 149 150 return ucnv_createConverter(nullptr, myName, err); 151 } 152 153 /* Creating a temporary stack-based object that can be used in one thread, 154 and created from a converter that is shared across threads. 155 */ 156 157 U_CAPI UConverter* U_EXPORT2 158 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) 159 { 160 UConverter *localConverter, *allocatedConverter; 161 int32_t stackBufferSize; 162 int32_t bufferSizeNeeded; 163 UErrorCode cbErr; 164 UConverterToUnicodeArgs toUArgs = { 165 sizeof(UConverterToUnicodeArgs), 166 true, 167 nullptr, 168 nullptr, 169 nullptr, 170 nullptr, 171 nullptr, 172 nullptr 173 }; 174 UConverterFromUnicodeArgs fromUArgs = { 175 sizeof(UConverterFromUnicodeArgs), 176 true, 177 nullptr, 178 nullptr, 179 nullptr, 180 nullptr, 181 nullptr, 182 nullptr 183 }; 184 185 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); 186 187 if (status == nullptr || U_FAILURE(*status)){ 188 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); 189 return nullptr; 190 } 191 192 if (cnv == nullptr) { 193 *status = U_ILLEGAL_ARGUMENT_ERROR; 194 UTRACE_EXIT_STATUS(*status); 195 return nullptr; 196 } 197 198 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", 199 ucnv_getName(cnv, status), cnv, stackBuffer); 200 201 if (cnv->sharedData->impl->safeClone != nullptr) { 202 /* call the custom safeClone function for sizing */ 203 bufferSizeNeeded = 0; 204 cnv->sharedData->impl->safeClone(cnv, nullptr, &bufferSizeNeeded, status); 205 if (U_FAILURE(*status)) { 206 UTRACE_EXIT_STATUS(*status); 207 return nullptr; 208 } 209 } 210 else 211 { 212 /* inherent sizing */ 213 bufferSizeNeeded = sizeof(UConverter); 214 } 215 216 if (pBufferSize == nullptr) { 217 stackBufferSize = 1; 218 pBufferSize = &stackBufferSize; 219 } else { 220 stackBufferSize = *pBufferSize; 221 if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ 222 *pBufferSize = bufferSizeNeeded; 223 UTRACE_EXIT_VALUE(bufferSizeNeeded); 224 return nullptr; 225 } 226 } 227 228 /* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter. 229 * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed. 230 */ 231 if (stackBuffer) { 232 uintptr_t p = reinterpret_cast<uintptr_t>(stackBuffer); 233 uintptr_t aligned_p = (p + alignof(UConverter) - 1) & ~(alignof(UConverter) - 1); 234 ptrdiff_t pointerAdjustment = aligned_p - p; 235 if (bufferSizeNeeded + pointerAdjustment <= stackBufferSize) { 236 stackBuffer = reinterpret_cast<void *>(aligned_p); 237 stackBufferSize -= static_cast<int32_t>(pointerAdjustment); 238 } else { 239 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ 240 stackBufferSize = 1; 241 } 242 } 243 244 /* Now, see if we must allocate any memory */ 245 if (stackBufferSize < bufferSizeNeeded || stackBuffer == nullptr) 246 { 247 /* allocate one here...*/ 248 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); 249 250 if(localConverter == nullptr) { 251 *status = U_MEMORY_ALLOCATION_ERROR; 252 UTRACE_EXIT_STATUS(*status); 253 return nullptr; 254 } 255 // If pBufferSize was nullptr as the input, pBufferSize is set to &stackBufferSize in this function. 256 if (pBufferSize != &stackBufferSize) { 257 *status = U_SAFECLONE_ALLOCATED_WARNING; 258 } 259 260 /* record the fact that memory was allocated */ 261 *pBufferSize = bufferSizeNeeded; 262 } else { 263 /* just use the stack buffer */ 264 localConverter = (UConverter*) stackBuffer; 265 allocatedConverter = nullptr; 266 } 267 268 uprv_memset(localConverter, 0, bufferSizeNeeded); 269 270 /* Copy initial state */ 271 uprv_memcpy(localConverter, cnv, sizeof(UConverter)); 272 localConverter->isCopyLocal = localConverter->isExtraLocal = false; 273 274 /* copy the substitution string */ 275 if (cnv->subChars == (uint8_t *)cnv->subUChars) { 276 localConverter->subChars = (uint8_t *)localConverter->subUChars; 277 } else { 278 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 279 if (localConverter->subChars == nullptr) { 280 uprv_free(allocatedConverter); 281 UTRACE_EXIT_STATUS(*status); 282 return nullptr; 283 } 284 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 285 } 286 287 /* now either call the safeclone fcn or not */ 288 if (cnv->sharedData->impl->safeClone != nullptr) { 289 /* call the custom safeClone function */ 290 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); 291 } 292 293 if(localConverter==nullptr || U_FAILURE(*status)) { 294 if (allocatedConverter != nullptr && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { 295 uprv_free(allocatedConverter->subChars); 296 } 297 uprv_free(allocatedConverter); 298 UTRACE_EXIT_STATUS(*status); 299 return nullptr; 300 } 301 302 /* increment refcount of shared data if needed */ 303 if (cnv->sharedData->isReferenceCounted) { 304 ucnv_incrementRefCount(cnv->sharedData); 305 } 306 307 if(localConverter == (UConverter*)stackBuffer) { 308 /* we're using user provided data - set to not destroy */ 309 localConverter->isCopyLocal = true; 310 } 311 312 /* allow callback functions to handle any memory allocation */ 313 toUArgs.converter = fromUArgs.converter = localConverter; 314 cbErr = U_ZERO_ERROR; 315 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, nullptr, 0, UCNV_CLONE, &cbErr); 316 cbErr = U_ZERO_ERROR; 317 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, nullptr, 0, 0, UCNV_CLONE, &cbErr); 318 319 UTRACE_EXIT_PTR_STATUS(localConverter, *status); 320 return localConverter; 321 } 322 323 U_CAPI UConverter* U_EXPORT2 324 ucnv_clone(const UConverter* cnv, UErrorCode *status) 325 { 326 return ucnv_safeClone(cnv, nullptr, nullptr, status); 327 } 328 329 /*Decreases the reference counter in the shared immutable section of the object 330 *and frees the mutable part*/ 331 332 U_CAPI void U_EXPORT2 333 ucnv_close (UConverter * converter) 334 { 335 UErrorCode errorCode = U_ZERO_ERROR; 336 337 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); 338 339 if (converter == nullptr) 340 { 341 UTRACE_EXIT(); 342 return; 343 } 344 345 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", 346 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); 347 348 /* In order to speed up the close, only call the callbacks when they have been changed. 349 This performance check will only work when the callbacks are set within a shared library 350 or from user code that statically links this code. */ 351 /* first, notify the callback functions that the converter is closed */ 352 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 353 UConverterToUnicodeArgs toUArgs = { 354 sizeof(UConverterToUnicodeArgs), 355 true, 356 nullptr, 357 nullptr, 358 nullptr, 359 nullptr, 360 nullptr, 361 nullptr 362 }; 363 364 toUArgs.converter = converter; 365 errorCode = U_ZERO_ERROR; 366 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, nullptr, 0, UCNV_CLOSE, &errorCode); 367 } 368 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 369 UConverterFromUnicodeArgs fromUArgs = { 370 sizeof(UConverterFromUnicodeArgs), 371 true, 372 nullptr, 373 nullptr, 374 nullptr, 375 nullptr, 376 nullptr, 377 nullptr 378 }; 379 fromUArgs.converter = converter; 380 errorCode = U_ZERO_ERROR; 381 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, nullptr, 0, 0, UCNV_CLOSE, &errorCode); 382 } 383 384 if (converter->sharedData->impl->close != nullptr) { 385 converter->sharedData->impl->close(converter); 386 } 387 388 if (converter->subChars != (uint8_t *)converter->subUChars) { 389 uprv_free(converter->subChars); 390 } 391 392 if (converter->sharedData->isReferenceCounted) { 393 ucnv_unloadSharedDataIfReady(converter->sharedData); 394 } 395 396 if(!converter->isCopyLocal){ 397 uprv_free(converter); 398 } 399 400 UTRACE_EXIT(); 401 } 402 403 /*returns a single Name from the list, will return nullptr if out of bounds 404 */ 405 U_CAPI const char* U_EXPORT2 406 ucnv_getAvailableName (int32_t n) 407 { 408 if (0 <= n && n <= 0xffff) { 409 UErrorCode err = U_ZERO_ERROR; 410 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); 411 if (U_SUCCESS(err)) { 412 return name; 413 } 414 } 415 return nullptr; 416 } 417 418 U_CAPI int32_t U_EXPORT2 419 ucnv_countAvailable () 420 { 421 UErrorCode err = U_ZERO_ERROR; 422 return ucnv_bld_countAvailableConverters(&err); 423 } 424 425 U_CAPI void U_EXPORT2 426 ucnv_getSubstChars (const UConverter * converter, 427 char *mySubChar, 428 int8_t * len, 429 UErrorCode * err) 430 { 431 if (U_FAILURE (*err)) 432 return; 433 434 if (converter->subCharLen <= 0) { 435 /* Unicode string or empty string from ucnv_setSubstString(). */ 436 *len = 0; 437 return; 438 } 439 440 if (*len < converter->subCharLen) /*not enough space in subChars */ 441 { 442 *err = U_INDEX_OUTOFBOUNDS_ERROR; 443 return; 444 } 445 446 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ 447 *len = converter->subCharLen; /*store # of bytes copied to buffer */ 448 } 449 450 U_CAPI void U_EXPORT2 451 ucnv_setSubstChars (UConverter * converter, 452 const char *mySubChar, 453 int8_t len, 454 UErrorCode * err) 455 { 456 if (U_FAILURE (*err)) 457 return; 458 459 /*Makes sure that the subChar is within the codepages char length boundaries */ 460 if ((len > converter->sharedData->staticData->maxBytesPerChar) 461 || (len < converter->sharedData->staticData->minBytesPerChar)) 462 { 463 *err = U_ILLEGAL_ARGUMENT_ERROR; 464 return; 465 } 466 467 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ 468 converter->subCharLen = len; /*sets the new len */ 469 470 /* 471 * There is currently (2001Feb) no separate API to set/get subChar1. 472 * In order to always have subChar written after it is explicitly set, 473 * we set subChar1 to 0. 474 */ 475 converter->subChar1 = 0; 476 } 477 478 U_CAPI void U_EXPORT2 479 ucnv_setSubstString(UConverter *cnv, 480 const char16_t *s, 481 int32_t length, 482 UErrorCode *err) { 483 alignas(UConverter) char cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE]; 484 char chars[UCNV_ERROR_BUFFER_LENGTH]; 485 486 UConverter *clone; 487 uint8_t *subChars; 488 int32_t cloneSize, length8; 489 490 /* Let the following functions check all arguments. */ 491 cloneSize = sizeof(cloneBuffer); 492 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); 493 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, nullptr, nullptr, nullptr, err); 494 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); 495 ucnv_close(clone); 496 if (U_FAILURE(*err)) { 497 return; 498 } 499 500 if (cnv->sharedData->impl->writeSub == nullptr 501 #if !UCONFIG_NO_LEGACY_CONVERSION 502 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && 503 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) 504 #endif 505 ) { 506 /* The converter is not stateful. Store the charset bytes as a fixed string. */ 507 subChars = (uint8_t *)chars; 508 } else { 509 /* 510 * The converter has a non-default writeSub() function, indicating 511 * that it is stateful. 512 * Store the Unicode string for on-the-fly conversion for correct 513 * state handling. 514 */ 515 if (length > UCNV_ERROR_BUFFER_LENGTH) { 516 /* 517 * Should not occur. The converter should output at least one byte 518 * per char16_t, which means that ucnv_fromUChars() should catch all 519 * overflows. 520 */ 521 *err = U_BUFFER_OVERFLOW_ERROR; 522 return; 523 } 524 subChars = (uint8_t *)s; 525 if (length < 0) { 526 length = u_strlen(s); 527 } 528 length8 = length * U_SIZEOF_UCHAR; 529 } 530 531 /* 532 * For storing the substitution string, select either the small buffer inside 533 * UConverter or allocate a subChars buffer. 534 */ 535 if (length8 > UCNV_MAX_SUBCHAR_LEN) { 536 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ 537 if (cnv->subChars == (uint8_t *)cnv->subUChars) { 538 /* Allocate a new buffer for the string. */ 539 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 540 if (cnv->subChars == nullptr) { 541 cnv->subChars = (uint8_t *)cnv->subUChars; 542 *err = U_MEMORY_ALLOCATION_ERROR; 543 return; 544 } 545 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 546 } 547 } 548 549 /* Copy the substitution string into the UConverter or its subChars buffer. */ 550 if (length8 == 0) { 551 cnv->subCharLen = 0; 552 } else { 553 uprv_memcpy(cnv->subChars, subChars, length8); 554 if (subChars == (uint8_t *)chars) { 555 cnv->subCharLen = (int8_t)length8; 556 } else /* subChars == s */ { 557 cnv->subCharLen = (int8_t)-length; 558 } 559 } 560 561 /* See comment in ucnv_setSubstChars(). */ 562 cnv->subChar1 = 0; 563 } 564 565 /*resets the internal states of a converter 566 *goal : have the same behaviour than a freshly created converter 567 */ 568 static void _reset(UConverter *converter, UConverterResetChoice choice, 569 UBool callCallback) { 570 if(converter == nullptr) { 571 return; 572 } 573 574 if(callCallback) { 575 /* first, notify the callback functions that the converter is reset */ 576 UErrorCode errorCode; 577 578 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 579 UConverterToUnicodeArgs toUArgs = { 580 sizeof(UConverterToUnicodeArgs), 581 true, 582 nullptr, 583 nullptr, 584 nullptr, 585 nullptr, 586 nullptr, 587 nullptr 588 }; 589 toUArgs.converter = converter; 590 errorCode = U_ZERO_ERROR; 591 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, nullptr, 0, UCNV_RESET, &errorCode); 592 } 593 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 594 UConverterFromUnicodeArgs fromUArgs = { 595 sizeof(UConverterFromUnicodeArgs), 596 true, 597 nullptr, 598 nullptr, 599 nullptr, 600 nullptr, 601 nullptr, 602 nullptr 603 }; 604 fromUArgs.converter = converter; 605 errorCode = U_ZERO_ERROR; 606 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, nullptr, 0, 0, UCNV_RESET, &errorCode); 607 } 608 } 609 610 /* now reset the converter itself */ 611 if(choice<=UCNV_RESET_TO_UNICODE) { 612 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; 613 converter->mode = 0; 614 converter->toULength = 0; 615 converter->invalidCharLength = converter->UCharErrorBufferLength = 0; 616 converter->preToULength = 0; 617 } 618 if(choice!=UCNV_RESET_TO_UNICODE) { 619 converter->fromUnicodeStatus = 0; 620 converter->fromUChar32 = 0; 621 converter->invalidUCharLength = converter->charErrorBufferLength = 0; 622 converter->preFromUFirstCP = U_SENTINEL; 623 converter->preFromULength = 0; 624 } 625 626 if (converter->sharedData->impl->reset != nullptr) { 627 /* call the custom reset function */ 628 converter->sharedData->impl->reset(converter, choice); 629 } 630 } 631 632 U_CAPI void U_EXPORT2 633 ucnv_reset(UConverter *converter) 634 { 635 _reset(converter, UCNV_RESET_BOTH, true); 636 } 637 638 U_CAPI void U_EXPORT2 639 ucnv_resetToUnicode(UConverter *converter) 640 { 641 _reset(converter, UCNV_RESET_TO_UNICODE, true); 642 } 643 644 U_CAPI void U_EXPORT2 645 ucnv_resetFromUnicode(UConverter *converter) 646 { 647 _reset(converter, UCNV_RESET_FROM_UNICODE, true); 648 } 649 650 U_CAPI int8_t U_EXPORT2 651 ucnv_getMaxCharSize (const UConverter * converter) 652 { 653 return converter->maxBytesPerUChar; 654 } 655 656 657 U_CAPI int8_t U_EXPORT2 658 ucnv_getMinCharSize (const UConverter * converter) 659 { 660 return converter->sharedData->staticData->minBytesPerChar; 661 } 662 663 U_CAPI const char* U_EXPORT2 664 ucnv_getName (const UConverter * converter, UErrorCode * err) 665 666 { 667 if (U_FAILURE (*err)) 668 return nullptr; 669 if(converter->sharedData->impl->getName){ 670 const char* temp= converter->sharedData->impl->getName(converter); 671 if(temp) 672 return temp; 673 } 674 return converter->sharedData->staticData->name; 675 } 676 677 U_CAPI int32_t U_EXPORT2 678 ucnv_getCCSID(const UConverter * converter, 679 UErrorCode * err) 680 { 681 int32_t ccsid; 682 if (U_FAILURE (*err)) 683 return -1; 684 685 ccsid = converter->sharedData->staticData->codepage; 686 if (ccsid == 0) { 687 /* Rare case. This is for cases like gb18030, 688 which doesn't have an IBM canonical name, but does have an IBM alias. */ 689 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); 690 if (U_SUCCESS(*err) && standardName) { 691 const char *ccsidStr = uprv_strchr(standardName, '-'); 692 if (ccsidStr) { 693 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ 694 } 695 } 696 } 697 return ccsid; 698 } 699 700 701 U_CAPI UConverterPlatform U_EXPORT2 702 ucnv_getPlatform (const UConverter * converter, 703 UErrorCode * err) 704 { 705 if (U_FAILURE (*err)) 706 return UCNV_UNKNOWN; 707 708 return (UConverterPlatform)converter->sharedData->staticData->platform; 709 } 710 711 U_CAPI void U_EXPORT2 712 ucnv_getToUCallBack (const UConverter * converter, 713 UConverterToUCallback *action, 714 const void **context) 715 { 716 *action = converter->fromCharErrorBehaviour; 717 *context = converter->toUContext; 718 } 719 720 U_CAPI void U_EXPORT2 721 ucnv_getFromUCallBack (const UConverter * converter, 722 UConverterFromUCallback *action, 723 const void **context) 724 { 725 *action = converter->fromUCharErrorBehaviour; 726 *context = converter->fromUContext; 727 } 728 729 U_CAPI void U_EXPORT2 730 ucnv_setToUCallBack (UConverter * converter, 731 UConverterToUCallback newAction, 732 const void* newContext, 733 UConverterToUCallback *oldAction, 734 const void** oldContext, 735 UErrorCode * err) 736 { 737 if (U_FAILURE (*err)) 738 return; 739 if (oldAction) *oldAction = converter->fromCharErrorBehaviour; 740 converter->fromCharErrorBehaviour = newAction; 741 if (oldContext) *oldContext = converter->toUContext; 742 converter->toUContext = newContext; 743 } 744 745 U_CAPI void U_EXPORT2 746 ucnv_setFromUCallBack (UConverter * converter, 747 UConverterFromUCallback newAction, 748 const void* newContext, 749 UConverterFromUCallback *oldAction, 750 const void** oldContext, 751 UErrorCode * err) 752 { 753 if (U_FAILURE (*err)) 754 return; 755 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; 756 converter->fromUCharErrorBehaviour = newAction; 757 if (oldContext) *oldContext = converter->fromUContext; 758 converter->fromUContext = newContext; 759 } 760 761 static void 762 _updateOffsets(int32_t *offsets, int32_t length, 763 int32_t sourceIndex, int32_t errorInputLength) { 764 int32_t *limit; 765 int32_t delta, offset; 766 767 if(sourceIndex>=0) { 768 /* 769 * adjust each offset by adding the previous sourceIndex 770 * minus the length of the input sequence that caused an 771 * error, if any 772 */ 773 delta=sourceIndex-errorInputLength; 774 } else { 775 /* 776 * set each offset to -1 because this conversion function 777 * does not handle offsets 778 */ 779 delta=-1; 780 } 781 782 limit=offsets+length; 783 if(delta==0) { 784 /* most common case, nothing to do */ 785 } else if(delta>0) { 786 /* add the delta to each offset (but not if the offset is <0) */ 787 while(offsets<limit) { 788 offset=*offsets; 789 if(offset>=0) { 790 *offsets=offset+delta; 791 } 792 ++offsets; 793 } 794 } else /* delta<0 */ { 795 /* 796 * set each offset to -1 because this conversion function 797 * does not handle offsets 798 * or the error input sequence started in a previous buffer 799 */ 800 while(offsets<limit) { 801 *offsets++=-1; 802 } 803 } 804 } 805 806 /* ucnv_fromUnicode --------------------------------------------------------- */ 807 808 /* 809 * Implementation note for m:n conversions 810 * 811 * While collecting source units to find the longest match for m:n conversion, 812 * some source units may need to be stored for a partial match. 813 * When a second buffer does not yield a match on all of the previously stored 814 * source units, then they must be "replayed", i.e., fed back into the converter. 815 * 816 * The code relies on the fact that replaying will not nest - 817 * converting a replay buffer will not result in a replay. 818 * This is because a replay is necessary only after the _continuation_ of a 819 * partial match failed, but a replay buffer is converted as a whole. 820 * It may result in some of its units being stored again for a partial match, 821 * but there will not be a continuation _during_ the replay which could fail. 822 * 823 * It is conceivable that a callback function could call the converter 824 * recursively in a way that causes another replay to be stored, but that 825 * would be an error in the callback function. 826 * Such violations will cause assertion failures in a debug build, 827 * and wrong output, but they will not cause a crash. 828 */ 829 830 static void 831 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) { 832 UConverterFromUnicode fromUnicode; 833 UConverter *cnv; 834 const char16_t *s; 835 char *t; 836 int32_t *offsets; 837 int32_t sourceIndex; 838 int32_t errorInputLength; 839 UBool converterSawEndOfInput, calledCallback; 840 841 /* variables for m:n conversion */ 842 char16_t replay[UCNV_EXT_MAX_UCHARS]; 843 const char16_t *realSource, *realSourceLimit; 844 int32_t realSourceIndex; 845 UBool realFlush; 846 847 cnv=pArgs->converter; 848 s=pArgs->source; 849 t=pArgs->target; 850 offsets=pArgs->offsets; 851 852 /* get the converter implementation function */ 853 sourceIndex=0; 854 if(offsets==nullptr) { 855 fromUnicode=cnv->sharedData->impl->fromUnicode; 856 } else { 857 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; 858 if(fromUnicode==nullptr) { 859 /* there is no WithOffsets implementation */ 860 fromUnicode=cnv->sharedData->impl->fromUnicode; 861 /* we will write -1 for each offset */ 862 sourceIndex=-1; 863 } 864 } 865 866 if(cnv->preFromULength>=0) { 867 /* normal mode */ 868 realSource=nullptr; 869 870 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ 871 realSourceLimit=nullptr; 872 realFlush=false; 873 realSourceIndex=0; 874 } else { 875 /* 876 * Previous m:n conversion stored source units from a partial match 877 * and failed to consume all of them. 878 * We need to "replay" them from a temporary buffer and convert them first. 879 */ 880 realSource=pArgs->source; 881 realSourceLimit=pArgs->sourceLimit; 882 realFlush=pArgs->flush; 883 realSourceIndex=sourceIndex; 884 885 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); 886 pArgs->source=replay; 887 pArgs->sourceLimit=replay-cnv->preFromULength; 888 pArgs->flush=false; 889 sourceIndex=-1; 890 891 cnv->preFromULength=0; 892 } 893 894 /* 895 * loop for conversion and error handling 896 * 897 * loop { 898 * convert 899 * loop { 900 * update offsets 901 * handle end of input 902 * handle errors/call callback 903 * } 904 * } 905 */ 906 for(;;) { 907 if(U_SUCCESS(*err)) { 908 /* convert */ 909 fromUnicode(pArgs, err); 910 911 /* 912 * set a flag for whether the converter 913 * successfully processed the end of the input 914 * 915 * need not check cnv->preFromULength==0 because a replay (<0) will cause 916 * s<sourceLimit before converterSawEndOfInput is checked 917 */ 918 converterSawEndOfInput= 919 static_cast<UBool>(U_SUCCESS(*err) && 920 pArgs->flush && pArgs->source==pArgs->sourceLimit && 921 cnv->fromUChar32==0); 922 } else { 923 /* handle error from ucnv_convertEx() */ 924 converterSawEndOfInput=false; 925 } 926 927 /* no callback called yet for this iteration */ 928 calledCallback=false; 929 930 /* no sourceIndex adjustment for conversion, only for callback output */ 931 errorInputLength=0; 932 933 /* 934 * loop for offsets and error handling 935 * 936 * iterates at most 3 times: 937 * 1. to clean up after the conversion function 938 * 2. after the callback 939 * 3. after the callback again if there was truncated input 940 */ 941 for(;;) { 942 /* update offsets if we write any */ 943 if(offsets!=nullptr) { 944 int32_t length = static_cast<int32_t>(pArgs->target - t); 945 if(length>0) { 946 _updateOffsets(offsets, length, sourceIndex, errorInputLength); 947 948 /* 949 * if a converter handles offsets and updates the offsets 950 * pointer at the end, then pArgs->offset should not change 951 * here; 952 * however, some converters do not handle offsets at all 953 * (sourceIndex<0) or may not update the offsets pointer 954 */ 955 pArgs->offsets=offsets+=length; 956 } 957 958 if(sourceIndex>=0) { 959 sourceIndex += static_cast<int32_t>(pArgs->source - s); 960 } 961 } 962 963 if(cnv->preFromULength<0) { 964 /* 965 * switch the source to new replay units (cannot occur while replaying) 966 * after offset handling and before end-of-input and callback handling 967 */ 968 if(realSource==nullptr) { 969 realSource=pArgs->source; 970 realSourceLimit=pArgs->sourceLimit; 971 realFlush=pArgs->flush; 972 realSourceIndex=sourceIndex; 973 974 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); 975 pArgs->source=replay; 976 pArgs->sourceLimit=replay-cnv->preFromULength; 977 pArgs->flush=false; 978 if((sourceIndex+=cnv->preFromULength)<0) { 979 sourceIndex=-1; 980 } 981 982 cnv->preFromULength=0; 983 } else { 984 /* see implementation note before _fromUnicodeWithCallback() */ 985 U_ASSERT(realSource==nullptr); 986 *err=U_INTERNAL_PROGRAM_ERROR; 987 } 988 } 989 990 /* update pointers */ 991 s=pArgs->source; 992 t=pArgs->target; 993 994 if(U_SUCCESS(*err)) { 995 if(s<pArgs->sourceLimit) { 996 /* 997 * continue with the conversion loop while there is still input left 998 * (continue converting by breaking out of only the inner loop) 999 */ 1000 break; 1001 } else if(realSource!=nullptr) { 1002 /* switch back from replaying to the real source and continue */ 1003 pArgs->source=realSource; 1004 pArgs->sourceLimit=realSourceLimit; 1005 pArgs->flush=realFlush; 1006 sourceIndex=realSourceIndex; 1007 1008 realSource=nullptr; 1009 break; 1010 } else if(pArgs->flush && cnv->fromUChar32!=0) { 1011 /* 1012 * the entire input stream is consumed 1013 * and there is a partial, truncated input sequence left 1014 */ 1015 1016 /* inject an error and continue with callback handling */ 1017 *err=U_TRUNCATED_CHAR_FOUND; 1018 calledCallback=false; /* new error condition */ 1019 } else { 1020 /* input consumed */ 1021 if(pArgs->flush) { 1022 /* 1023 * return to the conversion loop once more if the flush 1024 * flag is set and the conversion function has not 1025 * successfully processed the end of the input yet 1026 * 1027 * (continue converting by breaking out of only the inner loop) 1028 */ 1029 if(!converterSawEndOfInput) { 1030 break; 1031 } 1032 1033 /* reset the converter without calling the callback function */ 1034 _reset(cnv, UCNV_RESET_FROM_UNICODE, false); 1035 } 1036 1037 /* done successfully */ 1038 return; 1039 } 1040 } 1041 1042 /* U_FAILURE(*err) */ 1043 { 1044 UErrorCode e; 1045 1046 if( calledCallback || 1047 (e=*err)==U_BUFFER_OVERFLOW_ERROR || 1048 (e!=U_INVALID_CHAR_FOUND && 1049 e!=U_ILLEGAL_CHAR_FOUND && 1050 e!=U_TRUNCATED_CHAR_FOUND) 1051 ) { 1052 /* 1053 * the callback did not or cannot resolve the error: 1054 * set output pointers and return 1055 * 1056 * the check for buffer overflow is redundant but it is 1057 * a high-runner case and hopefully documents the intent 1058 * well 1059 * 1060 * if we were replaying, then the replay buffer must be 1061 * copied back into the UConverter 1062 * and the real arguments must be restored 1063 */ 1064 if(realSource!=nullptr) { 1065 int32_t length; 1066 1067 U_ASSERT(cnv->preFromULength==0); 1068 1069 length = static_cast<int32_t>(pArgs->sourceLimit - pArgs->source); 1070 if(length>0) { 1071 u_memcpy(cnv->preFromU, pArgs->source, length); 1072 cnv->preFromULength = static_cast<int8_t>(-length); 1073 } 1074 1075 pArgs->source=realSource; 1076 pArgs->sourceLimit=realSourceLimit; 1077 pArgs->flush=realFlush; 1078 } 1079 1080 return; 1081 } 1082 } 1083 1084 /* callback handling */ 1085 { 1086 UChar32 codePoint; 1087 1088 /* get and write the code point */ 1089 codePoint=cnv->fromUChar32; 1090 errorInputLength=0; 1091 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); 1092 cnv->invalidUCharLength = static_cast<int8_t>(errorInputLength); 1093 1094 /* set the converter state to deal with the next character */ 1095 cnv->fromUChar32=0; 1096 1097 /* call the callback function */ 1098 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, 1099 cnv->invalidUCharBuffer, errorInputLength, codePoint, 1100 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, 1101 err); 1102 } 1103 1104 /* 1105 * loop back to the offset handling 1106 * 1107 * this flag will indicate after offset handling 1108 * that a callback was called; 1109 * if the callback did not resolve the error, then we return 1110 */ 1111 calledCallback=true; 1112 } 1113 } 1114 } 1115 1116 /* 1117 * Output the fromUnicode overflow buffer. 1118 * Call this function if(cnv->charErrorBufferLength>0). 1119 * @return true if overflow 1120 */ 1121 static UBool 1122 ucnv_outputOverflowFromUnicode(UConverter *cnv, 1123 char **target, const char *targetLimit, 1124 int32_t **pOffsets, 1125 UErrorCode *err) { 1126 int32_t *offsets; 1127 char *overflow, *t; 1128 int32_t i, length; 1129 1130 t=*target; 1131 if(pOffsets!=nullptr) { 1132 offsets=*pOffsets; 1133 } else { 1134 offsets=nullptr; 1135 } 1136 1137 overflow = reinterpret_cast<char*>(cnv->charErrorBuffer); 1138 length=cnv->charErrorBufferLength; 1139 i=0; 1140 while(i<length) { 1141 if(t==targetLimit) { 1142 /* the overflow buffer contains too much, keep the rest */ 1143 int32_t j=0; 1144 1145 do { 1146 overflow[j++]=overflow[i++]; 1147 } while(i<length); 1148 1149 cnv->charErrorBufferLength = static_cast<int8_t>(j); 1150 *target=t; 1151 if(offsets!=nullptr) { 1152 *pOffsets=offsets; 1153 } 1154 *err=U_BUFFER_OVERFLOW_ERROR; 1155 return true; 1156 } 1157 1158 /* copy the overflow contents to the target */ 1159 *t++=overflow[i++]; 1160 if(offsets!=nullptr) { 1161 *offsets++=-1; /* no source index available for old output */ 1162 } 1163 } 1164 1165 /* the overflow buffer is completely copied to the target */ 1166 cnv->charErrorBufferLength=0; 1167 *target=t; 1168 if(offsets!=nullptr) { 1169 *pOffsets=offsets; 1170 } 1171 return false; 1172 } 1173 1174 U_CAPI void U_EXPORT2 1175 ucnv_fromUnicode(UConverter *cnv, 1176 char **target, const char *targetLimit, 1177 const char16_t **source, const char16_t *sourceLimit, 1178 int32_t *offsets, 1179 UBool flush, 1180 UErrorCode *err) { 1181 UConverterFromUnicodeArgs args; 1182 const char16_t *s; 1183 char *t; 1184 1185 /* check parameters */ 1186 if(err==nullptr || U_FAILURE(*err)) { 1187 return; 1188 } 1189 1190 if(cnv==nullptr || target==nullptr || source==nullptr) { 1191 *err=U_ILLEGAL_ARGUMENT_ERROR; 1192 return; 1193 } 1194 1195 s=*source; 1196 t=*target; 1197 1198 if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { 1199 /* 1200 Prevent code from going into an infinite loop in case we do hit this 1201 limit. The limit pointer is expected to be on a char16_t * boundary. 1202 This also prevents the next argument check from failing. 1203 */ 1204 sourceLimit = (const char16_t *)(((const char *)sourceLimit) - 1); 1205 } 1206 1207 /* 1208 * All these conditions should never happen. 1209 * 1210 * 1) Make sure that the limits are >= to the address source or target 1211 * 1212 * 2) Make sure that the buffer sizes do not exceed the number range for 1213 * int32_t because some functions use the size (in units or bytes) 1214 * rather than comparing pointers, and because offsets are int32_t values. 1215 * 1216 * size_t is guaranteed to be unsigned and large enough for the job. 1217 * 1218 * Return with an error instead of adjusting the limits because we would 1219 * not be able to maintain the semantics that either the source must be 1220 * consumed or the target filled (unless an error occurs). 1221 * An adjustment would be targetLimit=t+0x7fffffff; for example. 1222 * 1223 * 3) Make sure that the user didn't incorrectly cast a char16_t * pointer 1224 * to a char * pointer and provide an incomplete char16_t code unit. 1225 */ 1226 if (sourceLimit<s || targetLimit<t || 1227 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || 1228 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || 1229 (((const char *)sourceLimit-(const char *)s) & 1) != 0) 1230 { 1231 *err=U_ILLEGAL_ARGUMENT_ERROR; 1232 return; 1233 } 1234 1235 /* output the target overflow buffer */ 1236 if( cnv->charErrorBufferLength>0 && 1237 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) 1238 ) { 1239 /* U_BUFFER_OVERFLOW_ERROR */ 1240 return; 1241 } 1242 /* *target may have moved, therefore stop using t */ 1243 1244 if(!flush && s==sourceLimit && cnv->preFromULength>=0) { 1245 /* the overflow buffer is emptied and there is no new input: we are done */ 1246 return; 1247 } 1248 1249 /* 1250 * Do not simply return with a buffer overflow error if 1251 * !flush && t==targetLimit 1252 * because it is possible that the source will not generate any output. 1253 * For example, the skip callback may be called; 1254 * it does not output anything. 1255 */ 1256 1257 /* prepare the converter arguments */ 1258 args.converter=cnv; 1259 args.flush=flush; 1260 args.offsets=offsets; 1261 args.source=s; 1262 args.sourceLimit=sourceLimit; 1263 args.target=*target; 1264 args.targetLimit=targetLimit; 1265 args.size=sizeof(args); 1266 1267 _fromUnicodeWithCallback(&args, err); 1268 1269 *source=args.source; 1270 *target=args.target; 1271 } 1272 1273 /* ucnv_toUnicode() --------------------------------------------------------- */ 1274 1275 static void 1276 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { 1277 UConverterToUnicode toUnicode; 1278 UConverter *cnv; 1279 const char *s; 1280 char16_t *t; 1281 int32_t *offsets; 1282 int32_t sourceIndex; 1283 int32_t errorInputLength; 1284 UBool converterSawEndOfInput, calledCallback; 1285 1286 /* variables for m:n conversion */ 1287 char replay[UCNV_EXT_MAX_BYTES]; 1288 const char *realSource, *realSourceLimit; 1289 int32_t realSourceIndex; 1290 UBool realFlush; 1291 1292 cnv=pArgs->converter; 1293 s=pArgs->source; 1294 t=pArgs->target; 1295 offsets=pArgs->offsets; 1296 1297 /* get the converter implementation function */ 1298 sourceIndex=0; 1299 if(offsets==nullptr) { 1300 toUnicode=cnv->sharedData->impl->toUnicode; 1301 } else { 1302 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; 1303 if(toUnicode==nullptr) { 1304 /* there is no WithOffsets implementation */ 1305 toUnicode=cnv->sharedData->impl->toUnicode; 1306 /* we will write -1 for each offset */ 1307 sourceIndex=-1; 1308 } 1309 } 1310 1311 if(cnv->preToULength>=0) { 1312 /* normal mode */ 1313 realSource=nullptr; 1314 1315 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ 1316 realSourceLimit=nullptr; 1317 realFlush=false; 1318 realSourceIndex=0; 1319 } else { 1320 /* 1321 * Previous m:n conversion stored source units from a partial match 1322 * and failed to consume all of them. 1323 * We need to "replay" them from a temporary buffer and convert them first. 1324 */ 1325 realSource=pArgs->source; 1326 realSourceLimit=pArgs->sourceLimit; 1327 realFlush=pArgs->flush; 1328 realSourceIndex=sourceIndex; 1329 1330 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); 1331 pArgs->source=replay; 1332 pArgs->sourceLimit=replay-cnv->preToULength; 1333 pArgs->flush=false; 1334 sourceIndex=-1; 1335 1336 cnv->preToULength=0; 1337 } 1338 1339 /* 1340 * loop for conversion and error handling 1341 * 1342 * loop { 1343 * convert 1344 * loop { 1345 * update offsets 1346 * handle end of input 1347 * handle errors/call callback 1348 * } 1349 * } 1350 */ 1351 for(;;) { 1352 if(U_SUCCESS(*err)) { 1353 /* convert */ 1354 toUnicode(pArgs, err); 1355 1356 /* 1357 * set a flag for whether the converter 1358 * successfully processed the end of the input 1359 * 1360 * need not check cnv->preToULength==0 because a replay (<0) will cause 1361 * s<sourceLimit before converterSawEndOfInput is checked 1362 */ 1363 converterSawEndOfInput= 1364 static_cast<UBool>(U_SUCCESS(*err) && 1365 pArgs->flush && pArgs->source==pArgs->sourceLimit && 1366 cnv->toULength==0); 1367 } else { 1368 /* handle error from getNextUChar() or ucnv_convertEx() */ 1369 converterSawEndOfInput=false; 1370 } 1371 1372 /* no callback called yet for this iteration */ 1373 calledCallback=false; 1374 1375 /* no sourceIndex adjustment for conversion, only for callback output */ 1376 errorInputLength=0; 1377 1378 /* 1379 * loop for offsets and error handling 1380 * 1381 * iterates at most 3 times: 1382 * 1. to clean up after the conversion function 1383 * 2. after the callback 1384 * 3. after the callback again if there was truncated input 1385 */ 1386 for(;;) { 1387 /* update offsets if we write any */ 1388 if(offsets!=nullptr) { 1389 int32_t length = static_cast<int32_t>(pArgs->target - t); 1390 if(length>0) { 1391 _updateOffsets(offsets, length, sourceIndex, errorInputLength); 1392 1393 /* 1394 * if a converter handles offsets and updates the offsets 1395 * pointer at the end, then pArgs->offset should not change 1396 * here; 1397 * however, some converters do not handle offsets at all 1398 * (sourceIndex<0) or may not update the offsets pointer 1399 */ 1400 pArgs->offsets=offsets+=length; 1401 } 1402 1403 if(sourceIndex>=0) { 1404 sourceIndex += static_cast<int32_t>(pArgs->source - s); 1405 } 1406 } 1407 1408 if(cnv->preToULength<0) { 1409 /* 1410 * switch the source to new replay units (cannot occur while replaying) 1411 * after offset handling and before end-of-input and callback handling 1412 */ 1413 if(realSource==nullptr) { 1414 realSource=pArgs->source; 1415 realSourceLimit=pArgs->sourceLimit; 1416 realFlush=pArgs->flush; 1417 realSourceIndex=sourceIndex; 1418 1419 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); 1420 pArgs->source=replay; 1421 pArgs->sourceLimit=replay-cnv->preToULength; 1422 pArgs->flush=false; 1423 if((sourceIndex+=cnv->preToULength)<0) { 1424 sourceIndex=-1; 1425 } 1426 1427 cnv->preToULength=0; 1428 } else { 1429 /* see implementation note before _fromUnicodeWithCallback() */ 1430 U_ASSERT(realSource==nullptr); 1431 *err=U_INTERNAL_PROGRAM_ERROR; 1432 } 1433 } 1434 1435 /* update pointers */ 1436 s=pArgs->source; 1437 t=pArgs->target; 1438 1439 if(U_SUCCESS(*err)) { 1440 if(s<pArgs->sourceLimit) { 1441 /* 1442 * continue with the conversion loop while there is still input left 1443 * (continue converting by breaking out of only the inner loop) 1444 */ 1445 break; 1446 } else if(realSource!=nullptr) { 1447 /* switch back from replaying to the real source and continue */ 1448 pArgs->source=realSource; 1449 pArgs->sourceLimit=realSourceLimit; 1450 pArgs->flush=realFlush; 1451 sourceIndex=realSourceIndex; 1452 1453 realSource=nullptr; 1454 break; 1455 } else if(pArgs->flush && cnv->toULength>0) { 1456 /* 1457 * the entire input stream is consumed 1458 * and there is a partial, truncated input sequence left 1459 */ 1460 1461 /* inject an error and continue with callback handling */ 1462 *err=U_TRUNCATED_CHAR_FOUND; 1463 calledCallback=false; /* new error condition */ 1464 } else { 1465 /* input consumed */ 1466 if(pArgs->flush) { 1467 /* 1468 * return to the conversion loop once more if the flush 1469 * flag is set and the conversion function has not 1470 * successfully processed the end of the input yet 1471 * 1472 * (continue converting by breaking out of only the inner loop) 1473 */ 1474 if(!converterSawEndOfInput) { 1475 break; 1476 } 1477 1478 /* reset the converter without calling the callback function */ 1479 _reset(cnv, UCNV_RESET_TO_UNICODE, false); 1480 } 1481 1482 /* done successfully */ 1483 return; 1484 } 1485 } 1486 1487 /* U_FAILURE(*err) */ 1488 { 1489 UErrorCode e; 1490 1491 if( calledCallback || 1492 (e=*err)==U_BUFFER_OVERFLOW_ERROR || 1493 (e!=U_INVALID_CHAR_FOUND && 1494 e!=U_ILLEGAL_CHAR_FOUND && 1495 e!=U_TRUNCATED_CHAR_FOUND && 1496 e!=U_ILLEGAL_ESCAPE_SEQUENCE && 1497 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) 1498 ) { 1499 /* 1500 * the callback did not or cannot resolve the error: 1501 * set output pointers and return 1502 * 1503 * the check for buffer overflow is redundant but it is 1504 * a high-runner case and hopefully documents the intent 1505 * well 1506 * 1507 * if we were replaying, then the replay buffer must be 1508 * copied back into the UConverter 1509 * and the real arguments must be restored 1510 */ 1511 if(realSource!=nullptr) { 1512 int32_t length; 1513 1514 U_ASSERT(cnv->preToULength==0); 1515 1516 length = static_cast<int32_t>(pArgs->sourceLimit - pArgs->source); 1517 if(length>0) { 1518 uprv_memcpy(cnv->preToU, pArgs->source, length); 1519 cnv->preToULength = static_cast<int8_t>(-length); 1520 } 1521 1522 pArgs->source=realSource; 1523 pArgs->sourceLimit=realSourceLimit; 1524 pArgs->flush=realFlush; 1525 } 1526 1527 return; 1528 } 1529 } 1530 1531 /* copy toUBytes[] to invalidCharBuffer[] */ 1532 errorInputLength=cnv->invalidCharLength=cnv->toULength; 1533 if(errorInputLength>0) { 1534 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); 1535 } 1536 1537 /* set the converter state to deal with the next character */ 1538 cnv->toULength=0; 1539 1540 /* call the callback function */ 1541 if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { 1542 cnv->toUCallbackReason = UCNV_UNASSIGNED; 1543 } 1544 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, 1545 cnv->invalidCharBuffer, errorInputLength, 1546 cnv->toUCallbackReason, 1547 err); 1548 cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ 1549 1550 /* 1551 * loop back to the offset handling 1552 * 1553 * this flag will indicate after offset handling 1554 * that a callback was called; 1555 * if the callback did not resolve the error, then we return 1556 */ 1557 calledCallback=true; 1558 } 1559 } 1560 } 1561 1562 /* 1563 * Output the toUnicode overflow buffer. 1564 * Call this function if(cnv->UCharErrorBufferLength>0). 1565 * @return true if overflow 1566 */ 1567 static UBool 1568 ucnv_outputOverflowToUnicode(UConverter *cnv, 1569 char16_t **target, const char16_t *targetLimit, 1570 int32_t **pOffsets, 1571 UErrorCode *err) { 1572 int32_t *offsets; 1573 char16_t *overflow, *t; 1574 int32_t i, length; 1575 1576 t=*target; 1577 if(pOffsets!=nullptr) { 1578 offsets=*pOffsets; 1579 } else { 1580 offsets=nullptr; 1581 } 1582 1583 overflow=cnv->UCharErrorBuffer; 1584 length=cnv->UCharErrorBufferLength; 1585 i=0; 1586 while(i<length) { 1587 if(t==targetLimit) { 1588 /* the overflow buffer contains too much, keep the rest */ 1589 int32_t j=0; 1590 1591 do { 1592 overflow[j++]=overflow[i++]; 1593 } while(i<length); 1594 1595 cnv->UCharErrorBufferLength = static_cast<int8_t>(j); 1596 *target=t; 1597 if(offsets!=nullptr) { 1598 *pOffsets=offsets; 1599 } 1600 *err=U_BUFFER_OVERFLOW_ERROR; 1601 return true; 1602 } 1603 1604 /* copy the overflow contents to the target */ 1605 *t++=overflow[i++]; 1606 if(offsets!=nullptr) { 1607 *offsets++=-1; /* no source index available for old output */ 1608 } 1609 } 1610 1611 /* the overflow buffer is completely copied to the target */ 1612 cnv->UCharErrorBufferLength=0; 1613 *target=t; 1614 if(offsets!=nullptr) { 1615 *pOffsets=offsets; 1616 } 1617 return false; 1618 } 1619 1620 U_CAPI void U_EXPORT2 1621 ucnv_toUnicode(UConverter *cnv, 1622 char16_t **target, const char16_t *targetLimit, 1623 const char **source, const char *sourceLimit, 1624 int32_t *offsets, 1625 UBool flush, 1626 UErrorCode *err) { 1627 UConverterToUnicodeArgs args; 1628 const char *s; 1629 char16_t *t; 1630 1631 /* check parameters */ 1632 if(err==nullptr || U_FAILURE(*err)) { 1633 return; 1634 } 1635 1636 if(cnv==nullptr || target==nullptr || source==nullptr) { 1637 *err=U_ILLEGAL_ARGUMENT_ERROR; 1638 return; 1639 } 1640 1641 s=*source; 1642 t=*target; 1643 1644 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { 1645 /* 1646 Prevent code from going into an infinite loop in case we do hit this 1647 limit. The limit pointer is expected to be on a char16_t * boundary. 1648 This also prevents the next argument check from failing. 1649 */ 1650 targetLimit = (const char16_t *)(((const char *)targetLimit) - 1); 1651 } 1652 1653 /* 1654 * All these conditions should never happen. 1655 * 1656 * 1) Make sure that the limits are >= to the address source or target 1657 * 1658 * 2) Make sure that the buffer sizes do not exceed the number range for 1659 * int32_t because some functions use the size (in units or bytes) 1660 * rather than comparing pointers, and because offsets are int32_t values. 1661 * 1662 * size_t is guaranteed to be unsigned and large enough for the job. 1663 * 1664 * Return with an error instead of adjusting the limits because we would 1665 * not be able to maintain the semantics that either the source must be 1666 * consumed or the target filled (unless an error occurs). 1667 * An adjustment would be sourceLimit=t+0x7fffffff; for example. 1668 * 1669 * 3) Make sure that the user didn't incorrectly cast a char16_t * pointer 1670 * to a char * pointer and provide an incomplete char16_t code unit. 1671 */ 1672 if (sourceLimit<s || targetLimit<t || 1673 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || 1674 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || 1675 (((const char *)targetLimit-(const char *)t) & 1) != 0 1676 ) { 1677 *err=U_ILLEGAL_ARGUMENT_ERROR; 1678 return; 1679 } 1680 1681 /* output the target overflow buffer */ 1682 if( cnv->UCharErrorBufferLength>0 && 1683 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) 1684 ) { 1685 /* U_BUFFER_OVERFLOW_ERROR */ 1686 return; 1687 } 1688 /* *target may have moved, therefore stop using t */ 1689 1690 if(!flush && s==sourceLimit && cnv->preToULength>=0) { 1691 /* the overflow buffer is emptied and there is no new input: we are done */ 1692 return; 1693 } 1694 1695 /* 1696 * Do not simply return with a buffer overflow error if 1697 * !flush && t==targetLimit 1698 * because it is possible that the source will not generate any output. 1699 * For example, the skip callback may be called; 1700 * it does not output anything. 1701 */ 1702 1703 /* prepare the converter arguments */ 1704 args.converter=cnv; 1705 args.flush=flush; 1706 args.offsets=offsets; 1707 args.source=s; 1708 args.sourceLimit=sourceLimit; 1709 args.target=*target; 1710 args.targetLimit=targetLimit; 1711 args.size=sizeof(args); 1712 1713 _toUnicodeWithCallback(&args, err); 1714 1715 *source=args.source; 1716 *target=args.target; 1717 } 1718 1719 /* ucnv_to/fromUChars() ----------------------------------------------------- */ 1720 1721 U_CAPI int32_t U_EXPORT2 1722 ucnv_fromUChars(UConverter *cnv, 1723 char *dest, int32_t destCapacity, 1724 const char16_t *src, int32_t srcLength, 1725 UErrorCode *pErrorCode) { 1726 const char16_t *srcLimit; 1727 char *originalDest, *destLimit; 1728 int32_t destLength; 1729 1730 /* check arguments */ 1731 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 1732 return 0; 1733 } 1734 1735 if( cnv==nullptr || 1736 destCapacity<0 || (destCapacity>0 && dest==nullptr) || 1737 srcLength<-1 || (srcLength!=0 && src==nullptr) 1738 ) { 1739 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1740 return 0; 1741 } 1742 1743 /* initialize */ 1744 ucnv_resetFromUnicode(cnv); 1745 originalDest=dest; 1746 if(srcLength==-1) { 1747 srcLength=u_strlen(src); 1748 } 1749 if(srcLength>0) { 1750 srcLimit=src+srcLength; 1751 destCapacity=pinCapacity(dest, destCapacity); 1752 destLimit=dest+destCapacity; 1753 1754 /* perform the conversion */ 1755 UErrorCode bufferStatus = U_ZERO_ERROR; 1756 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus); 1757 destLength=(int32_t)(dest-originalDest); 1758 1759 /* if an overflow occurs, then get the preflighting length */ 1760 if(bufferStatus==U_BUFFER_OVERFLOW_ERROR) { 1761 char buffer[1024]; 1762 1763 destLimit=buffer+sizeof(buffer); 1764 do { 1765 dest=buffer; 1766 bufferStatus=U_ZERO_ERROR; 1767 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus); 1768 destLength+=(int32_t)(dest-buffer); 1769 } while(bufferStatus==U_BUFFER_OVERFLOW_ERROR); 1770 } 1771 if (U_FAILURE(bufferStatus)) { 1772 *pErrorCode = bufferStatus; 1773 } 1774 } else { 1775 destLength=0; 1776 } 1777 1778 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); 1779 } 1780 1781 U_CAPI int32_t U_EXPORT2 1782 ucnv_toUChars(UConverter *cnv, 1783 char16_t *dest, int32_t destCapacity, 1784 const char *src, int32_t srcLength, 1785 UErrorCode *pErrorCode) { 1786 const char *srcLimit; 1787 char16_t *originalDest, *destLimit; 1788 int32_t destLength; 1789 1790 /* check arguments */ 1791 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 1792 return 0; 1793 } 1794 1795 if( cnv==nullptr || 1796 destCapacity<0 || (destCapacity>0 && dest==nullptr) || 1797 srcLength<-1 || (srcLength!=0 && src==nullptr)) 1798 { 1799 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1800 return 0; 1801 } 1802 1803 /* initialize */ 1804 ucnv_resetToUnicode(cnv); 1805 originalDest=dest; 1806 if(srcLength==-1) { 1807 srcLength=(int32_t)uprv_strlen(src); 1808 } 1809 if(srcLength>0) { 1810 srcLimit=src+srcLength; 1811 destCapacity=pinCapacity(dest, destCapacity); 1812 destLimit=dest+destCapacity; 1813 1814 /* perform the conversion */ 1815 UErrorCode bufferStatus = U_ZERO_ERROR; 1816 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus); 1817 destLength=(int32_t)(dest-originalDest); 1818 1819 /* if an overflow occurs, then get the preflighting length */ 1820 if(bufferStatus==U_BUFFER_OVERFLOW_ERROR) 1821 { 1822 char16_t buffer[1024]; 1823 1824 destLimit=buffer+UPRV_LENGTHOF(buffer); 1825 do { 1826 dest=buffer; 1827 bufferStatus=U_ZERO_ERROR; 1828 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus); 1829 destLength+=(int32_t)(dest-buffer); 1830 } 1831 while(bufferStatus==U_BUFFER_OVERFLOW_ERROR); 1832 } 1833 if (U_FAILURE(bufferStatus)) { 1834 *pErrorCode = bufferStatus; 1835 } 1836 } else { 1837 destLength=0; 1838 } 1839 1840 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); 1841 } 1842 1843 /* ucnv_getNextUChar() ------------------------------------------------------ */ 1844 1845 U_CAPI UChar32 U_EXPORT2 1846 ucnv_getNextUChar(UConverter *cnv, 1847 const char **source, const char *sourceLimit, 1848 UErrorCode *err) { 1849 UConverterToUnicodeArgs args; 1850 char16_t buffer[U16_MAX_LENGTH]; 1851 const char *s; 1852 UChar32 c; 1853 int32_t i, length; 1854 1855 /* check parameters */ 1856 if(err==nullptr || U_FAILURE(*err)) { 1857 return 0xffff; 1858 } 1859 1860 if(cnv==nullptr || source==nullptr) { 1861 *err=U_ILLEGAL_ARGUMENT_ERROR; 1862 return 0xffff; 1863 } 1864 1865 s=*source; 1866 if(sourceLimit<s) { 1867 *err=U_ILLEGAL_ARGUMENT_ERROR; 1868 return 0xffff; 1869 } 1870 1871 /* 1872 * Make sure that the buffer sizes do not exceed the number range for 1873 * int32_t because some functions use the size (in units or bytes) 1874 * rather than comparing pointers, and because offsets are int32_t values. 1875 * 1876 * size_t is guaranteed to be unsigned and large enough for the job. 1877 * 1878 * Return with an error instead of adjusting the limits because we would 1879 * not be able to maintain the semantics that either the source must be 1880 * consumed or the target filled (unless an error occurs). 1881 * An adjustment would be sourceLimit=t+0x7fffffff; for example. 1882 */ 1883 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { 1884 *err=U_ILLEGAL_ARGUMENT_ERROR; 1885 return 0xffff; 1886 } 1887 1888 c=U_SENTINEL; 1889 1890 /* flush the target overflow buffer */ 1891 if(cnv->UCharErrorBufferLength>0) { 1892 char16_t *overflow; 1893 1894 overflow=cnv->UCharErrorBuffer; 1895 i=0; 1896 length=cnv->UCharErrorBufferLength; 1897 U16_NEXT(overflow, i, length, c); 1898 1899 /* move the remaining overflow contents up to the beginning */ 1900 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { 1901 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, 1902 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); 1903 } 1904 1905 if(!U16_IS_LEAD(c) || i<length) { 1906 return c; 1907 } 1908 /* 1909 * Continue if the overflow buffer contained only a lead surrogate, 1910 * in case the converter outputs single surrogates from complete 1911 * input sequences. 1912 */ 1913 } 1914 1915 /* 1916 * flush==true is implied for ucnv_getNextUChar() 1917 * 1918 * do not simply return even if s==sourceLimit because the converter may 1919 * not have seen flush==true before 1920 */ 1921 1922 /* prepare the converter arguments */ 1923 args.converter=cnv; 1924 args.flush=true; 1925 args.offsets=nullptr; 1926 args.source=s; 1927 args.sourceLimit=sourceLimit; 1928 args.target=buffer; 1929 args.targetLimit=buffer+1; 1930 args.size=sizeof(args); 1931 1932 if(c<0) { 1933 /* 1934 * call the native getNextUChar() implementation if we are 1935 * at a character boundary (toULength==0) 1936 * 1937 * unlike with _toUnicode(), getNextUChar() implementations must set 1938 * U_TRUNCATED_CHAR_FOUND for truncated input, 1939 * in addition to setting toULength/toUBytes[] 1940 */ 1941 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=nullptr) { 1942 c=cnv->sharedData->impl->getNextUChar(&args, err); 1943 *source=s=args.source; 1944 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { 1945 /* reset the converter without calling the callback function */ 1946 _reset(cnv, UCNV_RESET_TO_UNICODE, false); 1947 return 0xffff; /* no output */ 1948 } else if(U_SUCCESS(*err) && c>=0) { 1949 return c; 1950 /* 1951 * else fall through to use _toUnicode() because 1952 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all 1953 * U_FAILURE: call _toUnicode() for callback handling (do not output c) 1954 */ 1955 } 1956 } 1957 1958 /* convert to one char16_t in buffer[0], or handle getNextUChar() errors */ 1959 _toUnicodeWithCallback(&args, err); 1960 1961 if(*err==U_BUFFER_OVERFLOW_ERROR) { 1962 *err=U_ZERO_ERROR; 1963 } 1964 1965 i=0; 1966 length=(int32_t)(args.target-buffer); 1967 } else { 1968 /* write the lead surrogate from the overflow buffer */ 1969 buffer[0]=(char16_t)c; 1970 args.target=buffer+1; 1971 i=0; 1972 length=1; 1973 } 1974 1975 /* buffer contents starts at i and ends before length */ 1976 1977 if(U_FAILURE(*err)) { 1978 c=0xffff; /* no output */ 1979 } else if(length==0) { 1980 /* no input or only state changes */ 1981 *err=U_INDEX_OUTOFBOUNDS_ERROR; 1982 /* no need to reset explicitly because _toUnicodeWithCallback() did it */ 1983 c=0xffff; /* no output */ 1984 } else { 1985 c=buffer[0]; 1986 i=1; 1987 if(!U16_IS_LEAD(c)) { 1988 /* consume c=buffer[0], done */ 1989 } else { 1990 /* got a lead surrogate, see if a trail surrogate follows */ 1991 char16_t c2; 1992 1993 if(cnv->UCharErrorBufferLength>0) { 1994 /* got overflow output from the conversion */ 1995 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { 1996 /* got a trail surrogate, too */ 1997 c=U16_GET_SUPPLEMENTARY(c, c2); 1998 1999 /* move the remaining overflow contents up to the beginning */ 2000 if((--cnv->UCharErrorBufferLength)>0) { 2001 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, 2002 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); 2003 } 2004 } else { 2005 /* c is an unpaired lead surrogate, just return it */ 2006 } 2007 } else if(args.source<sourceLimit) { 2008 /* convert once more, to buffer[1] */ 2009 args.targetLimit=buffer+2; 2010 _toUnicodeWithCallback(&args, err); 2011 if(*err==U_BUFFER_OVERFLOW_ERROR) { 2012 *err=U_ZERO_ERROR; 2013 } 2014 2015 length=(int32_t)(args.target-buffer); 2016 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { 2017 /* got a trail surrogate, too */ 2018 c=U16_GET_SUPPLEMENTARY(c, c2); 2019 i=2; 2020 } 2021 } 2022 } 2023 } 2024 2025 /* 2026 * move leftover output from buffer[i..length[ 2027 * into the beginning of the overflow buffer 2028 */ 2029 if(i<length) { 2030 /* move further overflow back */ 2031 int32_t delta=length-i; 2032 if((length=cnv->UCharErrorBufferLength)>0) { 2033 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, 2034 length*U_SIZEOF_UCHAR); 2035 } 2036 cnv->UCharErrorBufferLength=(int8_t)(length+delta); 2037 2038 cnv->UCharErrorBuffer[0]=buffer[i++]; 2039 if(delta>1) { 2040 cnv->UCharErrorBuffer[1]=buffer[i]; 2041 } 2042 } 2043 2044 *source=args.source; 2045 return c; 2046 } 2047 2048 /* ucnv_convert() and siblings ---------------------------------------------- */ 2049 2050 U_CAPI void U_EXPORT2 2051 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, 2052 char **target, const char *targetLimit, 2053 const char **source, const char *sourceLimit, 2054 char16_t *pivotStart, char16_t **pivotSource, 2055 char16_t **pivotTarget, const char16_t *pivotLimit, 2056 UBool reset, UBool flush, 2057 UErrorCode *pErrorCode) { 2058 char16_t pivotBuffer[CHUNK_SIZE]; 2059 const char16_t *myPivotSource; 2060 char16_t *myPivotTarget; 2061 const char *s; 2062 char *t; 2063 2064 UConverterToUnicodeArgs toUArgs; 2065 UConverterFromUnicodeArgs fromUArgs; 2066 UConverterConvert convert; 2067 2068 /* error checking */ 2069 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 2070 return; 2071 } 2072 2073 if( targetCnv==nullptr || sourceCnv==nullptr || 2074 source==nullptr || *source==nullptr || 2075 target==nullptr || *target==nullptr || targetLimit==nullptr 2076 ) { 2077 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2078 return; 2079 } 2080 2081 s=*source; 2082 t=*target; 2083 if((sourceLimit!=nullptr && sourceLimit<s) || targetLimit<t) { 2084 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2085 return; 2086 } 2087 2088 /* 2089 * Make sure that the buffer sizes do not exceed the number range for 2090 * int32_t. See ucnv_toUnicode() for a more detailed comment. 2091 */ 2092 if( 2093 (sourceLimit!=nullptr && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || 2094 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) 2095 ) { 2096 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2097 return; 2098 } 2099 2100 if(pivotStart==nullptr) { 2101 if(!flush) { 2102 /* streaming conversion requires an explicit pivot buffer */ 2103 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2104 return; 2105 } 2106 2107 /* use the stack pivot buffer */ 2108 myPivotSource=myPivotTarget=pivotStart=pivotBuffer; 2109 pivotSource=(char16_t **)&myPivotSource; 2110 pivotTarget=&myPivotTarget; 2111 pivotLimit=pivotBuffer+CHUNK_SIZE; 2112 } else if( pivotStart>=pivotLimit || 2113 pivotSource==nullptr || *pivotSource==nullptr || 2114 pivotTarget==nullptr || *pivotTarget==nullptr || 2115 pivotLimit==nullptr 2116 ) { 2117 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2118 return; 2119 } 2120 2121 if(sourceLimit==nullptr) { 2122 /* get limit of single-byte-NUL-terminated source string */ 2123 sourceLimit=uprv_strchr(*source, 0); 2124 } 2125 2126 if(reset) { 2127 ucnv_resetToUnicode(sourceCnv); 2128 ucnv_resetFromUnicode(targetCnv); 2129 *pivotSource=*pivotTarget=pivotStart; 2130 } else if(targetCnv->charErrorBufferLength>0) { 2131 /* output the targetCnv overflow buffer */ 2132 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, nullptr, pErrorCode)) { 2133 /* U_BUFFER_OVERFLOW_ERROR */ 2134 return; 2135 } 2136 /* *target has moved, therefore stop using t */ 2137 2138 if( !flush && 2139 targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && 2140 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit 2141 ) { 2142 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ 2143 return; 2144 } 2145 } 2146 2147 /* Is direct-UTF-8 conversion available? */ 2148 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && 2149 targetCnv->sharedData->impl->fromUTF8!=nullptr 2150 ) { 2151 convert=targetCnv->sharedData->impl->fromUTF8; 2152 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && 2153 sourceCnv->sharedData->impl->toUTF8!=nullptr 2154 ) { 2155 convert=sourceCnv->sharedData->impl->toUTF8; 2156 } else { 2157 convert=nullptr; 2158 } 2159 2160 /* 2161 * If direct-UTF-8 conversion is available, then we use a smaller 2162 * pivot buffer for error handling and partial matches 2163 * so that we quickly return to direct conversion. 2164 * 2165 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. 2166 * 2167 * We could reduce the pivot buffer size further, at the cost of 2168 * buffer overflows from callbacks. 2169 * The pivot buffer should not be smaller than the maximum number of 2170 * fromUnicode extension table input UChars 2171 * (for m:n conversion, see 2172 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) 2173 * or 2 for surrogate pairs. 2174 * 2175 * Too small a buffer can cause thrashing between pivoting and direct 2176 * conversion, with function call overhead outweighing the benefits 2177 * of direct conversion. 2178 */ 2179 if(convert!=nullptr && (pivotLimit-pivotStart)>32) { 2180 pivotLimit=pivotStart+32; 2181 } 2182 2183 /* prepare the converter arguments */ 2184 fromUArgs.converter=targetCnv; 2185 fromUArgs.flush=false; 2186 fromUArgs.offsets=nullptr; 2187 fromUArgs.target=*target; 2188 fromUArgs.targetLimit=targetLimit; 2189 fromUArgs.size=sizeof(fromUArgs); 2190 2191 toUArgs.converter=sourceCnv; 2192 toUArgs.flush=flush; 2193 toUArgs.offsets=nullptr; 2194 toUArgs.source=s; 2195 toUArgs.sourceLimit=sourceLimit; 2196 toUArgs.targetLimit=pivotLimit; 2197 toUArgs.size=sizeof(toUArgs); 2198 2199 /* 2200 * TODO: Consider separating this function into two functions, 2201 * extracting exactly the conversion loop, 2202 * for readability and to reduce the set of visible variables. 2203 * 2204 * Otherwise stop using s and t from here on. 2205 */ 2206 s=t=nullptr; 2207 2208 /* 2209 * conversion loop 2210 * 2211 * The sequence of steps in the loop may appear backward, 2212 * but the principle is simple: 2213 * In the chain of 2214 * source - sourceCnv overflow - pivot - targetCnv overflow - target 2215 * empty out later buffers before refilling them from earlier ones. 2216 * 2217 * The targetCnv overflow buffer is flushed out only once before the loop. 2218 */ 2219 for(;;) { 2220 /* 2221 * if(pivot not empty or error or replay or flush fromUnicode) { 2222 * fromUnicode(pivot -> target); 2223 * } 2224 * 2225 * For pivoting conversion; and for direct conversion for 2226 * error callback handling and flushing the replay buffer. 2227 */ 2228 if( *pivotSource<*pivotTarget || 2229 U_FAILURE(*pErrorCode) || 2230 targetCnv->preFromULength<0 || 2231 fromUArgs.flush 2232 ) { 2233 fromUArgs.source=*pivotSource; 2234 fromUArgs.sourceLimit=*pivotTarget; 2235 _fromUnicodeWithCallback(&fromUArgs, pErrorCode); 2236 if(U_FAILURE(*pErrorCode)) { 2237 /* target overflow, or conversion error */ 2238 *pivotSource=(char16_t *)fromUArgs.source; 2239 break; 2240 } 2241 2242 /* 2243 * _fromUnicodeWithCallback() must have consumed the pivot contents 2244 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() 2245 */ 2246 } 2247 2248 /* The pivot buffer is empty; reset it so we start at pivotStart. */ 2249 *pivotSource=*pivotTarget=pivotStart; 2250 2251 /* 2252 * if(sourceCnv overflow buffer not empty) { 2253 * move(sourceCnv overflow buffer -> pivot); 2254 * continue; 2255 * } 2256 */ 2257 /* output the sourceCnv overflow buffer */ 2258 if(sourceCnv->UCharErrorBufferLength>0) { 2259 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, nullptr, pErrorCode)) { 2260 /* U_BUFFER_OVERFLOW_ERROR */ 2261 *pErrorCode=U_ZERO_ERROR; 2262 } 2263 continue; 2264 } 2265 2266 /* 2267 * check for end of input and break if done 2268 * 2269 * Checking both flush and fromUArgs.flush ensures that the converters 2270 * have been called with the flush flag set if the ucnv_convertEx() 2271 * caller set it. 2272 */ 2273 if( toUArgs.source==sourceLimit && 2274 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && 2275 (!flush || fromUArgs.flush) 2276 ) { 2277 /* done successfully */ 2278 break; 2279 } 2280 2281 /* 2282 * use direct conversion if available 2283 * but not if continuing a partial match 2284 * or flushing the toUnicode replay buffer 2285 */ 2286 if(convert!=nullptr && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { 2287 if(*pErrorCode==U_USING_DEFAULT_WARNING) { 2288 /* remove a warning that may be set by this function */ 2289 *pErrorCode=U_ZERO_ERROR; 2290 } 2291 convert(&fromUArgs, &toUArgs, pErrorCode); 2292 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2293 break; 2294 } else if(U_FAILURE(*pErrorCode)) { 2295 if(sourceCnv->toULength>0) { 2296 /* 2297 * Fall through to calling _toUnicodeWithCallback() 2298 * for callback handling. 2299 * 2300 * The pivot buffer will be reset with 2301 * *pivotSource=*pivotTarget=pivotStart; 2302 * which indicates a toUnicode error to the caller 2303 * (*pivotSource==pivotStart shows no pivot UChars consumed). 2304 */ 2305 } else { 2306 /* 2307 * Indicate a fromUnicode error to the caller 2308 * (*pivotSource>pivotStart shows some pivot UChars consumed). 2309 */ 2310 *pivotSource=*pivotTarget=pivotStart+1; 2311 /* 2312 * Loop around to calling _fromUnicodeWithCallbacks() 2313 * for callback handling. 2314 */ 2315 continue; 2316 } 2317 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { 2318 /* 2319 * No error, but the implementation requested to temporarily 2320 * fall back to pivoting. 2321 */ 2322 *pErrorCode=U_ZERO_ERROR; 2323 /* 2324 * The following else branches are almost identical to the end-of-input 2325 * handling in _toUnicodeWithCallback(). 2326 * Avoid calling it just for the end of input. 2327 */ 2328 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ 2329 /* 2330 * the entire input stream is consumed 2331 * and there is a partial, truncated input sequence left 2332 */ 2333 2334 /* inject an error and continue with callback handling */ 2335 *pErrorCode=U_TRUNCATED_CHAR_FOUND; 2336 } else { 2337 /* input consumed */ 2338 if(flush) { 2339 /* reset the converters without calling the callback functions */ 2340 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, false); 2341 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, false); 2342 } 2343 2344 /* done successfully */ 2345 break; 2346 } 2347 } 2348 2349 /* 2350 * toUnicode(source -> pivot); 2351 * 2352 * For pivoting conversion; and for direct conversion for 2353 * error callback handling, continuing partial matches 2354 * and flushing the replay buffer. 2355 * 2356 * The pivot buffer is empty and reset. 2357 */ 2358 toUArgs.target=pivotStart; /* ==*pivotTarget */ 2359 /* toUArgs.targetLimit=pivotLimit; already set before the loop */ 2360 _toUnicodeWithCallback(&toUArgs, pErrorCode); 2361 *pivotTarget=toUArgs.target; 2362 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2363 /* pivot overflow: continue with the conversion loop */ 2364 *pErrorCode=U_ZERO_ERROR; 2365 } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { 2366 /* conversion error, or there was nothing left to convert */ 2367 break; 2368 } 2369 /* 2370 * else: 2371 * _toUnicodeWithCallback() wrote into the pivot buffer, 2372 * continue with fromUnicode conversion. 2373 * 2374 * Set the fromUnicode flush flag if we flush and if toUnicode has 2375 * processed the end of the input. 2376 */ 2377 if( flush && toUArgs.source==sourceLimit && 2378 sourceCnv->preToULength>=0 && 2379 sourceCnv->UCharErrorBufferLength==0 2380 ) { 2381 fromUArgs.flush=true; 2382 } 2383 } 2384 2385 /* 2386 * The conversion loop is exited when one of the following is true: 2387 * - the entire source text has been converted successfully to the target buffer 2388 * - a target buffer overflow occurred 2389 * - a conversion error occurred 2390 */ 2391 2392 *source=toUArgs.source; 2393 *target=fromUArgs.target; 2394 2395 /* terminate the target buffer if possible */ 2396 if(flush && U_SUCCESS(*pErrorCode)) { 2397 if(*target!=targetLimit) { 2398 **target=0; 2399 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { 2400 *pErrorCode=U_ZERO_ERROR; 2401 } 2402 } else { 2403 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; 2404 } 2405 } 2406 } 2407 2408 /* internal implementation of ucnv_convert() etc. with preflighting */ 2409 static int32_t 2410 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, 2411 char *target, int32_t targetCapacity, 2412 const char *source, int32_t sourceLength, 2413 UErrorCode *pErrorCode) { 2414 char16_t pivotBuffer[CHUNK_SIZE]; 2415 char16_t *pivot, *pivot2; 2416 2417 char *myTarget; 2418 const char *sourceLimit; 2419 const char *targetLimit; 2420 int32_t targetLength=0; 2421 2422 /* set up */ 2423 if(sourceLength<0) { 2424 sourceLimit=uprv_strchr(source, 0); 2425 } else { 2426 sourceLimit=source+sourceLength; 2427 } 2428 2429 /* if there is no input data, we're done */ 2430 if(source==sourceLimit) { 2431 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2432 } 2433 2434 pivot=pivot2=pivotBuffer; 2435 myTarget=target; 2436 targetLength=0; 2437 2438 if(targetCapacity>0) { 2439 /* perform real conversion */ 2440 targetLimit=target+targetCapacity; 2441 ucnv_convertEx(outConverter, inConverter, 2442 &myTarget, targetLimit, 2443 &source, sourceLimit, 2444 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 2445 false, 2446 true, 2447 pErrorCode); 2448 targetLength = static_cast<int32_t>(myTarget - target); 2449 } 2450 2451 /* 2452 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing 2453 * to it but continue the conversion in order to store in targetCapacity 2454 * the number of bytes that was required. 2455 */ 2456 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) 2457 { 2458 char targetBuffer[CHUNK_SIZE]; 2459 2460 targetLimit=targetBuffer+CHUNK_SIZE; 2461 do { 2462 *pErrorCode=U_ZERO_ERROR; 2463 myTarget=targetBuffer; 2464 ucnv_convertEx(outConverter, inConverter, 2465 &myTarget, targetLimit, 2466 &source, sourceLimit, 2467 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 2468 false, 2469 true, 2470 pErrorCode); 2471 targetLength += static_cast<int32_t>(myTarget - targetBuffer); 2472 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 2473 2474 /* done with preflighting, set warnings and errors as appropriate */ 2475 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); 2476 } 2477 2478 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ 2479 return targetLength; 2480 } 2481 2482 U_CAPI int32_t U_EXPORT2 2483 ucnv_convert(const char *toConverterName, const char *fromConverterName, 2484 char *target, int32_t targetCapacity, 2485 const char *source, int32_t sourceLength, 2486 UErrorCode *pErrorCode) { 2487 UConverter in, out; /* stack-allocated */ 2488 UConverter *inConverter, *outConverter; 2489 int32_t targetLength; 2490 2491 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 2492 return 0; 2493 } 2494 2495 if( source==nullptr || sourceLength<-1 || 2496 targetCapacity<0 || (targetCapacity>0 && target==nullptr) 2497 ) { 2498 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2499 return 0; 2500 } 2501 2502 /* if there is no input data, we're done */ 2503 if(sourceLength==0 || (sourceLength<0 && *source==0)) { 2504 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2505 } 2506 2507 /* create the converters */ 2508 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); 2509 if(U_FAILURE(*pErrorCode)) { 2510 return 0; 2511 } 2512 2513 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); 2514 if(U_FAILURE(*pErrorCode)) { 2515 ucnv_close(inConverter); 2516 return 0; 2517 } 2518 2519 targetLength=ucnv_internalConvert(outConverter, inConverter, 2520 target, targetCapacity, 2521 source, sourceLength, 2522 pErrorCode); 2523 2524 ucnv_close(inConverter); 2525 ucnv_close(outConverter); 2526 2527 return targetLength; 2528 } 2529 2530 /* @internal */ 2531 static int32_t 2532 ucnv_convertAlgorithmic(UBool convertToAlgorithmic, 2533 UConverterType algorithmicType, 2534 UConverter *cnv, 2535 char *target, int32_t targetCapacity, 2536 const char *source, int32_t sourceLength, 2537 UErrorCode *pErrorCode) { 2538 UConverter algoConverterStatic; /* stack-allocated */ 2539 UConverter *algoConverter, *to, *from; 2540 int32_t targetLength; 2541 2542 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 2543 return 0; 2544 } 2545 2546 if( cnv==nullptr || source==nullptr || sourceLength<-1 || 2547 targetCapacity<0 || (targetCapacity>0 && target==nullptr) 2548 ) { 2549 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2550 return 0; 2551 } 2552 2553 /* if there is no input data, we're done */ 2554 if(sourceLength==0 || (sourceLength<0 && *source==0)) { 2555 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2556 } 2557 2558 /* create the algorithmic converter */ 2559 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, 2560 "", 0, pErrorCode); 2561 if(U_FAILURE(*pErrorCode)) { 2562 return 0; 2563 } 2564 2565 /* reset the other converter */ 2566 if(convertToAlgorithmic) { 2567 /* cnv->Unicode->algo */ 2568 ucnv_resetToUnicode(cnv); 2569 to=algoConverter; 2570 from=cnv; 2571 } else { 2572 /* algo->Unicode->cnv */ 2573 ucnv_resetFromUnicode(cnv); 2574 from=algoConverter; 2575 to=cnv; 2576 } 2577 2578 targetLength=ucnv_internalConvert(to, from, 2579 target, targetCapacity, 2580 source, sourceLength, 2581 pErrorCode); 2582 2583 ucnv_close(algoConverter); 2584 2585 return targetLength; 2586 } 2587 2588 U_CAPI int32_t U_EXPORT2 2589 ucnv_toAlgorithmic(UConverterType algorithmicType, 2590 UConverter *cnv, 2591 char *target, int32_t targetCapacity, 2592 const char *source, int32_t sourceLength, 2593 UErrorCode *pErrorCode) { 2594 return ucnv_convertAlgorithmic(true, algorithmicType, cnv, 2595 target, targetCapacity, 2596 source, sourceLength, 2597 pErrorCode); 2598 } 2599 2600 U_CAPI int32_t U_EXPORT2 2601 ucnv_fromAlgorithmic(UConverter *cnv, 2602 UConverterType algorithmicType, 2603 char *target, int32_t targetCapacity, 2604 const char *source, int32_t sourceLength, 2605 UErrorCode *pErrorCode) UPRV_NO_SANITIZE_UNDEFINED { 2606 2607 if(algorithmicType<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=algorithmicType) { 2608 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 2609 return 0; 2610 } 2611 return ucnv_convertAlgorithmic(false, algorithmicType, cnv, 2612 target, targetCapacity, 2613 source, sourceLength, 2614 pErrorCode); 2615 } 2616 2617 U_CAPI UConverterType U_EXPORT2 2618 ucnv_getType(const UConverter* converter) 2619 { 2620 int8_t type = converter->sharedData->staticData->conversionType; 2621 #if !UCONFIG_NO_LEGACY_CONVERSION 2622 if(type == UCNV_MBCS) { 2623 return ucnv_MBCSGetType(converter); 2624 } 2625 #endif 2626 return (UConverterType)type; 2627 } 2628 2629 U_CAPI void U_EXPORT2 2630 ucnv_getStarters(const UConverter* converter, 2631 UBool starters[256], 2632 UErrorCode* err) 2633 { 2634 if (err == nullptr || U_FAILURE(*err)) { 2635 return; 2636 } 2637 2638 if(converter->sharedData->impl->getStarters != nullptr) { 2639 converter->sharedData->impl->getStarters(converter, starters, err); 2640 } else { 2641 *err = U_ILLEGAL_ARGUMENT_ERROR; 2642 } 2643 } 2644 2645 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) 2646 { 2647 UErrorCode errorCode; 2648 const char *name; 2649 int32_t i; 2650 2651 if(cnv==nullptr) { 2652 return nullptr; 2653 } 2654 2655 errorCode=U_ZERO_ERROR; 2656 name=ucnv_getName(cnv, &errorCode); 2657 if(U_FAILURE(errorCode)) { 2658 return nullptr; 2659 } 2660 2661 for(i=0; i<UPRV_LENGTHOF(ambiguousConverters); ++i) 2662 { 2663 if(0==uprv_strcmp(name, ambiguousConverters[i].name)) 2664 { 2665 return ambiguousConverters+i; 2666 } 2667 } 2668 2669 return nullptr; 2670 } 2671 2672 U_CAPI void U_EXPORT2 2673 ucnv_fixFileSeparator(const UConverter *cnv, 2674 char16_t* source, 2675 int32_t sourceLength) { 2676 const UAmbiguousConverter *a; 2677 int32_t i; 2678 char16_t variant5c; 2679 2680 if(cnv==nullptr || source==nullptr || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==nullptr) 2681 { 2682 return; 2683 } 2684 2685 variant5c=a->variant5c; 2686 for(i=0; i<sourceLength; ++i) { 2687 if(source[i]==variant5c) { 2688 source[i]=0x5c; 2689 } 2690 } 2691 } 2692 2693 U_CAPI UBool U_EXPORT2 2694 ucnv_isAmbiguous(const UConverter *cnv) { 2695 return ucnv_getAmbiguous(cnv)!=nullptr; 2696 } 2697 2698 U_CAPI void U_EXPORT2 2699 ucnv_setFallback(UConverter *cnv, UBool usesFallback) 2700 { 2701 cnv->useFallback = usesFallback; 2702 } 2703 2704 U_CAPI UBool U_EXPORT2 2705 ucnv_usesFallback(const UConverter *cnv) 2706 { 2707 return cnv->useFallback; 2708 } 2709 2710 U_CAPI void U_EXPORT2 2711 ucnv_getInvalidChars (const UConverter * converter, 2712 char *errBytes, 2713 int8_t * len, 2714 UErrorCode * err) 2715 { 2716 if (err == nullptr || U_FAILURE(*err)) 2717 { 2718 return; 2719 } 2720 if (len == nullptr || errBytes == nullptr || converter == nullptr) 2721 { 2722 *err = U_ILLEGAL_ARGUMENT_ERROR; 2723 return; 2724 } 2725 if (*len < converter->invalidCharLength) 2726 { 2727 *err = U_INDEX_OUTOFBOUNDS_ERROR; 2728 return; 2729 } 2730 if ((*len = converter->invalidCharLength) > 0) 2731 { 2732 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); 2733 } 2734 } 2735 2736 U_CAPI void U_EXPORT2 2737 ucnv_getInvalidUChars (const UConverter * converter, 2738 char16_t *errChars, 2739 int8_t * len, 2740 UErrorCode * err) 2741 { 2742 if (err == nullptr || U_FAILURE(*err)) 2743 { 2744 return; 2745 } 2746 if (len == nullptr || errChars == nullptr || converter == nullptr) 2747 { 2748 *err = U_ILLEGAL_ARGUMENT_ERROR; 2749 return; 2750 } 2751 if (*len < converter->invalidUCharLength) 2752 { 2753 *err = U_INDEX_OUTOFBOUNDS_ERROR; 2754 return; 2755 } 2756 if ((*len = converter->invalidUCharLength) > 0) 2757 { 2758 u_memcpy (errChars, converter->invalidUCharBuffer, *len); 2759 } 2760 } 2761 2762 #define SIG_MAX_LEN 5 2763 2764 U_CAPI const char* U_EXPORT2 2765 ucnv_detectUnicodeSignature( const char* source, 2766 int32_t sourceLength, 2767 int32_t* signatureLength, 2768 UErrorCode* pErrorCode) { 2769 int32_t dummy; 2770 2771 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN 2772 * bytes we don't misdetect something 2773 */ 2774 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; 2775 int i = 0; 2776 2777 if((pErrorCode==nullptr) || U_FAILURE(*pErrorCode)){ 2778 return nullptr; 2779 } 2780 2781 if(source == nullptr || sourceLength < -1){ 2782 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 2783 return nullptr; 2784 } 2785 2786 if(signatureLength == nullptr) { 2787 signatureLength = &dummy; 2788 } 2789 2790 if(sourceLength==-1){ 2791 sourceLength=(int32_t)uprv_strlen(source); 2792 } 2793 2794 2795 while(i<sourceLength&& i<SIG_MAX_LEN){ 2796 start[i]=source[i]; 2797 i++; 2798 } 2799 2800 if(start[0] == '\xFE' && start[1] == '\xFF') { 2801 *signatureLength=2; 2802 return "UTF-16BE"; 2803 } else if(start[0] == '\xFF' && start[1] == '\xFE') { 2804 if(start[2] == '\x00' && start[3] =='\x00') { 2805 *signatureLength=4; 2806 return "UTF-32LE"; 2807 } else { 2808 *signatureLength=2; 2809 return "UTF-16LE"; 2810 } 2811 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { 2812 *signatureLength=3; 2813 return "UTF-8"; 2814 } else if(start[0] == '\x00' && start[1] == '\x00' && 2815 start[2] == '\xFE' && start[3]=='\xFF') { 2816 *signatureLength=4; 2817 return "UTF-32BE"; 2818 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { 2819 *signatureLength=3; 2820 return "SCSU"; 2821 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') { 2822 *signatureLength=3; 2823 return "BOCU-1"; 2824 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { 2825 /* 2826 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ 2827 * depending on the second UTF-16 code unit. 2828 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF 2829 * if it occurs. 2830 * 2831 * So far we have +/v 2832 */ 2833 if(start[3] == '\x38' && start[4] == '\x2D') { 2834 /* 5 bytes +/v8- */ 2835 *signatureLength=5; 2836 return "UTF-7"; 2837 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { 2838 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ 2839 *signatureLength=4; 2840 return "UTF-7"; 2841 } 2842 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ 2843 *signatureLength=4; 2844 return "UTF-EBCDIC"; 2845 } 2846 2847 2848 /* no known Unicode signature byte sequence recognized */ 2849 *signatureLength=0; 2850 return nullptr; 2851 } 2852 2853 U_CAPI int32_t U_EXPORT2 2854 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) 2855 { 2856 if(status == nullptr || U_FAILURE(*status)){ 2857 return -1; 2858 } 2859 if(cnv == nullptr){ 2860 *status = U_ILLEGAL_ARGUMENT_ERROR; 2861 return -1; 2862 } 2863 2864 if(cnv->preFromUFirstCP >= 0){ 2865 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; 2866 }else if(cnv->preFromULength < 0){ 2867 return -cnv->preFromULength ; 2868 }else if(cnv->fromUChar32 > 0){ 2869 return 1; 2870 } 2871 return 0; 2872 2873 } 2874 2875 U_CAPI int32_t U_EXPORT2 2876 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ 2877 2878 if(status == nullptr || U_FAILURE(*status)){ 2879 return -1; 2880 } 2881 if(cnv == nullptr){ 2882 *status = U_ILLEGAL_ARGUMENT_ERROR; 2883 return -1; 2884 } 2885 2886 if(cnv->preToULength > 0){ 2887 return cnv->preToULength ; 2888 }else if(cnv->preToULength < 0){ 2889 return -cnv->preToULength; 2890 }else if(cnv->toULength > 0){ 2891 return cnv->toULength; 2892 } 2893 return 0; 2894 } 2895 2896 U_CAPI UBool U_EXPORT2 2897 ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ 2898 if (U_FAILURE(*status)) { 2899 return false; 2900 } 2901 2902 if (cnv == nullptr) { 2903 *status = U_ILLEGAL_ARGUMENT_ERROR; 2904 return false; 2905 } 2906 2907 switch (ucnv_getType(cnv)) { 2908 case UCNV_SBCS: 2909 case UCNV_DBCS: 2910 case UCNV_UTF32_BigEndian: 2911 case UCNV_UTF32_LittleEndian: 2912 case UCNV_UTF32: 2913 case UCNV_US_ASCII: 2914 return true; 2915 default: 2916 return false; 2917 } 2918 } 2919 #endif 2920 2921 /* 2922 * Hey, Emacs, please set the following: 2923 * 2924 * Local Variables: 2925 * indent-tabs-mode: nil 2926 * End: 2927 * 2928 */