ucnv_io.cpp (50074B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1999-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * 11 * 12 * ucnv_io.cpp: 13 * initializes global variables and defines functions pertaining to converter 14 * name resolution aspect of the conversion code. 15 * 16 * new implementation: 17 * 18 * created on: 1999nov22 19 * created by: Markus W. Scherer 20 * 21 * Use the binary cnvalias.icu (created from convrtrs.txt) to work 22 * with aliases for converter names. 23 * 24 * Date Name Description 25 * 11/22/1999 markus Created 26 * 06/28/2002 grhoten Major overhaul of the converter alias design. 27 * Now an alias can map to different converters 28 * depending on the specified standard. 29 ******************************************************************************* 30 */ 31 32 #include "unicode/utypes.h" 33 34 #if !UCONFIG_NO_CONVERSION 35 36 #include "unicode/ucnv.h" 37 #include "unicode/udata.h" 38 39 #include "umutex.h" 40 #include "uarrsort.h" 41 #include "uassert.h" 42 #include "udataswp.h" 43 #include "udatamem.h" 44 #include "cstring.h" 45 #include "cmemory.h" 46 #include "ucnv_io.h" 47 #include "uenumimp.h" 48 #include "ucln_cmn.h" 49 50 /* Format of cnvalias.icu ----------------------------------------------------- 51 * 52 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt. 53 * This binary form contains several tables. All indexes are to uint16_t 54 * units, and not to the bytes (uint8_t units). Addressing everything on 55 * 16-bit boundaries allows us to store more information with small index 56 * numbers, which are also 16-bit in size. The majority of the table (except 57 * the string table) are 16-bit numbers. 58 * 59 * First there is the size of the Table of Contents (TOC). The TOC 60 * entries contain the size of each section. In order to find the offset 61 * you just need to sum up the previous offsets. 62 * The TOC length and entries are an array of uint32_t values. 63 * The first section after the TOC starts immediately after the TOC. 64 * 65 * 1) This section contains a list of converters. This list contains indexes 66 * into the string table for the converter name. The index of this list is 67 * also used by other sections, which are mentioned later on. 68 * This list is not sorted. 69 * 70 * 2) This section contains a list of tags. This list contains indexes 71 * into the string table for the tag name. The index of this list is 72 * also used by other sections, which are mentioned later on. 73 * This list is in priority order of standards. 74 * 75 * 3) This section contains a list of sorted unique aliases. This 76 * list contains indexes into the string table for the alias name. The 77 * index of this list is also used by other sections, like the 4th section. 78 * The index for the 3rd and 4th section is used to get the 79 * alias -> converter name mapping. Section 3 and 4 form a two column table. 80 * Some of the most significant bits of each index may contain other 81 * information (see findConverter for details). 82 * 83 * 4) This section contains a list of mapped converter names. Consider this 84 * as a table that maps the 3rd section to the 1st section. This list contains 85 * indexes into the 1st section. The index of this list is the same index in 86 * the 3rd section. There is also some extra information in the high bits of 87 * each converter index in this table. Currently it's only used to say that 88 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK 89 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is 90 * the predigested form of the 5th section so that an alias lookup can be fast. 91 * 92 * 5) This section contains a 2D array with indexes to the 6th section. This 93 * section is the full form of all alias mappings. The column index is the 94 * index into the converter list (column header). The row index is the index 95 * to tag list (row header). This 2D array is the top part a 3D array. The 96 * third dimension is in the 6th section. 97 * 98 * 6) This is blob of variable length arrays. Each array starts with a size, 99 * and is followed by indexes to alias names in the string table. This is 100 * the third dimension to the section 5. No other section should be referencing 101 * this section. 102 * 103 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its 104 * presence indicates that a section 9 exists. UConverterAliasOptions specifies 105 * what type of string normalization is used among other potential things in the 106 * future. 107 * 108 * 8) This is the string table. All strings are indexed on an even address. 109 * There are two reasons for this. First many chip architectures locate strings 110 * faster on even address boundaries. Second, since all indexes are 16-bit 111 * numbers, this string table can be 128KB in size instead of 64KB when we 112 * only have strings starting on an even address. 113 * 114 * 9) When present this is a set of prenormalized strings from section 8. This 115 * table contains normalized strings with the dashes and spaces stripped out, 116 * and all strings lowercased. In the future, the options in section 7 may state 117 * other types of normalization. 118 * 119 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag 120 * has a unique alias among all converters. That same alias can 121 * be mentioned in other standards on different converters, 122 * but only one alias per tag can be unique. 123 * 124 * 125 * Converter Names (Usually in TR22 form) 126 * -------------------------------------------. 127 * T / /| 128 * a / / | 129 * g / / | 130 * s / / | 131 * / / | 132 * ------------------------------------------/ | 133 * A | | | 134 * l | | | 135 * i | | / 136 * a | | / 137 * s | | / 138 * e | | / 139 * s | |/ 140 * ------------------------------------------- 141 * 142 * 143 * 144 * Here is what it really looks like. It's like swiss cheese. 145 * There are holes. Some converters aren't recognized by 146 * a standard, or they are really old converters that the 147 * standard doesn't recognize anymore. 148 * 149 * Converter Names (Usually in TR22 form) 150 * -------------------------------------------. 151 * T /##########################################/| 152 * a / # # /# 153 * g / # ## ## ### # ### ### ### #/ 154 * s / # ##### #### ## ## #/# 155 * / ### # # ## # # # ### # # #/## 156 * ------------------------------------------/# # 157 * A |### # # ## # # # ### # # #|# # 158 * l |# # # # # ## # #|# # 159 * i |# # # # # # #|# 160 * a |# #|# 161 * s | #|# 162 * e 163 * s 164 * 165 */ 166 167 /** 168 * Used by the UEnumeration API 169 */ 170 typedef struct UAliasContext { 171 uint32_t listOffset; 172 uint32_t listIdx; 173 } UAliasContext; 174 175 static const char DATA_NAME[] = "cnvalias"; 176 static const char DATA_TYPE[] = "icu"; 177 178 static UDataMemory *gAliasData=nullptr; 179 static icu::UInitOnce gAliasDataInitOnce {}; 180 181 enum { 182 tocLengthIndex=0, 183 converterListIndex=1, 184 tagListIndex=2, 185 aliasListIndex=3, 186 untaggedConvArrayIndex=4, 187 taggedAliasArrayIndex=5, 188 taggedAliasListsIndex=6, 189 tableOptionsIndex=7, 190 stringTableIndex=8, 191 normalizedStringTableIndex=9, 192 offsetsCount, /* length of the swapper's temporary offsets[] */ 193 minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */ 194 }; 195 196 static const UConverterAliasOptions defaultTableOptions = { 197 UCNV_IO_UNNORMALIZED, 198 0 /* containsCnvOptionInfo */ 199 }; 200 static UConverterAlias gMainTable; 201 202 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx)) 203 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx)) 204 205 static UBool U_CALLCONV 206 isAcceptable(void * /*context*/, 207 const char * /*type*/, const char * /*name*/, 208 const UDataInfo *pInfo) { 209 return 210 pInfo->size>=20 && 211 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 212 pInfo->charsetFamily==U_CHARSET_FAMILY && 213 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ 214 pInfo->dataFormat[1]==0x76 && 215 pInfo->dataFormat[2]==0x41 && 216 pInfo->dataFormat[3]==0x6c && 217 pInfo->formatVersion[0]==3; 218 } 219 220 static UBool U_CALLCONV ucnv_io_cleanup() 221 { 222 if (gAliasData) { 223 udata_close(gAliasData); 224 gAliasData = nullptr; 225 } 226 gAliasDataInitOnce.reset(); 227 228 uprv_memset(&gMainTable, 0, sizeof(gMainTable)); 229 230 return true; /* Everything was cleaned up */ 231 } 232 233 static void U_CALLCONV initAliasData(UErrorCode &errCode) { 234 UDataMemory *data; 235 const uint16_t *table; 236 const uint32_t *sectionSizes; 237 uint32_t tableStart; 238 uint32_t currOffset; 239 int32_t sizeOfData; 240 int32_t sizeOfTOC; 241 242 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup); 243 244 U_ASSERT(gAliasData == nullptr); 245 data = udata_openChoice(nullptr, DATA_TYPE, DATA_NAME, isAcceptable, nullptr, &errCode); 246 if (U_FAILURE(errCode)) { 247 return; 248 } 249 250 sectionSizes = static_cast<const uint32_t*>(udata_getMemory(data)); 251 int32_t dataLength = udata_getLength(data); // This is the length minus the UDataInfo size 252 if (dataLength <= int32_t(sizeof(sectionSizes[0]))) { 253 // We don't even have a TOC! 254 goto invalidFormat; 255 } 256 table = reinterpret_cast<const uint16_t*>(sectionSizes); 257 tableStart = sectionSizes[0]; 258 sizeOfTOC = int32_t((tableStart + 1) * sizeof(sectionSizes[0])); 259 if (tableStart < minTocLength || dataLength <= sizeOfTOC) { 260 // We don't have a whole TOC! 261 goto invalidFormat; 262 } 263 gAliasData = data; 264 265 gMainTable.converterListSize = sectionSizes[1]; 266 gMainTable.tagListSize = sectionSizes[2]; 267 gMainTable.aliasListSize = sectionSizes[3]; 268 gMainTable.untaggedConvArraySize = sectionSizes[4]; 269 gMainTable.taggedAliasArraySize = sectionSizes[5]; 270 gMainTable.taggedAliasListsSize = sectionSizes[6]; 271 gMainTable.optionTableSize = sectionSizes[7]; 272 gMainTable.stringTableSize = sectionSizes[8]; 273 274 if (tableStart > minTocLength) { 275 gMainTable.normalizedStringTableSize = sectionSizes[9]; 276 } 277 278 sizeOfData = sizeOfTOC; 279 for (uint32_t section = 1; section <= tableStart; section++) { 280 sizeOfData += sectionSizes[section] * sizeof(table[0]); 281 } 282 if (dataLength < sizeOfData) { 283 // Truncated file! 284 goto invalidFormat; 285 } 286 // There may be some extra padding at the end, or this is a new file format with extra data that we can't read yet. 287 288 currOffset = (tableStart + 1) * (sizeof(uint32_t)/sizeof(uint16_t)); 289 gMainTable.converterList = table + currOffset; 290 291 currOffset += gMainTable.converterListSize; 292 gMainTable.tagList = table + currOffset; 293 294 currOffset += gMainTable.tagListSize; 295 gMainTable.aliasList = table + currOffset; 296 297 currOffset += gMainTable.aliasListSize; 298 gMainTable.untaggedConvArray = table + currOffset; 299 300 currOffset += gMainTable.untaggedConvArraySize; 301 gMainTable.taggedAliasArray = table + currOffset; 302 303 /* aliasLists is a 1's based array, but it has a padding character */ 304 currOffset += gMainTable.taggedAliasArraySize; 305 gMainTable.taggedAliasLists = table + currOffset; 306 307 currOffset += gMainTable.taggedAliasListsSize; 308 if (gMainTable.optionTableSize > 0 309 && reinterpret_cast<const UConverterAliasOptions*>(table + currOffset)->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT) 310 { 311 /* Faster table */ 312 gMainTable.optionTable = reinterpret_cast<const UConverterAliasOptions*>(table + currOffset); 313 } 314 else { 315 /* Smaller table, or I can't handle this normalization mode! 316 Use the original slower table lookup. */ 317 gMainTable.optionTable = &defaultTableOptions; 318 } 319 320 currOffset += gMainTable.optionTableSize; 321 gMainTable.stringTable = table + currOffset; 322 323 currOffset += gMainTable.stringTableSize; 324 gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED) 325 ? gMainTable.stringTable : (table + currOffset)); 326 327 return; 328 329 invalidFormat: 330 errCode = U_INVALID_FORMAT_ERROR; 331 udata_close(data); 332 } 333 334 335 static UBool 336 haveAliasData(UErrorCode *pErrorCode) { 337 umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode); 338 return U_SUCCESS(*pErrorCode); 339 } 340 341 static inline UBool 342 isAlias(const char *alias, UErrorCode *pErrorCode) { 343 if(alias==nullptr) { 344 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 345 return false; 346 } 347 return *alias != 0; 348 } 349 350 static uint32_t getTagNumber(const char *tagname) { 351 if (gMainTable.tagList) { 352 uint32_t tagNum; 353 for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) { 354 if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) { 355 return tagNum; 356 } 357 } 358 } 359 360 return UINT32_MAX; 361 } 362 363 /* character types relevant for ucnv_compareNames() */ 364 enum { 365 UIGNORE, 366 ZERO, 367 NONZERO, 368 MINLETTER /* any values from here on are lowercase letter mappings */ 369 }; 370 371 /* character types for ASCII 00..7F */ 372 static const uint8_t asciiTypes[128] = { 373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 375 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 376 ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0, 377 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 378 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0, 379 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 380 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0 381 }; 382 383 #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)UIGNORE) 384 385 /* character types for EBCDIC 80..FF */ 386 static const uint8_t ebcdicTypes[128] = { 387 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, 388 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, 389 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, 390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 391 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, 392 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, 393 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, 394 ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0 395 }; 396 397 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE) 398 399 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 400 # define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c) 401 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 402 # define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c) 403 #else 404 # error U_CHARSET_FAMILY is not valid 405 #endif 406 407 408 /* @see ucnv_compareNames */ 409 U_CAPI char * U_CALLCONV 410 ucnv_io_stripASCIIForCompare(char *dst, const char *name) { 411 char *dstItr = dst; 412 uint8_t type, nextType; 413 char c1; 414 UBool afterDigit = false; 415 416 while ((c1 = *name++) != 0) { 417 type = GET_ASCII_TYPE(c1); 418 switch (type) { 419 case UIGNORE: 420 afterDigit = false; 421 continue; /* ignore all but letters and digits */ 422 case ZERO: 423 if (!afterDigit) { 424 nextType = GET_ASCII_TYPE(*name); 425 if (nextType == ZERO || nextType == NONZERO) { 426 continue; /* ignore leading zero before another digit */ 427 } 428 } 429 break; 430 case NONZERO: 431 afterDigit = true; 432 break; 433 default: 434 c1 = (char)type; /* lowercased letter */ 435 afterDigit = false; 436 break; 437 } 438 *dstItr++ = c1; 439 } 440 *dstItr = 0; 441 return dst; 442 } 443 444 U_CAPI char * U_CALLCONV 445 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) { 446 char *dstItr = dst; 447 uint8_t type, nextType; 448 char c1; 449 UBool afterDigit = false; 450 451 while ((c1 = *name++) != 0) { 452 type = GET_EBCDIC_TYPE(c1); 453 switch (type) { 454 case UIGNORE: 455 afterDigit = false; 456 continue; /* ignore all but letters and digits */ 457 case ZERO: 458 if (!afterDigit) { 459 nextType = GET_EBCDIC_TYPE(*name); 460 if (nextType == ZERO || nextType == NONZERO) { 461 continue; /* ignore leading zero before another digit */ 462 } 463 } 464 break; 465 case NONZERO: 466 afterDigit = true; 467 break; 468 default: 469 c1 = (char)type; /* lowercased letter */ 470 afterDigit = false; 471 break; 472 } 473 *dstItr++ = c1; 474 } 475 *dstItr = 0; 476 return dst; 477 } 478 479 /** 480 * Do a fuzzy compare of two converter/alias names. 481 * The comparison is case-insensitive, ignores leading zeroes if they are not 482 * followed by further digits, and ignores all but letters and digits. 483 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. 484 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 485 * at http://www.unicode.org/reports/tr22/ 486 * 487 * This is a symmetrical (commutative) operation; order of arguments 488 * is insignificant. This is an important property for sorting the 489 * list (when the list is preprocessed into binary form) and for 490 * performing binary searches on it at run time. 491 * 492 * @param name1 a converter name or alias, zero-terminated 493 * @param name2 a converter name or alias, zero-terminated 494 * @return 0 if the names match, or a negative value if the name1 495 * lexically precedes name2, or a positive value if the name1 496 * lexically follows name2. 497 * 498 * @see ucnv_io_stripForCompare 499 */ 500 U_CAPI int U_EXPORT2 501 ucnv_compareNames(const char *name1, const char *name2) { 502 int rc; 503 uint8_t type, nextType; 504 char c1, c2; 505 UBool afterDigit1 = false, afterDigit2 = false; 506 507 for (;;) { 508 while ((c1 = *name1++) != 0) { 509 type = GET_CHAR_TYPE(c1); 510 switch (type) { 511 case UIGNORE: 512 afterDigit1 = false; 513 continue; /* ignore all but letters and digits */ 514 case ZERO: 515 if (!afterDigit1) { 516 nextType = GET_CHAR_TYPE(*name1); 517 if (nextType == ZERO || nextType == NONZERO) { 518 continue; /* ignore leading zero before another digit */ 519 } 520 } 521 break; 522 case NONZERO: 523 afterDigit1 = true; 524 break; 525 default: 526 c1 = (char)type; /* lowercased letter */ 527 afterDigit1 = false; 528 break; 529 } 530 break; /* deliver c1 */ 531 } 532 while ((c2 = *name2++) != 0) { 533 type = GET_CHAR_TYPE(c2); 534 switch (type) { 535 case UIGNORE: 536 afterDigit2 = false; 537 continue; /* ignore all but letters and digits */ 538 case ZERO: 539 if (!afterDigit2) { 540 nextType = GET_CHAR_TYPE(*name2); 541 if (nextType == ZERO || nextType == NONZERO) { 542 continue; /* ignore leading zero before another digit */ 543 } 544 } 545 break; 546 case NONZERO: 547 afterDigit2 = true; 548 break; 549 default: 550 c2 = (char)type; /* lowercased letter */ 551 afterDigit2 = false; 552 break; 553 } 554 break; /* deliver c2 */ 555 } 556 557 /* If we reach the ends of both strings then they match */ 558 if ((c1|c2)==0) { 559 return 0; 560 } 561 562 /* Case-insensitive comparison */ 563 rc = (int)(unsigned char)c1 - (int)(unsigned char)c2; 564 if (rc != 0) { 565 return rc; 566 } 567 } 568 } 569 570 /* 571 * search for an alias 572 * return the converter number index for gConverterList 573 */ 574 static inline uint32_t 575 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { 576 uint32_t mid, start, limit; 577 uint32_t lastMid; 578 int result; 579 int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED); 580 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 581 582 if (!isUnnormalized) { 583 if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) { 584 *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 585 return UINT32_MAX; 586 } 587 588 /* Lower case and remove ignoreable characters. */ 589 ucnv_io_stripForCompare(strippedName, alias); 590 alias = strippedName; 591 } 592 593 /* do a binary search for the alias */ 594 start = 0; 595 limit = gMainTable.untaggedConvArraySize; 596 mid = limit; 597 lastMid = UINT32_MAX; 598 599 for (;;) { 600 mid = (start + limit) / 2; 601 if (lastMid == mid) { /* Have we moved? */ 602 break; /* We haven't moved, and it wasn't found. */ 603 } 604 lastMid = mid; 605 if (isUnnormalized) { 606 result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid])); 607 } 608 else { 609 result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid])); 610 } 611 612 if (result < 0) { 613 limit = mid; 614 } else if (result > 0) { 615 start = mid; 616 } else { 617 /* Since the gencnval tool folds duplicates into one entry, 618 * this alias in gAliasList is unique, but different standards 619 * may map an alias to different converters. 620 */ 621 if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) { 622 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING; 623 } 624 /* State whether the canonical converter name contains an option. 625 This information is contained in this list in order to maintain backward & forward compatibility. */ 626 if (containsOption) { 627 UBool containsCnvOptionInfo = static_cast<UBool>(gMainTable.optionTable->containsCnvOptionInfo); 628 *containsOption = static_cast<UBool>((containsCnvOptionInfo 629 && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0)) 630 || !containsCnvOptionInfo); 631 } 632 return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK; 633 } 634 } 635 636 return UINT32_MAX; 637 } 638 639 /* 640 * Is this alias in this list? 641 * alias and listOffset should be non-nullptr. 642 */ 643 static inline UBool 644 isAliasInList(const char *alias, uint32_t listOffset) { 645 if (listOffset) { 646 uint32_t currAlias; 647 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; 648 /* +1 to skip listCount */ 649 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 650 for (currAlias = 0; currAlias < listCount; currAlias++) { 651 if (currList[currAlias] 652 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0) 653 { 654 return true; 655 } 656 } 657 } 658 return false; 659 } 660 661 /* 662 * Search for an standard name of an alias (what is the default name 663 * that this standard uses?) 664 * return the listOffset for gTaggedAliasLists. If it's 0, 665 * the it couldn't be found, but the parameters are valid. 666 */ 667 static uint32_t 668 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) { 669 uint32_t idx; 670 uint32_t listOffset; 671 uint32_t convNum; 672 UErrorCode myErr = U_ZERO_ERROR; 673 uint32_t tagNum = getTagNumber(standard); 674 675 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ 676 convNum = findConverter(alias, nullptr, &myErr); 677 if (myErr != U_ZERO_ERROR) { 678 *pErrorCode = myErr; 679 } 680 681 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { 682 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; 683 if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) { 684 return listOffset; 685 } 686 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { 687 /* Uh Oh! They used an ambiguous alias. 688 We have to search the whole swiss cheese starting 689 at the highest standard affinity. 690 This may take a while. 691 */ 692 for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) { 693 listOffset = gMainTable.taggedAliasArray[idx]; 694 if (listOffset && isAliasInList(alias, listOffset)) { 695 uint32_t currTagNum = idx/gMainTable.converterListSize; 696 uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize); 697 uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum]; 698 if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) { 699 return tempListOffset; 700 } 701 /* else keep on looking */ 702 /* We could speed this up by starting on the next row 703 because an alias is unique per row, right now. 704 This would change if alias versioning appears. */ 705 } 706 } 707 /* The standard doesn't know about the alias */ 708 } 709 /* else no default name */ 710 return 0; 711 } 712 /* else converter or tag not found */ 713 714 return UINT32_MAX; 715 } 716 717 /* Return the canonical name */ 718 static uint32_t 719 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) { 720 uint32_t idx; 721 uint32_t listOffset; 722 uint32_t convNum; 723 UErrorCode myErr = U_ZERO_ERROR; 724 uint32_t tagNum = getTagNumber(standard); 725 726 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ 727 convNum = findConverter(alias, nullptr, &myErr); 728 if (myErr != U_ZERO_ERROR) { 729 *pErrorCode = myErr; 730 } 731 732 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { 733 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; 734 if (listOffset && isAliasInList(alias, listOffset)) { 735 return convNum; 736 } 737 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { 738 /* Uh Oh! They used an ambiguous alias. 739 We have to search one slice of the swiss cheese. 740 We search only in the requested tag, not the whole thing. 741 This may take a while. 742 */ 743 uint32_t convStart = (tagNum)*gMainTable.converterListSize; 744 uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize; 745 for (idx = convStart; idx < convLimit; idx++) { 746 listOffset = gMainTable.taggedAliasArray[idx]; 747 if (listOffset && isAliasInList(alias, listOffset)) { 748 return idx-convStart; 749 } 750 } 751 /* The standard doesn't know about the alias */ 752 } 753 /* else no canonical name */ 754 } 755 /* else converter or tag not found */ 756 757 return UINT32_MAX; 758 } 759 760 U_CAPI const char * 761 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { 762 const char *aliasTmp = alias; 763 int32_t i = 0; 764 for (i = 0; i < 2; i++) { 765 if (i == 1) { 766 /* 767 * After the first unsuccess converter lookup, check to see if 768 * the name begins with 'x-'. If it does, strip it off and try 769 * again. This behaviour is similar to how ICU4J does it. 770 */ 771 if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') { 772 aliasTmp = aliasTmp+2; 773 } else { 774 break; 775 } 776 } 777 if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) { 778 uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode); 779 if (convNum < gMainTable.converterListSize) { 780 return GET_STRING(gMainTable.converterList[convNum]); 781 } 782 /* else converter not found */ 783 } else { 784 break; 785 } 786 } 787 788 return nullptr; 789 } 790 791 U_CDECL_BEGIN 792 793 794 static int32_t U_CALLCONV 795 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { 796 int32_t value = 0; 797 UAliasContext *myContext = (UAliasContext *)(enumerator->context); 798 uint32_t listOffset = myContext->listOffset; 799 800 if (listOffset) { 801 value = gMainTable.taggedAliasLists[listOffset]; 802 } 803 return value; 804 } 805 806 static const char * U_CALLCONV 807 ucnv_io_nextStandardAliases(UEnumeration *enumerator, 808 int32_t* resultLength, 809 UErrorCode * /*pErrorCode*/) 810 { 811 UAliasContext *myContext = (UAliasContext *)(enumerator->context); 812 uint32_t listOffset = myContext->listOffset; 813 814 if (listOffset) { 815 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; 816 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 817 818 if (myContext->listIdx < listCount) { 819 const char *myStr = GET_STRING(currList[myContext->listIdx++]); 820 if (resultLength) { 821 *resultLength = (int32_t)uprv_strlen(myStr); 822 } 823 return myStr; 824 } 825 } 826 /* Either we accessed a zero length list, or we enumerated too far. */ 827 if (resultLength) { 828 *resultLength = 0; 829 } 830 return nullptr; 831 } 832 833 static void U_CALLCONV 834 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { 835 ((UAliasContext *)(enumerator->context))->listIdx = 0; 836 } 837 838 static void U_CALLCONV 839 ucnv_io_closeUEnumeration(UEnumeration *enumerator) { 840 uprv_free(enumerator->context); 841 uprv_free(enumerator); 842 } 843 844 U_CDECL_END 845 846 /* Enumerate the aliases for the specified converter and standard tag */ 847 static const UEnumeration gEnumAliases = { 848 nullptr, 849 nullptr, 850 ucnv_io_closeUEnumeration, 851 ucnv_io_countStandardAliases, 852 uenum_unextDefault, 853 ucnv_io_nextStandardAliases, 854 ucnv_io_resetStandardAliases 855 }; 856 857 U_CAPI UEnumeration * U_EXPORT2 858 ucnv_openStandardNames(const char *convName, 859 const char *standard, 860 UErrorCode *pErrorCode) 861 { 862 UEnumeration *myEnum = nullptr; 863 if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) { 864 uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode); 865 866 /* When listOffset == 0, we want to acknowledge that the 867 converter name and standard are okay, but there 868 is nothing to enumerate. */ 869 if (listOffset < gMainTable.taggedAliasListsSize) { 870 UAliasContext *myContext; 871 872 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))); 873 if (myEnum == nullptr) { 874 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 875 return nullptr; 876 } 877 uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration)); 878 myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext))); 879 if (myContext == nullptr) { 880 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 881 uprv_free(myEnum); 882 return nullptr; 883 } 884 myContext->listOffset = listOffset; 885 myContext->listIdx = 0; 886 myEnum->context = myContext; 887 } 888 /* else converter or tag not found */ 889 } 890 return myEnum; 891 } 892 893 static uint16_t 894 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) { 895 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 896 uint32_t convNum = findConverter(alias, nullptr, pErrorCode); 897 if (convNum < gMainTable.converterListSize) { 898 /* tagListNum - 1 is the ALL tag */ 899 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; 900 901 if (listOffset) { 902 return gMainTable.taggedAliasLists[listOffset]; 903 } 904 /* else this shouldn't happen. internal program error */ 905 } 906 /* else converter not found */ 907 } 908 return 0; 909 } 910 911 static uint16_t 912 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) { 913 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 914 uint32_t currAlias; 915 uint32_t convNum = findConverter(alias, nullptr, pErrorCode); 916 if (convNum < gMainTable.converterListSize) { 917 /* tagListNum - 1 is the ALL tag */ 918 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; 919 920 if (listOffset) { 921 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; 922 /* +1 to skip listCount */ 923 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 924 925 for (currAlias = start; currAlias < listCount; currAlias++) { 926 aliases[currAlias] = GET_STRING(currList[currAlias]); 927 } 928 } 929 /* else this shouldn't happen. internal program error */ 930 } 931 /* else converter not found */ 932 } 933 return 0; 934 } 935 936 static const char * 937 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) { 938 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 939 uint32_t convNum = findConverter(alias, nullptr, pErrorCode); 940 if (convNum < gMainTable.converterListSize) { 941 /* tagListNum - 1 is the ALL tag */ 942 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; 943 944 if (listOffset) { 945 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; 946 /* +1 to skip listCount */ 947 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 948 949 if (n < listCount) { 950 return GET_STRING(currList[n]); 951 } 952 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 953 } 954 /* else this shouldn't happen. internal program error */ 955 } 956 /* else converter not found */ 957 } 958 return nullptr; 959 } 960 961 static uint16_t 962 ucnv_io_countStandards(UErrorCode *pErrorCode) { 963 if (haveAliasData(pErrorCode)) { 964 /* Don't include the empty list */ 965 return static_cast<uint16_t>(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS); 966 } 967 968 return 0; 969 } 970 971 U_CAPI const char * U_EXPORT2 972 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) { 973 if (haveAliasData(pErrorCode)) { 974 if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) { 975 return GET_STRING(gMainTable.tagList[n]); 976 } 977 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 978 } 979 980 return nullptr; 981 } 982 983 U_CAPI const char * U_EXPORT2 984 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) { 985 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 986 uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode); 987 988 if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) { 989 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 990 991 /* Get the preferred name from this list */ 992 if (currList[0]) { 993 return GET_STRING(currList[0]); 994 } 995 /* else someone screwed up the alias table. */ 996 /* *pErrorCode = U_INVALID_FORMAT_ERROR */ 997 } 998 } 999 1000 return nullptr; 1001 } 1002 1003 U_CAPI uint16_t U_EXPORT2 1004 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode) 1005 { 1006 return ucnv_io_countAliases(alias, pErrorCode); 1007 } 1008 1009 1010 U_CAPI const char* U_EXPORT2 1011 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) 1012 { 1013 return ucnv_io_getAlias(alias, n, pErrorCode); 1014 } 1015 1016 U_CAPI void U_EXPORT2 1017 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) 1018 { 1019 ucnv_io_getAliases(alias, 0, aliases, pErrorCode); 1020 } 1021 1022 U_CAPI uint16_t U_EXPORT2 1023 ucnv_countStandards() 1024 { 1025 UErrorCode err = U_ZERO_ERROR; 1026 return ucnv_io_countStandards(&err); 1027 } 1028 1029 U_CAPI const char * U_EXPORT2 1030 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) { 1031 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 1032 uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode); 1033 1034 if (convNum < gMainTable.converterListSize) { 1035 return GET_STRING(gMainTable.converterList[convNum]); 1036 } 1037 } 1038 1039 return nullptr; 1040 } 1041 1042 U_CDECL_BEGIN 1043 1044 1045 static int32_t U_CALLCONV 1046 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) { 1047 return gMainTable.converterListSize; 1048 } 1049 1050 static const char * U_CALLCONV 1051 ucnv_io_nextAllConverters(UEnumeration *enumerator, 1052 int32_t* resultLength, 1053 UErrorCode * /*pErrorCode*/) 1054 { 1055 uint16_t *myContext = (uint16_t *)(enumerator->context); 1056 1057 if (*myContext < gMainTable.converterListSize) { 1058 const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]); 1059 if (resultLength) { 1060 *resultLength = (int32_t)uprv_strlen(myStr); 1061 } 1062 return myStr; 1063 } 1064 /* Either we accessed a zero length list, or we enumerated too far. */ 1065 if (resultLength) { 1066 *resultLength = 0; 1067 } 1068 return nullptr; 1069 } 1070 1071 static void U_CALLCONV 1072 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { 1073 *((uint16_t *)(enumerator->context)) = 0; 1074 } 1075 U_CDECL_END 1076 static const UEnumeration gEnumAllConverters = { 1077 nullptr, 1078 nullptr, 1079 ucnv_io_closeUEnumeration, 1080 ucnv_io_countAllConverters, 1081 uenum_unextDefault, 1082 ucnv_io_nextAllConverters, 1083 ucnv_io_resetAllConverters 1084 }; 1085 1086 U_CAPI UEnumeration * U_EXPORT2 1087 ucnv_openAllNames(UErrorCode *pErrorCode) { 1088 UEnumeration *myEnum = nullptr; 1089 if (haveAliasData(pErrorCode)) { 1090 uint16_t *myContext; 1091 1092 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))); 1093 if (myEnum == nullptr) { 1094 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1095 return nullptr; 1096 } 1097 uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration)); 1098 myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t))); 1099 if (myContext == nullptr) { 1100 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1101 uprv_free(myEnum); 1102 return nullptr; 1103 } 1104 *myContext = 0; 1105 myEnum->context = myContext; 1106 } 1107 return myEnum; 1108 } 1109 1110 U_CAPI uint16_t 1111 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) { 1112 if (haveAliasData(pErrorCode)) { 1113 return (uint16_t)gMainTable.converterListSize; 1114 } 1115 return 0; 1116 } 1117 1118 /* alias table swapping ----------------------------------------------------- */ 1119 1120 U_CDECL_BEGIN 1121 1122 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name); 1123 U_CDECL_END 1124 1125 1126 /* 1127 * row of a temporary array 1128 * 1129 * gets platform-endian charset string indexes and sorting indexes; 1130 * after sorting this array by strings, the actual arrays are permutated 1131 * according to the sorting indexes 1132 */ 1133 typedef struct TempRow { 1134 uint16_t strIndex, sortIndex; 1135 } TempRow; 1136 1137 typedef struct TempAliasTable { 1138 const char *chars; 1139 TempRow *rows; 1140 uint16_t *resort; 1141 StripForCompareFn *stripForCompare; 1142 } TempAliasTable; 1143 1144 enum { 1145 STACK_ROW_CAPACITY=500 1146 }; 1147 1148 static int32_t U_CALLCONV 1149 io_compareRows(const void *context, const void *left, const void *right) { 1150 char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH], 1151 strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH]; 1152 1153 TempAliasTable *tempTable=(TempAliasTable *)context; 1154 const char *chars=tempTable->chars; 1155 1156 return static_cast<int32_t>(uprv_strcmp( 1157 tempTable->stripForCompare(strippedLeft, chars + 2 * static_cast<const TempRow*>(left)->strIndex), 1158 tempTable->stripForCompare(strippedRight, chars + 2 * static_cast<const TempRow*>(right)->strIndex))); 1159 } 1160 1161 U_CAPI int32_t U_EXPORT2 1162 ucnv_swapAliases(const UDataSwapper *ds, 1163 const void *inData, int32_t length, void *outData, 1164 UErrorCode *pErrorCode) { 1165 const UDataInfo *pInfo; 1166 int32_t headerSize; 1167 1168 const uint16_t *inTable; 1169 const uint32_t *inSectionSizes; 1170 uint32_t toc[offsetsCount]; 1171 uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */ 1172 uint32_t i, count, tocLength, topOffset; 1173 1174 TempRow rows[STACK_ROW_CAPACITY]; 1175 uint16_t resort[STACK_ROW_CAPACITY]; 1176 TempAliasTable tempTable; 1177 1178 /* udata_swapDataHeader checks the arguments */ 1179 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1180 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 1181 return 0; 1182 } 1183 1184 /* check data format and format version */ 1185 pInfo=(const UDataInfo *)((const char *)inData+4); 1186 if(!( 1187 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ 1188 pInfo->dataFormat[1]==0x76 && 1189 pInfo->dataFormat[2]==0x41 && 1190 pInfo->dataFormat[3]==0x6c && 1191 pInfo->formatVersion[0]==3 1192 )) { 1193 udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n", 1194 pInfo->dataFormat[0], pInfo->dataFormat[1], 1195 pInfo->dataFormat[2], pInfo->dataFormat[3], 1196 pInfo->formatVersion[0]); 1197 *pErrorCode=U_UNSUPPORTED_ERROR; 1198 return 0; 1199 } 1200 1201 /* an alias table must contain at least the table of contents array */ 1202 if(length>=0 && (length-headerSize)<4*(1+minTocLength)) { 1203 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", 1204 length-headerSize); 1205 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1206 return 0; 1207 } 1208 1209 inSectionSizes=(const uint32_t *)((const char *)inData+headerSize); 1210 inTable=(const uint16_t *)inSectionSizes; 1211 uprv_memset(toc, 0, sizeof(toc)); 1212 toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]); 1213 if(tocLength<minTocLength || offsetsCount<=tocLength) { 1214 udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength); 1215 *pErrorCode=U_INVALID_FORMAT_ERROR; 1216 return 0; 1217 } 1218 1219 /* read the known part of the table of contents */ 1220 for(i=converterListIndex; i<=tocLength; ++i) { 1221 toc[i]=ds->readUInt32(inSectionSizes[i]); 1222 } 1223 1224 /* compute offsets */ 1225 uprv_memset(offsets, 0, sizeof(offsets)); 1226 offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */ 1227 for(i=tagListIndex; i<=tocLength; ++i) { 1228 offsets[i]=offsets[i-1]+toc[i-1]; 1229 } 1230 1231 /* compute the overall size of the after-header data, in numbers of 16-bit units */ 1232 topOffset=offsets[i-1]+toc[i-1]; 1233 1234 if(length>=0) { 1235 uint16_t *outTable; 1236 const uint16_t *p, *p2; 1237 uint16_t *q, *q2; 1238 uint16_t oldIndex; 1239 1240 if((length-headerSize)<(2*(int32_t)topOffset)) { 1241 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", 1242 length-headerSize); 1243 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1244 return 0; 1245 } 1246 1247 outTable=(uint16_t *)((char *)outData+headerSize); 1248 1249 /* swap the entire table of contents */ 1250 ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode); 1251 1252 /* swap unormalized strings & normalized strings */ 1253 ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]), 1254 outTable+offsets[stringTableIndex], pErrorCode); 1255 if(U_FAILURE(*pErrorCode)) { 1256 udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n"); 1257 return 0; 1258 } 1259 1260 if(ds->inCharset==ds->outCharset) { 1261 /* no need to sort, just swap all 16-bit values together */ 1262 ds->swapArray16(ds, 1263 inTable+offsets[converterListIndex], 1264 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]), 1265 outTable+offsets[converterListIndex], 1266 pErrorCode); 1267 } else { 1268 /* allocate the temporary table for sorting */ 1269 count=toc[aliasListIndex]; 1270 1271 tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */ 1272 1273 if(count<=STACK_ROW_CAPACITY) { 1274 tempTable.rows=rows; 1275 tempTable.resort=resort; 1276 } else { 1277 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); 1278 if(tempTable.rows==nullptr) { 1279 udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n", 1280 count); 1281 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1282 return 0; 1283 } 1284 tempTable.resort=(uint16_t *)(tempTable.rows+count); 1285 } 1286 1287 if(ds->outCharset==U_ASCII_FAMILY) { 1288 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare; 1289 } else /* U_EBCDIC_FAMILY */ { 1290 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare; 1291 } 1292 1293 /* 1294 * Sort unique aliases+mapped names. 1295 * 1296 * We need to sort the list again by outCharset strings because they 1297 * sort differently for different charset families. 1298 * First we set up a temporary table with the string indexes and 1299 * sorting indexes and sort that. 1300 * Then we permutate and copy/swap the actual values. 1301 */ 1302 p=inTable+offsets[aliasListIndex]; 1303 q=outTable+offsets[aliasListIndex]; 1304 1305 p2=inTable+offsets[untaggedConvArrayIndex]; 1306 q2=outTable+offsets[untaggedConvArrayIndex]; 1307 1308 for(i=0; i<count; ++i) { 1309 tempTable.rows[i].strIndex=ds->readUInt16(p[i]); 1310 tempTable.rows[i].sortIndex=(uint16_t)i; 1311 } 1312 1313 uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow), 1314 io_compareRows, &tempTable, 1315 false, pErrorCode); 1316 1317 if(U_SUCCESS(*pErrorCode)) { 1318 /* copy/swap/permutate items */ 1319 if(p!=q) { 1320 for(i=0; i<count; ++i) { 1321 oldIndex=tempTable.rows[i].sortIndex; 1322 ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode); 1323 ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode); 1324 } 1325 } else { 1326 /* 1327 * If we swap in-place, then the permutation must use another 1328 * temporary array (tempTable.resort) 1329 * before the results are copied to the outBundle. 1330 */ 1331 uint16_t *r=tempTable.resort; 1332 1333 for(i=0; i<count; ++i) { 1334 oldIndex=tempTable.rows[i].sortIndex; 1335 ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); 1336 } 1337 uprv_memcpy(q, r, 2*(size_t)count); 1338 1339 for(i=0; i<count; ++i) { 1340 oldIndex=tempTable.rows[i].sortIndex; 1341 ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); 1342 } 1343 uprv_memcpy(q2, r, 2*(size_t)count); 1344 } 1345 } 1346 1347 if(tempTable.rows!=rows) { 1348 uprv_free(tempTable.rows); 1349 } 1350 1351 if(U_FAILURE(*pErrorCode)) { 1352 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n", 1353 count); 1354 return 0; 1355 } 1356 1357 /* swap remaining 16-bit values */ 1358 ds->swapArray16(ds, 1359 inTable+offsets[converterListIndex], 1360 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]), 1361 outTable+offsets[converterListIndex], 1362 pErrorCode); 1363 ds->swapArray16(ds, 1364 inTable+offsets[taggedAliasArrayIndex], 1365 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]), 1366 outTable+offsets[taggedAliasArrayIndex], 1367 pErrorCode); 1368 } 1369 } 1370 1371 return headerSize+2*(int32_t)topOffset; 1372 } 1373 1374 #endif 1375 1376 1377 /* 1378 * Hey, Emacs, please set the following: 1379 * 1380 * Local Variables: 1381 * indent-tabs-mode: nil 1382 * End: 1383 * 1384 */