tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucnv_io.cpp (50074B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *
     11 *
     12 *  ucnv_io.cpp:
     13 *  initializes global variables and defines functions pertaining to converter 
     14 *  name resolution aspect of the conversion code.
     15 *
     16 *   new implementation:
     17 *
     18 *   created on: 1999nov22
     19 *   created by: Markus W. Scherer
     20 *
     21 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
     22 *   with aliases for converter names.
     23 *
     24 *   Date        Name        Description
     25 *   11/22/1999  markus      Created
     26 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
     27 *                           Now an alias can map to different converters
     28 *                           depending on the specified standard.
     29 *******************************************************************************
     30 */
     31 
     32 #include "unicode/utypes.h"
     33 
     34 #if !UCONFIG_NO_CONVERSION
     35 
     36 #include "unicode/ucnv.h"
     37 #include "unicode/udata.h"
     38 
     39 #include "umutex.h"
     40 #include "uarrsort.h"
     41 #include "uassert.h"
     42 #include "udataswp.h"
     43 #include "udatamem.h"
     44 #include "cstring.h"
     45 #include "cmemory.h"
     46 #include "ucnv_io.h"
     47 #include "uenumimp.h"
     48 #include "ucln_cmn.h"
     49 
     50 /* Format of cnvalias.icu -----------------------------------------------------
     51 *
     52 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
     53 * This binary form contains several tables. All indexes are to uint16_t
     54 * units, and not to the bytes (uint8_t units). Addressing everything on
     55 * 16-bit boundaries allows us to store more information with small index
     56 * numbers, which are also 16-bit in size. The majority of the table (except
     57 * the string table) are 16-bit numbers.
     58 *
     59 * First there is the size of the Table of Contents (TOC). The TOC
     60 * entries contain the size of each section. In order to find the offset
     61 * you just need to sum up the sizes of the preceding sections.
     62 * The TOC length and entries are an array of uint32_t values.
     63 * The first section after the TOC starts immediately after the TOC.
     64 *
     65 * 1) This section contains a list of converters. This list contains indexes
     66 * into the string table for the converter name. The index of this list is
     67 * also used by other sections, which are mentioned later on.
     68 * This list is not sorted.
     69 *
     70 * 2) This section contains a list of tags. This list contains indexes
     71 * into the string table for the tag name. The index of this list is
     72 * also used by other sections, which are mentioned later on.
     73 * This list is in priority order of standards.
     74 *
     75 * 3) This section contains a list of sorted unique aliases. This
     76 * list contains indexes into the string table for the alias name. The
     77 * index of this list is also used by other sections, like the 4th section.
     78 * The index for the 3rd and 4th section is used to get the
     79 * alias -> converter name mapping. Section 3 and 4 form a two column table.
     80 * Some of the most significant bits of each index may contain other
     81 * information (see findConverter for details).
     82 *
     83 * 4) This section contains a list of mapped converter names. Consider this
     84 * as a table that maps the 3rd section to the 1st section. This list contains
     85 * indexes into the 1st section. The index of this list is the same index in
     86 * the 3rd section. There is also some extra information in the high bits of
     87 * each converter index in this table. Currently it's only used to say that
     88 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
     89 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
     90 * the predigested form of the 5th section so that an alias lookup can be fast.
     91 *
     92 * 5) This section contains a 2D array with indexes to the 6th section. This
     93 * section is the full form of all alias mappings. The column index is the
     94 * index into the converter list (column header). The row index is the index
     95 * to the tag list (row header). This 2D array is the top part of a 3D array. The
     96 * third dimension is in the 6th section.
     97 *
     98 * 6) This is a blob of variable-length arrays. Each array starts with a size,
     99 * and is followed by indexes to alias names in the string table. This is
    100 * the third dimension to the section 5. No other section should be referencing
    101 * this section.
    102 *
    103 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
    104 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
    105 * what type of string normalization is used among other potential things in the
    106 * future.
    107 *
    108 * 8) This is the string table. All strings are indexed on an even address.
    109 * There are two reasons for this. First many chip architectures locate strings
    110 * faster on even address boundaries. Second, since all indexes are 16-bit
    111 * numbers, this string table can be 128KB in size instead of 64KB when we
    112 * only have strings starting on an even address.
    113 *
    114 * 9) When present this is a set of prenormalized strings from section 8. This
    115 * table contains normalized strings with the dashes and spaces stripped out,
    116 * and all strings lowercased. In the future, the options in section 7 may state
    117 * other types of normalization.
    118 *
    119 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
    120 * has a unique alias among all converters. That same alias can
    121 * be mentioned in other standards on different converters,
    122 * but only one alias per tag can be unique.
    123 *
    124 *
    125 *              Converter Names (Usually in TR22 form)
    126 *           -------------------------------------------.
    127 *     T    /                                          /|
    128 *     a   /                                          / |
    129 *     g  /                                          /  |
    130 *     s /                                          /   |
    131 *      /                                          /    |
    132 *      ------------------------------------------/     |
    133 *    A |                                         |     |
    134 *    l |                                         |     |
    135 *    i |                                         |    /
    136 *    a |                                         |   /
    137 *    s |                                         |  /
    138 *    e |                                         | /
    139 *    s |                                         |/
    140 *      -------------------------------------------
    141 *
    142 *
    143 *
    144 * Here is what it really looks like. It's like swiss cheese.
    145 * There are holes. Some converters aren't recognized by
    146 * a standard, or they are really old converters that the
    147 * standard doesn't recognize anymore.
    148 *
    149 *              Converter Names (Usually in TR22 form)
    150 *           -------------------------------------------.
    151 *     T    /##########################################/|
    152 *     a   /     #            #                       /#
    153 *     g  /  #      ##     ##     ### # ### ### ### #/
    154 *     s / #             #####  ####        ##  ## #/#
    155 *      / ### # # ##  #  #   #          ### # #   #/##
    156 *      ------------------------------------------/# #
    157 *    A |### # # ##  #  #   #          ### # #   #|# #
    158 *    l |# # #    #     #               ## #     #|# #
    159 *    i |# # #    #     #                #       #|#
    160 *    a |#                                       #|#
    161 *    s |                                        #|#
    162 *    e
    163 *    s
    164 *
    165 */
    166 
    /**
     * Per-enumeration state handed to the UEnumeration API:
     * a list offset plus the current index within that list.
     */
    struct UAliasContext {
        uint32_t listOffset;
        uint32_t listIdx;
    };
    typedef struct UAliasContext UAliasContext;
    174 
    175 static const char DATA_NAME[] = "cnvalias";
    176 static const char DATA_TYPE[] = "icu";
    177 
    178 static UDataMemory *gAliasData=nullptr;
    179 static icu::UInitOnce gAliasDataInitOnce {};
    180 
    /*
     * Positions inside the uint32_t table of contents of cnvalias.icu.
     * Entry 0 holds the TOC length; entries 1..9 hold the section sizes.
     */
    enum {
        tocLengthIndex = 0,
        converterListIndex,         /* 1 */
        tagListIndex,               /* 2 */
        aliasListIndex,             /* 3 */
        untaggedConvArrayIndex,     /* 4 */
        taggedAliasArrayIndex,      /* 5 */
        taggedAliasListsIndex,      /* 6 */
        tableOptionsIndex,          /* 7 */
        stringTableIndex,           /* 8 */
        normalizedStringTableIndex, /* 9 */
        offsetsCount,               /* length of the swapper's temporary offsets[] */
        minTocLength = 8            /* min. tocLength in the file, does not count the tocLengthIndex! */
    };
    195 
    196 static const UConverterAliasOptions defaultTableOptions = {
    197    UCNV_IO_UNNORMALIZED,
    198    0 /* containsCnvOptionInfo */
    199 };
    200 static UConverterAlias gMainTable;
    201 
    /*
     * Resolve a string-table index to a C string.
     * idx counts 16-bit units from the start of the respective string table.
     * The whole expansion is parenthesized so that the result can safely be
     * used inside a larger expression (e.g. subscripted or dereferenced).
     */
    #define GET_STRING(idx) ((const char *)(gMainTable.stringTable + (idx)))
    #define GET_NORMALIZED_STRING(idx) ((const char *)(gMainTable.normalizedStringTable + (idx)))
    204 
    205 static UBool U_CALLCONV
    206 isAcceptable(void * /*context*/,
    207             const char * /*type*/, const char * /*name*/,
    208             const UDataInfo *pInfo) {
    209    return
    210        pInfo->size>=20 &&
    211        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    212        pInfo->charsetFamily==U_CHARSET_FAMILY &&
    213        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
    214        pInfo->dataFormat[1]==0x76 &&
    215        pInfo->dataFormat[2]==0x41 &&
    216        pInfo->dataFormat[3]==0x6c &&
    217        pInfo->formatVersion[0]==3;
    218 }
    219 
    220 static UBool U_CALLCONV ucnv_io_cleanup()
    221 {
    222    if (gAliasData) {
    223        udata_close(gAliasData);
    224        gAliasData = nullptr;
    225    }
    226    gAliasDataInitOnce.reset();
    227 
    228    uprv_memset(&gMainTable, 0, sizeof(gMainTable));
    229 
    230    return true;                   /* Everything was cleaned up */
    231 }
    232 
    233 static void U_CALLCONV initAliasData(UErrorCode &errCode) {
    234    UDataMemory *data;
    235    const uint16_t *table;
    236    const uint32_t *sectionSizes;
    237    uint32_t tableStart;
    238    uint32_t currOffset;
    239    int32_t sizeOfData;
    240    int32_t sizeOfTOC;
    241 
    242    ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
    243 
    244    U_ASSERT(gAliasData == nullptr);
    245    data = udata_openChoice(nullptr, DATA_TYPE, DATA_NAME, isAcceptable, nullptr, &errCode);
    246    if (U_FAILURE(errCode)) {
    247        return;
    248    }
    249 
    250    sectionSizes = static_cast<const uint32_t*>(udata_getMemory(data));
    251    int32_t dataLength = udata_getLength(data); // This is the length minus the UDataInfo size
    252    if (dataLength <= int32_t(sizeof(sectionSizes[0]))) {
    253        // We don't even have a TOC!
    254        goto invalidFormat;
    255    }
    256    table = reinterpret_cast<const uint16_t*>(sectionSizes);
    257    tableStart = sectionSizes[0];
    258    sizeOfTOC = int32_t((tableStart + 1) * sizeof(sectionSizes[0]));
    259    if (tableStart < minTocLength || dataLength <= sizeOfTOC) {
    260        // We don't have a whole TOC!
    261        goto invalidFormat;
    262    }
    263    gAliasData = data;
    264 
    265    gMainTable.converterListSize      = sectionSizes[1];
    266    gMainTable.tagListSize            = sectionSizes[2];
    267    gMainTable.aliasListSize          = sectionSizes[3];
    268    gMainTable.untaggedConvArraySize  = sectionSizes[4];
    269    gMainTable.taggedAliasArraySize   = sectionSizes[5];
    270    gMainTable.taggedAliasListsSize   = sectionSizes[6];
    271    gMainTable.optionTableSize        = sectionSizes[7];
    272    gMainTable.stringTableSize        = sectionSizes[8];
    273 
    274    if (tableStart > minTocLength) {
    275        gMainTable.normalizedStringTableSize = sectionSizes[9];
    276    }
    277 
    278    sizeOfData = sizeOfTOC;
    279    for (uint32_t section = 1; section <= tableStart; section++) {
    280        sizeOfData += sectionSizes[section] * sizeof(table[0]);
    281    }
    282    if (dataLength < sizeOfData) {
    283        // Truncated file!
    284        goto invalidFormat;
    285    }
    286    // There may be some extra padding at the end, or this is a new file format with extra data that we can't read yet.
    287 
    288    currOffset = (tableStart + 1) * (sizeof(uint32_t)/sizeof(uint16_t));
    289    gMainTable.converterList = table + currOffset;
    290 
    291    currOffset += gMainTable.converterListSize;
    292    gMainTable.tagList = table + currOffset;
    293 
    294    currOffset += gMainTable.tagListSize;
    295    gMainTable.aliasList = table + currOffset;
    296 
    297    currOffset += gMainTable.aliasListSize;
    298    gMainTable.untaggedConvArray = table + currOffset;
    299 
    300    currOffset += gMainTable.untaggedConvArraySize;
    301    gMainTable.taggedAliasArray = table + currOffset;
    302 
    303    /* aliasLists is a 1's based array, but it has a padding character */
    304    currOffset += gMainTable.taggedAliasArraySize;
    305    gMainTable.taggedAliasLists = table + currOffset;
    306 
    307    currOffset += gMainTable.taggedAliasListsSize;
    308    if (gMainTable.optionTableSize > 0
    309        && reinterpret_cast<const UConverterAliasOptions*>(table + currOffset)->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
    310    {
    311        /* Faster table */
    312        gMainTable.optionTable = reinterpret_cast<const UConverterAliasOptions*>(table + currOffset);
    313    }
    314    else {
    315        /* Smaller table, or I can't handle this normalization mode!
    316        Use the original slower table lookup. */
    317        gMainTable.optionTable = &defaultTableOptions;
    318    }
    319 
    320    currOffset += gMainTable.optionTableSize;
    321    gMainTable.stringTable = table + currOffset;
    322 
    323    currOffset += gMainTable.stringTableSize;
    324    gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
    325        ? gMainTable.stringTable : (table + currOffset));
    326 
    327    return;
    328 
    329 invalidFormat:
    330    errCode = U_INVALID_FORMAT_ERROR;
    331    udata_close(data);
    332 }
    333 
    334 
    335 static UBool
    336 haveAliasData(UErrorCode *pErrorCode) {
    337    umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
    338    return U_SUCCESS(*pErrorCode);
    339 }
    340 
    341 static inline UBool
    342 isAlias(const char *alias, UErrorCode *pErrorCode) {
    343    if(alias==nullptr) {
    344        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    345        return false;
    346    }
    347    return *alias != 0;
    348 }
    349 
    350 static uint32_t getTagNumber(const char *tagname) {
    351    if (gMainTable.tagList) {
    352        uint32_t tagNum;
    353        for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
    354            if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
    355                return tagNum;
    356            }
    357        }
    358    }
    359 
    360    return UINT32_MAX;
    361 }
    362 
    /* character types relevant for ucnv_compareNames() */
    enum {
        UIGNORE,
        ZERO,
        NONZERO,
        MINLETTER /* any values from here on are lowercase letter mappings */
    };

    /*
     * character types for ASCII 00..7F
     * Digits map to ZERO/NONZERO, letters of either case map to the lowercase
     * letter, everything else is 0 (UIGNORE).
     */
    static const uint8_t asciiTypes[128] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
        0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
        0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
        0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
        0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
    };

    /*
     * Character type of c: bytes 00..7F are looked up in asciiTypes, bytes
     * with the high bit set are UIGNORE.
     * The argument is parenthesized at every use so that an expression
     * argument is cast as a whole. Note that c is evaluated twice.
     */
    #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)
    384 
    385 /* character types for EBCDIC 80..FF */
    386 static const uint8_t ebcdicTypes[128] = {
    387    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    388    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    389    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    390    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    391    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    392    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    393    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    394    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
    395 };
    396 
    397 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
    398 
    399 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    400 #   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
    401 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    402 #   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
    403 #else
    404 #   error U_CHARSET_FAMILY is not valid
    405 #endif
    406 
    407 
    408 /* @see ucnv_compareNames */
    409 U_CAPI char * U_CALLCONV
    410 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
    411    char *dstItr = dst;
    412    uint8_t type, nextType;
    413    char c1;
    414    UBool afterDigit = false;
    415 
    416    while ((c1 = *name++) != 0) {
    417        type = GET_ASCII_TYPE(c1);
    418        switch (type) {
    419        case UIGNORE:
    420            afterDigit = false;
    421            continue; /* ignore all but letters and digits */
    422        case ZERO:
    423            if (!afterDigit) {
    424                nextType = GET_ASCII_TYPE(*name);
    425                if (nextType == ZERO || nextType == NONZERO) {
    426                    continue; /* ignore leading zero before another digit */
    427                }
    428            }
    429            break;
    430        case NONZERO:
    431            afterDigit = true;
    432            break;
    433        default:
    434            c1 = (char)type; /* lowercased letter */
    435            afterDigit = false;
    436            break;
    437        }
    438        *dstItr++ = c1;
    439    }
    440    *dstItr = 0;
    441    return dst;
    442 }
    443 
    444 U_CAPI char * U_CALLCONV
    445 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
    446    char *dstItr = dst;
    447    uint8_t type, nextType;
    448    char c1;
    449    UBool afterDigit = false;
    450 
    451    while ((c1 = *name++) != 0) {
    452        type = GET_EBCDIC_TYPE(c1);
    453        switch (type) {
    454        case UIGNORE:
    455            afterDigit = false;
    456            continue; /* ignore all but letters and digits */
    457        case ZERO:
    458            if (!afterDigit) {
    459                nextType = GET_EBCDIC_TYPE(*name);
    460                if (nextType == ZERO || nextType == NONZERO) {
    461                    continue; /* ignore leading zero before another digit */
    462                }
    463            }
    464            break;
    465        case NONZERO:
    466            afterDigit = true;
    467            break;
    468        default:
    469            c1 = (char)type; /* lowercased letter */
    470            afterDigit = false;
    471            break;
    472        }
    473        *dstItr++ = c1;
    474    }
    475    *dstItr = 0;
    476    return dst;
    477 }
    478 
    479 /**
    480 * Do a fuzzy compare of two converter/alias names.
    481 * The comparison is case-insensitive, ignores leading zeroes if they are not
    482 * followed by further digits, and ignores all but letters and digits.
    483 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
    484 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
    485 * at http://www.unicode.org/reports/tr22/
    486 *
    487 * This is a symmetrical (commutative) operation; order of arguments
    488 * is insignificant.  This is an important property for sorting the
    489 * list (when the list is preprocessed into binary form) and for
    490 * performing binary searches on it at run time.
    491 *
    492 * @param name1 a converter name or alias, zero-terminated
    493 * @param name2 a converter name or alias, zero-terminated
    494 * @return 0 if the names match, or a negative value if the name1
    495 * lexically precedes name2, or a positive value if the name1
    496 * lexically follows name2.
    497 *
    498 * @see ucnv_io_stripForCompare
    499 */
    500 U_CAPI int U_EXPORT2
    501 ucnv_compareNames(const char *name1, const char *name2) {
    502    int rc;
    503    uint8_t type, nextType;
    504    char c1, c2;
    505    UBool afterDigit1 = false, afterDigit2 = false;
    506 
    507    for (;;) {
    508        while ((c1 = *name1++) != 0) {
    509            type = GET_CHAR_TYPE(c1);
    510            switch (type) {
    511            case UIGNORE:
    512                afterDigit1 = false;
    513                continue; /* ignore all but letters and digits */
    514            case ZERO:
    515                if (!afterDigit1) {
    516                    nextType = GET_CHAR_TYPE(*name1);
    517                    if (nextType == ZERO || nextType == NONZERO) {
    518                        continue; /* ignore leading zero before another digit */
    519                    }
    520                }
    521                break;
    522            case NONZERO:
    523                afterDigit1 = true;
    524                break;
    525            default:
    526                c1 = (char)type; /* lowercased letter */
    527                afterDigit1 = false;
    528                break;
    529            }
    530            break; /* deliver c1 */
    531        }
    532        while ((c2 = *name2++) != 0) {
    533            type = GET_CHAR_TYPE(c2);
    534            switch (type) {
    535            case UIGNORE:
    536                afterDigit2 = false;
    537                continue; /* ignore all but letters and digits */
    538            case ZERO:
    539                if (!afterDigit2) {
    540                    nextType = GET_CHAR_TYPE(*name2);
    541                    if (nextType == ZERO || nextType == NONZERO) {
    542                        continue; /* ignore leading zero before another digit */
    543                    }
    544                }
    545                break;
    546            case NONZERO:
    547                afterDigit2 = true;
    548                break;
    549            default:
    550                c2 = (char)type; /* lowercased letter */
    551                afterDigit2 = false;
    552                break;
    553            }
    554            break; /* deliver c2 */
    555        }
    556 
    557        /* If we reach the ends of both strings then they match */
    558        if ((c1|c2)==0) {
    559            return 0;
    560        }
    561 
    562        /* Case-insensitive comparison */
    563        rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
    564        if (rc != 0) {
    565            return rc;
    566        }
    567    }
    568 }
    569 
    570 /*
    571 * search for an alias
    572 * return the converter number index for gConverterList
    573 */
    574 static inline uint32_t
    575 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    576    uint32_t mid, start, limit;
    577    uint32_t lastMid;
    578    int result;
    579    int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
    580    char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    581 
    582    if (!isUnnormalized) {
    583        if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
    584            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    585            return UINT32_MAX;
    586        }
    587 
    588        /* Lower case and remove ignoreable characters. */
    589        ucnv_io_stripForCompare(strippedName, alias);
    590        alias = strippedName;
    591    }
    592 
    593    /* do a binary search for the alias */
    594    start = 0;
    595    limit = gMainTable.untaggedConvArraySize;
    596    mid = limit;
    597    lastMid = UINT32_MAX;
    598 
    599    for (;;) {
    600        mid = (start + limit) / 2;
    601        if (lastMid == mid) {   /* Have we moved? */
    602            break;  /* We haven't moved, and it wasn't found. */
    603        }
    604        lastMid = mid;
    605        if (isUnnormalized) {
    606            result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
    607        }
    608        else {
    609            result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
    610        }
    611 
    612        if (result < 0) {
    613            limit = mid;
    614        } else if (result > 0) {
    615            start = mid;
    616        } else {
    617            /* Since the gencnval tool folds duplicates into one entry,
    618             * this alias in gAliasList is unique, but different standards
    619             * may map an alias to different converters.
    620             */
    621            if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
    622                *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
    623            }
    624            /* State whether the canonical converter name contains an option.
    625            This information is contained in this list in order to maintain backward & forward compatibility. */
    626            if (containsOption) {
    627                UBool containsCnvOptionInfo = static_cast<UBool>(gMainTable.optionTable->containsCnvOptionInfo);
    628                *containsOption = static_cast<UBool>((containsCnvOptionInfo
    629                    && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
    630                    || !containsCnvOptionInfo);
    631            }
    632            return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
    633        }
    634    }
    635 
    636    return UINT32_MAX;
    637 }
    638 
    639 /*
    640 * Is this alias in this list?
    641 * alias and listOffset should be non-nullptr.
    642 */
    643 static inline UBool
    644 isAliasInList(const char *alias, uint32_t listOffset) {
    645    if (listOffset) {
    646        uint32_t currAlias;
    647        uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    648        /* +1 to skip listCount */
    649        const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    650        for (currAlias = 0; currAlias < listCount; currAlias++) {
    651            if (currList[currAlias]
    652                && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
    653            {
    654                return true;
    655            }
    656        }
    657    }
    658    return false;
    659 }
    660 
    661 /*
    662 * Search for an standard name of an alias (what is the default name
    663 * that this standard uses?)
    664 * return the listOffset for gTaggedAliasLists. If it's 0,
    665 * the it couldn't be found, but the parameters are valid.
    666 */
    667 static uint32_t
    668 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    669    uint32_t idx;
    670    uint32_t listOffset;
    671    uint32_t convNum;
    672    UErrorCode myErr = U_ZERO_ERROR;
    673    uint32_t tagNum = getTagNumber(standard);
    674 
    675    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    676    convNum = findConverter(alias, nullptr, &myErr);
    677    if (myErr != U_ZERO_ERROR) {
    678        *pErrorCode = myErr;
    679    }
    680 
    681    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    682        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    683        if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
    684            return listOffset;
    685        }
    686        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    687            /* Uh Oh! They used an ambiguous alias.
    688               We have to search the whole swiss cheese starting
    689               at the highest standard affinity.
    690               This may take a while.
    691            */
    692            for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
    693                listOffset = gMainTable.taggedAliasArray[idx];
    694                if (listOffset && isAliasInList(alias, listOffset)) {
    695                    uint32_t currTagNum = idx/gMainTable.converterListSize;
    696                    uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
    697                    uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
    698                    if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
    699                        return tempListOffset;
    700                    }
    701                    /* else keep on looking */
    702                    /* We could speed this up by starting on the next row
    703                       because an alias is unique per row, right now.
    704                       This would change if alias versioning appears. */
    705                }
    706            }
    707            /* The standard doesn't know about the alias */
    708        }
    709        /* else no default name */
    710        return 0;
    711    }
    712    /* else converter or tag not found */
    713 
    714    return UINT32_MAX;
    715 }
    716 
    717 /* Return the canonical name */
    718 static uint32_t
    719 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    720    uint32_t idx;
    721    uint32_t listOffset;
    722    uint32_t convNum;
    723    UErrorCode myErr = U_ZERO_ERROR;
    724    uint32_t tagNum = getTagNumber(standard);
    725 
    726    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    727    convNum = findConverter(alias, nullptr, &myErr);
    728    if (myErr != U_ZERO_ERROR) {
    729        *pErrorCode = myErr;
    730    }
    731 
    732    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    733        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    734        if (listOffset && isAliasInList(alias, listOffset)) {
    735            return convNum;
    736        }
    737        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    738            /* Uh Oh! They used an ambiguous alias.
    739               We have to search one slice of the swiss cheese.
    740               We search only in the requested tag, not the whole thing.
    741               This may take a while.
    742            */
    743            uint32_t convStart = (tagNum)*gMainTable.converterListSize;
    744            uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
    745            for (idx = convStart; idx < convLimit; idx++) {
    746                listOffset = gMainTable.taggedAliasArray[idx];
    747                if (listOffset && isAliasInList(alias, listOffset)) {
    748                    return idx-convStart;
    749                }
    750            }
    751            /* The standard doesn't know about the alias */
    752        }
    753        /* else no canonical name */
    754    }
    755    /* else converter or tag not found */
    756 
    757    return UINT32_MAX;
    758 }
    759 
    760 U_CAPI const char *
    761 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    762    const char *aliasTmp = alias;
    763    int32_t i = 0;
    764    for (i = 0; i < 2; i++) {
    765        if (i == 1) {
    766            /*
    767             * After the first unsuccess converter lookup, check to see if
    768             * the name begins with 'x-'. If it does, strip it off and try
    769             * again.  This behaviour is similar to how ICU4J does it.
    770             */
    771            if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
    772                aliasTmp = aliasTmp+2;
    773            } else {
    774                break;
    775            }
    776        }
    777        if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
    778            uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
    779            if (convNum < gMainTable.converterListSize) {
    780                return GET_STRING(gMainTable.converterList[convNum]);
    781            }
    782            /* else converter not found */
    783        } else {
    784            break;
    785        }
    786    }
    787 
    788    return nullptr;
    789 }
    790 
    791 U_CDECL_BEGIN
    792 
    793 
    794 static int32_t U_CALLCONV
    795 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    796    int32_t value = 0;
    797    UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    798    uint32_t listOffset = myContext->listOffset;
    799 
    800    if (listOffset) {
    801        value = gMainTable.taggedAliasLists[listOffset];
    802    }
    803    return value;
    804 }
    805 
    806 static const char * U_CALLCONV
    807 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
    808                            int32_t* resultLength,
    809                            UErrorCode * /*pErrorCode*/)
    810 {
    811    UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    812    uint32_t listOffset = myContext->listOffset;
    813 
    814    if (listOffset) {
    815        uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    816        const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    817 
    818        if (myContext->listIdx < listCount) {
    819            const char *myStr = GET_STRING(currList[myContext->listIdx++]);
    820            if (resultLength) {
    821                *resultLength = (int32_t)uprv_strlen(myStr);
    822            }
    823            return myStr;
    824        }
    825    }
    826    /* Either we accessed a zero length list, or we enumerated too far. */
    827    if (resultLength) {
    828        *resultLength = 0;
    829    }
    830    return nullptr;
    831 }
    832 
    833 static void U_CALLCONV
    834 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    835    ((UAliasContext *)(enumerator->context))->listIdx = 0;
    836 }
    837 
    838 static void U_CALLCONV
    839 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
    840    uprv_free(enumerator->context);
    841    uprv_free(enumerator);
    842 }
    843 
    844 U_CDECL_END
    845 
    /*
     * Enumerate the aliases for the specified converter and standard tag.
     *
     * This is a template: ucnv_openStandardNames() copies it into a freshly
     * allocated UEnumeration and then sets the copy's `context` member to a
     * per-instance UAliasContext.
     * NOTE(review): the initializer is positional; the meaning of the two leading
     * nullptr members and of uenum_unextDefault is defined by UEnumeration in
     * uenumimp.h -- confirm there before reordering anything.
     */
    static const UEnumeration gEnumAliases = {
        nullptr,
        nullptr,
        ucnv_io_closeUEnumeration,      /* frees the context and the enumerator */
        ucnv_io_countStandardAliases,   /* number of aliases in the list */
        uenum_unextDefault,
        ucnv_io_nextStandardAliases,    /* next alias as a char * string */
        ucnv_io_resetStandardAliases    /* rewinds listIdx to 0 */
    };
    856 
    857 U_CAPI UEnumeration * U_EXPORT2
    858 ucnv_openStandardNames(const char *convName,
    859                       const char *standard,
    860                       UErrorCode *pErrorCode)
    861 {
    862    UEnumeration *myEnum = nullptr;
    863    if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
    864        uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
    865 
    866        /* When listOffset == 0, we want to acknowledge that the
    867           converter name and standard are okay, but there
    868           is nothing to enumerate. */
    869        if (listOffset < gMainTable.taggedAliasListsSize) {
    870            UAliasContext *myContext;
    871 
    872            myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
    873            if (myEnum == nullptr) {
    874                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    875                return nullptr;
    876            }
    877            uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
    878            myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
    879            if (myContext == nullptr) {
    880                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    881                uprv_free(myEnum);
    882                return nullptr;
    883            }
    884            myContext->listOffset = listOffset;
    885            myContext->listIdx = 0;
    886            myEnum->context = myContext;
    887        }
    888        /* else converter or tag not found */
    889    }
    890    return myEnum;
    891 }
    892 
    893 static uint16_t
    894 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
    895    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    896        uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
    897        if (convNum < gMainTable.converterListSize) {
    898            /* tagListNum - 1 is the ALL tag */
    899            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    900 
    901            if (listOffset) {
    902                return gMainTable.taggedAliasLists[listOffset];
    903            }
    904            /* else this shouldn't happen. internal program error */
    905        }
    906        /* else converter not found */
    907    }
    908    return 0;
    909 }
    910 
    911 static uint16_t
    912 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
    913    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    914        uint32_t currAlias;
    915        uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
    916        if (convNum < gMainTable.converterListSize) {
    917            /* tagListNum - 1 is the ALL tag */
    918            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    919 
    920            if (listOffset) {
    921                uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    922                /* +1 to skip listCount */
    923                const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    924 
    925                for (currAlias = start; currAlias < listCount; currAlias++) {
    926                    aliases[currAlias] = GET_STRING(currList[currAlias]);
    927                }
    928            }
    929            /* else this shouldn't happen. internal program error */
    930        }
    931        /* else converter not found */
    932    }
    933    return 0;
    934 }
    935 
    936 static const char *
    937 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
    938    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    939        uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
    940        if (convNum < gMainTable.converterListSize) {
    941            /* tagListNum - 1 is the ALL tag */
    942            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    943 
    944            if (listOffset) {
    945                uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    946                /* +1 to skip listCount */
    947                const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    948 
    949                if (n < listCount)  {
    950                    return GET_STRING(currList[n]);
    951                }
    952                *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    953            }
    954            /* else this shouldn't happen. internal program error */
    955        }
    956        /* else converter not found */
    957    }
    958    return nullptr;
    959 }
    960 
    961 static uint16_t
    962 ucnv_io_countStandards(UErrorCode *pErrorCode) {
    963    if (haveAliasData(pErrorCode)) {
    964        /* Don't include the empty list */
    965        return static_cast<uint16_t>(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
    966    }
    967 
    968    return 0;
    969 }
    970 
    971 U_CAPI const char * U_EXPORT2
    972 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
    973    if (haveAliasData(pErrorCode)) {
    974        if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
    975            return GET_STRING(gMainTable.tagList[n]);
    976        }
    977        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    978    }
    979 
    980    return nullptr;
    981 }
    982 
    983 U_CAPI const char * U_EXPORT2
    984 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    985    if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    986        uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
    987 
    988        if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
    989            const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    990 
    991            /* Get the preferred name from this list */
    992            if (currList[0]) {
    993                return GET_STRING(currList[0]);
    994            }
    995            /* else someone screwed up the alias table. */
    996            /* *pErrorCode = U_INVALID_FORMAT_ERROR */
    997        }
    998    }
    999 
   1000    return nullptr;
   1001 }
   1002 
   1003 U_CAPI uint16_t U_EXPORT2
   1004 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
   1005 {
   1006    return ucnv_io_countAliases(alias, pErrorCode);
   1007 }
   1008 
   1009 
   1010 U_CAPI const char* U_EXPORT2
   1011 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
   1012 {
   1013    return ucnv_io_getAlias(alias, n, pErrorCode);
   1014 }
   1015 
   1016 U_CAPI void U_EXPORT2
   1017 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
   1018 {
   1019    ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
   1020 }
   1021 
   1022 U_CAPI uint16_t U_EXPORT2
   1023 ucnv_countStandards()
   1024 {
   1025    UErrorCode err = U_ZERO_ERROR;
   1026    return ucnv_io_countStandards(&err);
   1027 }
   1028 
   1029 U_CAPI const char * U_EXPORT2
   1030 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
   1031    if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
   1032        uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
   1033 
   1034        if (convNum < gMainTable.converterListSize) {
   1035            return GET_STRING(gMainTable.converterList[convNum]);
   1036        }
   1037    }
   1038 
   1039    return nullptr;
   1040 }
   1041 
   1042 U_CDECL_BEGIN
   1043 
   1044 
   1045 static int32_t U_CALLCONV
   1046 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
   1047    return gMainTable.converterListSize;
   1048 }
   1049 
   1050 static const char * U_CALLCONV
   1051 ucnv_io_nextAllConverters(UEnumeration *enumerator,
   1052                            int32_t* resultLength,
   1053                            UErrorCode * /*pErrorCode*/)
   1054 {
   1055    uint16_t *myContext = (uint16_t *)(enumerator->context);
   1056 
   1057    if (*myContext < gMainTable.converterListSize) {
   1058        const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
   1059        if (resultLength) {
   1060            *resultLength = (int32_t)uprv_strlen(myStr);
   1061        }
   1062        return myStr;
   1063    }
   1064    /* Either we accessed a zero length list, or we enumerated too far. */
   1065    if (resultLength) {
   1066        *resultLength = 0;
   1067    }
   1068    return nullptr;
   1069 }
   1070 
   1071 static void U_CALLCONV
   1072 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
   1073    *((uint16_t *)(enumerator->context)) = 0;
   1074 }
   1075 U_CDECL_END
   /*
    * Template UEnumeration for enumerating every converter name.
    * ucnv_openAllNames() copies it into a freshly allocated UEnumeration and
    * sets the copy's `context` member to a heap-allocated uint16_t cursor.
    * NOTE(review): the initializer is positional; the meaning of the two leading
    * nullptr members and of uenum_unextDefault is defined by UEnumeration in
    * uenumimp.h -- confirm there before reordering anything.
    */
   static const UEnumeration gEnumAllConverters = {
       nullptr,
       nullptr,
       ucnv_io_closeUEnumeration,      /* frees the context and the enumerator */
       ucnv_io_countAllConverters,     /* gMainTable.converterListSize */
       uenum_unextDefault,
       ucnv_io_nextAllConverters,      /* next converter name as a char * string */
       ucnv_io_resetAllConverters      /* rewinds the cursor to 0 */
   };
   1085 
   1086 U_CAPI UEnumeration * U_EXPORT2
   1087 ucnv_openAllNames(UErrorCode *pErrorCode) {
   1088    UEnumeration *myEnum = nullptr;
   1089    if (haveAliasData(pErrorCode)) {
   1090        uint16_t *myContext;
   1091 
   1092        myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
   1093        if (myEnum == nullptr) {
   1094            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1095            return nullptr;
   1096        }
   1097        uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
   1098        myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
   1099        if (myContext == nullptr) {
   1100            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1101            uprv_free(myEnum);
   1102            return nullptr;
   1103        }
   1104        *myContext = 0;
   1105        myEnum->context = myContext;
   1106    }
   1107    return myEnum;
   1108 }
   1109 
   1110 U_CAPI uint16_t
   1111 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
   1112    if (haveAliasData(pErrorCode)) {
   1113        return (uint16_t)gMainTable.converterListSize;
   1114    }
   1115    return 0;
   1116 }
   1117 
   1118 /* alias table swapping ----------------------------------------------------- */
   1119 
   1120 U_CDECL_BEGIN
   1121 
   /*
    * Function type of the name-stripping routine stored in
    * TempAliasTable::stripForCompare. ucnv_swapAliases() assigns either
    * ucnv_io_stripASCIIForCompare or ucnv_io_stripEBCDICForCompare to it, and
    * io_compareRows() passes the returned pointer to uprv_strcmp().
    * NOTE(review): presumably writes the stripped form of `name` into `dst` and
    * returns `dst` -- confirm against the strip functions' definitions.
    */
   typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
   1123 U_CDECL_END
   1124 
   1125 
   /*
    * One row of the temporary array that ucnv_swapAliases() sorts.
    *
    * Each row pairs a platform-endian charset string index with the row's
    * original position. After the rows are sorted by string, the actual arrays
    * are permuted according to the recorded positions.
    */
   typedef struct TempRow {
       uint16_t strIndex;   /* string index read from the alias list */
       uint16_t sortIndex;  /* position of this row before sorting */
   } TempRow;
   1136 
   /*
    * Working state for re-sorting the alias list in ucnv_swapAliases();
    * a pointer to it is passed to io_compareRows() as the sort context.
    */
   typedef struct TempAliasTable {
       const char *chars;                  /* base of the (already swapped) output string table */
       TempRow *rows;                      /* rows to sort: stack array or heap block */
       uint16_t *resort;                   /* scratch array used when swapping in place */
       StripForCompareFn *stripForCompare; /* name stripper matching the output charset family */
   } TempAliasTable;
   1143 
   /*
    * Number of TempRow / resort entries that ucnv_swapAliases() keeps in stack
    * arrays; larger alias lists are sorted in a heap allocation instead.
    */
   enum { STACK_ROW_CAPACITY = 500 };
   1147 
   1148 static int32_t U_CALLCONV
   1149 io_compareRows(const void *context, const void *left, const void *right) {
   1150    char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
   1151         strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
   1152 
   1153    TempAliasTable *tempTable=(TempAliasTable *)context;
   1154    const char *chars=tempTable->chars;
   1155 
   1156    return static_cast<int32_t>(uprv_strcmp(
   1157        tempTable->stripForCompare(strippedLeft, chars + 2 * static_cast<const TempRow*>(left)->strIndex),
   1158        tempTable->stripForCompare(strippedRight, chars + 2 * static_cast<const TempRow*>(right)->strIndex)));
   1159 }
   1160 
   1161 U_CAPI int32_t U_EXPORT2
   1162 ucnv_swapAliases(const UDataSwapper *ds,
   1163                 const void *inData, int32_t length, void *outData,
   1164                 UErrorCode *pErrorCode) {
   1165    const UDataInfo *pInfo;
   1166    int32_t headerSize;
   1167 
   1168    const uint16_t *inTable;
   1169    const uint32_t *inSectionSizes;
   1170    uint32_t toc[offsetsCount];
   1171    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
   1172    uint32_t i, count, tocLength, topOffset;
   1173 
   1174    TempRow rows[STACK_ROW_CAPACITY];
   1175    uint16_t resort[STACK_ROW_CAPACITY];
   1176    TempAliasTable tempTable;
   1177 
   1178    /* udata_swapDataHeader checks the arguments */
   1179    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1180    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
   1181        return 0;
   1182    }
   1183 
   1184    /* check data format and format version */
   1185    pInfo=(const UDataInfo *)((const char *)inData+4);
   1186    if(!(
   1187        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
   1188        pInfo->dataFormat[1]==0x76 &&
   1189        pInfo->dataFormat[2]==0x41 &&
   1190        pInfo->dataFormat[3]==0x6c &&
   1191        pInfo->formatVersion[0]==3
   1192    )) {
   1193        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
   1194                         pInfo->dataFormat[0], pInfo->dataFormat[1],
   1195                         pInfo->dataFormat[2], pInfo->dataFormat[3],
   1196                         pInfo->formatVersion[0]);
   1197        *pErrorCode=U_UNSUPPORTED_ERROR;
   1198        return 0;
   1199    }
   1200 
   1201    /* an alias table must contain at least the table of contents array */
   1202    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
   1203        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
   1204                         length-headerSize);
   1205        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1206        return 0;
   1207    }
   1208 
   1209    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
   1210    inTable=(const uint16_t *)inSectionSizes;
   1211    uprv_memset(toc, 0, sizeof(toc));
   1212    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
   1213    if(tocLength<minTocLength || offsetsCount<=tocLength) {
   1214        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
   1215        *pErrorCode=U_INVALID_FORMAT_ERROR;
   1216        return 0;
   1217    }
   1218 
   1219    /* read the known part of the table of contents */
   1220    for(i=converterListIndex; i<=tocLength; ++i) {
   1221        toc[i]=ds->readUInt32(inSectionSizes[i]);
   1222    }
   1223 
   1224    /* compute offsets */
   1225    uprv_memset(offsets, 0, sizeof(offsets));
   1226    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
   1227    for(i=tagListIndex; i<=tocLength; ++i) {
   1228        offsets[i]=offsets[i-1]+toc[i-1];
   1229    }
   1230 
   1231    /* compute the overall size of the after-header data, in numbers of 16-bit units */
   1232    topOffset=offsets[i-1]+toc[i-1];
   1233 
   1234    if(length>=0) {
   1235        uint16_t *outTable;
   1236        const uint16_t *p, *p2;
   1237        uint16_t *q, *q2;
   1238        uint16_t oldIndex;
   1239 
   1240        if((length-headerSize)<(2*(int32_t)topOffset)) {
   1241            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
   1242                             length-headerSize);
   1243            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1244            return 0;
   1245        }
   1246 
   1247        outTable=(uint16_t *)((char *)outData+headerSize);
   1248 
   1249        /* swap the entire table of contents */
   1250        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
   1251 
   1252        /* swap unormalized strings & normalized strings */
   1253        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
   1254                             outTable+offsets[stringTableIndex], pErrorCode);
   1255        if(U_FAILURE(*pErrorCode)) {
   1256            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
   1257            return 0;
   1258        }
   1259 
   1260        if(ds->inCharset==ds->outCharset) {
   1261            /* no need to sort, just swap all 16-bit values together */
   1262            ds->swapArray16(ds,
   1263                            inTable+offsets[converterListIndex],
   1264                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
   1265                            outTable+offsets[converterListIndex],
   1266                            pErrorCode);
   1267        } else {
   1268            /* allocate the temporary table for sorting */
   1269            count=toc[aliasListIndex];
   1270 
   1271            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
   1272 
   1273            if(count<=STACK_ROW_CAPACITY) {
   1274                tempTable.rows=rows;
   1275                tempTable.resort=resort;
   1276            } else {
   1277                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
   1278                if(tempTable.rows==nullptr) {
   1279                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
   1280                                     count);
   1281                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1282                    return 0;
   1283                }
   1284                tempTable.resort=(uint16_t *)(tempTable.rows+count);
   1285            }
   1286 
   1287            if(ds->outCharset==U_ASCII_FAMILY) {
   1288                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
   1289            } else /* U_EBCDIC_FAMILY */ {
   1290                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
   1291            }
   1292 
   1293            /*
   1294             * Sort unique aliases+mapped names.
   1295             *
   1296             * We need to sort the list again by outCharset strings because they
   1297             * sort differently for different charset families.
   1298             * First we set up a temporary table with the string indexes and
   1299             * sorting indexes and sort that.
   1300             * Then we permutate and copy/swap the actual values.
   1301             */
   1302            p=inTable+offsets[aliasListIndex];
   1303            q=outTable+offsets[aliasListIndex];
   1304 
   1305            p2=inTable+offsets[untaggedConvArrayIndex];
   1306            q2=outTable+offsets[untaggedConvArrayIndex];
   1307 
   1308            for(i=0; i<count; ++i) {
   1309                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
   1310                tempTable.rows[i].sortIndex=(uint16_t)i;
   1311            }
   1312 
   1313            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
   1314                           io_compareRows, &tempTable,
   1315                           false, pErrorCode);
   1316 
   1317            if(U_SUCCESS(*pErrorCode)) {
   1318                /* copy/swap/permutate items */
   1319                if(p!=q) {
   1320                    for(i=0; i<count; ++i) {
   1321                        oldIndex=tempTable.rows[i].sortIndex;
   1322                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
   1323                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
   1324                    }
   1325                } else {
   1326                    /*
   1327                     * If we swap in-place, then the permutation must use another
   1328                     * temporary array (tempTable.resort)
   1329                     * before the results are copied to the outBundle.
   1330                     */
   1331                    uint16_t *r=tempTable.resort;
   1332 
   1333                    for(i=0; i<count; ++i) {
   1334                        oldIndex=tempTable.rows[i].sortIndex;
   1335                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
   1336                    }
   1337                    uprv_memcpy(q, r, 2*(size_t)count);
   1338 
   1339                    for(i=0; i<count; ++i) {
   1340                        oldIndex=tempTable.rows[i].sortIndex;
   1341                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
   1342                    }
   1343                    uprv_memcpy(q2, r, 2*(size_t)count);
   1344                }
   1345            }
   1346 
   1347            if(tempTable.rows!=rows) {
   1348                uprv_free(tempTable.rows);
   1349            }
   1350 
   1351            if(U_FAILURE(*pErrorCode)) {
   1352                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
   1353                                 count);
   1354                return 0;
   1355            }
   1356 
   1357            /* swap remaining 16-bit values */
   1358            ds->swapArray16(ds,
   1359                            inTable+offsets[converterListIndex],
   1360                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
   1361                            outTable+offsets[converterListIndex],
   1362                            pErrorCode);
   1363            ds->swapArray16(ds,
   1364                            inTable+offsets[taggedAliasArrayIndex],
   1365                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
   1366                            outTable+offsets[taggedAliasArrayIndex],
   1367                            pErrorCode);
   1368        }
   1369    }
   1370 
   1371    return headerSize+2*(int32_t)topOffset;
   1372 }
   1373 
   1374 #endif
   1375 
   1376 
   1377 /*
   1378 * Hey, Emacs, please set the following:
   1379 *
   1380 * Local Variables:
   1381 * indent-tabs-mode: nil
   1382 * End:
   1383 *
   1384 */