tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

makeconv.cpp (29821B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ********************************************************************************
      5 *
      6 *   Copyright (C) 1998-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ********************************************************************************
     10 *
     11 *
     12 *  makeconv.cpp:
     13 *  tool creating a binary (compressed) representation of the conversion mapping
     14 *  table (IBM NLTC ucmap format).
     15 *
     16 *  05/04/2000    helena     Added fallback mapping into the picture...
     17 *  06/29/2000  helena      Major rewrite of the callback APIs.
     18 */
     19 
     20 #include <stdio.h>
     21 #include "unicode/putil.h"
     22 #include "unicode/ucnv_err.h"
     23 #include "charstr.h"
     24 #include "ucnv_bld.h"
     25 #include "ucnv_imp.h"
     26 #include "ucnv_cnv.h"
     27 #include "cstring.h"
     28 #include "cmemory.h"
     29 #include "uinvchar.h"
     30 #include "filestrm.h"
     31 #include "toolutil.h"
     32 #include "uoptions.h"
     33 #include "unicode/udata.h"
     34 #include "unewdata.h"
     35 #include "uparse.h"
     36 #include "ucm.h"
     37 #include "makeconv.h"
     38 #include "genmbcs.h"
     39 
     40 #define DEBUG 0
     41 
     42 typedef struct ConvData {
     43    UCMFile *ucm;
     44    NewConverter *cnvData, *extData;
     45    UConverterSharedData sharedData;
     46    UConverterStaticData staticData;
     47 } ConvData;
     48 
     49 static void
     50 initConvData(ConvData *data) {
     51    uprv_memset(data, 0, sizeof(ConvData));
     52    data->sharedData.structSize=sizeof(UConverterSharedData);
     53    data->staticData.structSize=sizeof(UConverterStaticData);
     54    data->sharedData.staticData=&data->staticData;
     55 }
     56 
     57 static void
     58 cleanupConvData(ConvData *data) {
     59    if(data!=nullptr) {
     60        if(data->cnvData!=nullptr) {
     61            data->cnvData->close(data->cnvData);
     62            data->cnvData=nullptr;
     63        }
     64        if(data->extData!=nullptr) {
     65            data->extData->close(data->extData);
     66            data->extData=nullptr;
     67        }
     68        ucm_close(data->ucm);
     69        data->ucm=nullptr;
     70    }
     71 }
     72 
/*
 * from ucnvstat.c - static prototypes of data-based converters
 *
 * readHeader() indexes this array with the conversion type of the .ucm file
 * and copies default header values from the entry (if it is not nullptr).
 */
U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
     77 
/*
 * Global flags - verbosity and output variants.
 * main() sets them from the command-line options.
 */
UBool VERBOSE = false;            /* --verbose: print progress messages */
UBool QUIET = false;              /* --quiet: suppress warnings such as the name-mismatch warning in main() */
UBool SMALL = false;              /* --small: set from the option here; not read in the visible part of this file */
UBool IGNORE_SISO_CHECK = false;  /* --ignore-siso-check: passed to ucm_processStates() in readFile() */
     85 
/*
 * Read the .ucm file named converterName and build the converter data in *data.
 * Does nothing if *pErrorCode already indicates a failure.
 */
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);

/*
* Set up the UNewData and write the converter.
* Does nothing if *status already indicates a failure.
*/
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);

/*
 * Whether writeConverterData() passes U_COPYRIGHT_STRING to udata_create().
 * main() overwrites this with the state of the --copyright option.
 */
UBool haveCopyright=true;
     96 
/*
 * Data header description handed to udata_create() for every .cnv file.
 * main() copies the ICU version into dataVersion before any file is written
 * and prints formatVersion[0].formatVersion[1] for --version.
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(char16_t),
    0,

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
    110 
    111 static void
    112 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
    113 {
    114    UNewDataMemory *mem = nullptr;
    115    uint32_t sz2;
    116    uint32_t size = 0;
    117    int32_t tableType;
    118 
    119    if(U_FAILURE(*status))
    120      {
    121        return;
    122      }
    123 
    124    tableType=TABLE_NONE;
    125    if(data->cnvData!=nullptr) {
    126        tableType|=TABLE_BASE;
    127    }
    128    if(data->extData!=nullptr) {
    129        tableType|=TABLE_EXT;
    130    }
    131 
    132    mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : nullptr, status);
    133 
    134    if(U_FAILURE(*status))
    135      {
    136        fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
    137                cnvName,
    138                "cnv",
    139                u_errorName(*status));
    140        return;
    141      }
    142 
    143    if(VERBOSE)
    144      {
    145        printf("- Opened udata %s.%s\n", cnvName, "cnv");
    146      }
    147 
    148 
    149    /* all read only, clean, platform independent data.  Mmmm. :)  */
    150    udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
    151    size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
    152    /* Now, write the table */
    153    if(tableType&TABLE_BASE) {
    154        size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
    155    }
    156    if(tableType&TABLE_EXT) {
    157        size += data->extData->write(data->extData, &data->staticData, mem, tableType);
    158    }
    159 
    160    sz2 = udata_finish(mem, status);
    161    if(size != sz2)
    162    {
    163        fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", static_cast<int>(sz2), static_cast<int>(size));
    164        *status=U_INTERNAL_PROGRAM_ERROR;
    165    }
    166    if(VERBOSE)
    167    {
    168      printf("- Wrote %u bytes to the udata.\n", static_cast<int>(sz2));
    169    }
    170 }
    171 
/*
 * Indexes into options[] below.
 * The enumerators must stay in the same order as the options[] initializers
 * because main() looks options up as options[OPT_...].
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_QUIET,
    OPT_SOURCEDIR,

    OPT_COUNT
};
    186 
/*
 * Command-line option table parsed by u_parseArgs() in main().
 * Entry order must match the OPT_... enum above.
 * NOTE(review): '\1' presumably means "no single-letter form" - the usage
 * text lists --small and --ignore-siso-check with long names only; confirm
 * against uoptions.h.
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    { "small", nullptr, nullptr, nullptr, '\1', UOPT_NO_ARG, 0 },
    { "ignore-siso-check", nullptr, nullptr, nullptr, '\1', UOPT_NO_ARG, 0 },
    UOPTION_QUIET,
    UOPTION_SOURCEDIR,
};
    199 
    200 int main(int argc, char* argv[])
    201 {
    202    ConvData data;
    203    char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    204 
    205    U_MAIN_INIT_ARGS(argc, argv);
    206 
    207    /* Set up the ICU version number */
    208    UVersionInfo icuVersion;
    209    u_getVersion(icuVersion);
    210    uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
    211 
    212    /* preset then read command line options */
    213    options[OPT_DESTDIR].value=u_getDataDirectory();
    214    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
    215 
    216    if(options[OPT_VERSION].doesOccur) {
    217        printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
    218               dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
    219        printf("%s\n", U_COPYRIGHT_STRING);
    220        exit(0);
    221    }
    222 
    223    /* error handling, printing usage message */
    224    if(argc<0) {
    225        fprintf(stderr,
    226            "error in command line argument \"%s\"\n",
    227            argv[-argc]);
    228    } else if(argc<2) {
    229        argc=-1;
    230    }
    231    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
    232        FILE *stdfile=argc<0 ? stderr : stdout;
    233        fprintf(stdfile,
    234            "usage: %s [-options] files...\n"
    235            "\tread .ucm codepage mapping files and write .cnv files\n"
    236            "options:\n"
    237            "\t-h or -? or --help  this usage text\n"
    238            "\t-V or --version     show a version message\n"
    239            "\t-c or --copyright   include a copyright notice\n"
    240            "\t-d or --destdir     destination directory, followed by the path\n"
    241            "\t-v or --verbose     Turn on verbose output\n"
    242            "\t-q or --quiet       do not display warnings and progress\n"
    243            "\t-s or --sourcedir   source directory, followed by the path\n",
    244            argv[0]);
    245        fprintf(stdfile,
    246            "\t      --small       Generate smaller .cnv files. They will be\n"
    247            "\t                    significantly smaller but may not be compatible with\n"
    248            "\t                    older versions of ICU and will require heap memory\n"
    249            "\t                    allocation when loaded.\n"
    250            "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
    251        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    252    }
    253 
    254    /* get the options values */
    255    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
    256    const char *destdir = options[OPT_DESTDIR].value;
    257    VERBOSE = options[OPT_VERBOSE].doesOccur;
    258    QUIET = options[OPT_QUIET].doesOccur;
    259    SMALL = options[OPT_SMALL].doesOccur;
    260 
    261    if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
    262        IGNORE_SISO_CHECK = true;
    263    }
    264 
    265    icu::CharString outFileName;
    266    UErrorCode err = U_ZERO_ERROR;
    267    if (destdir != nullptr && *destdir != 0) {
    268        outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
    269        if (U_FAILURE(err)) {
    270            return err;
    271        }
    272    }
    273    int32_t outBasenameStart = outFileName.length();
    274 
    275 #if DEBUG
    276    {
    277      int i;
    278      printf("makeconv: processing %d files...\n", argc - 1);
    279      for(i=1; i<argc; ++i) {
    280        printf("%s ", argv[i]);
    281      }
    282      printf("\n");
    283      fflush(stdout);
    284    }
    285 #endif
    286 
    287    UBool printFilename = static_cast<UBool>(argc > 2 || VERBOSE);
    288    icu::CharString pathBuf;
    289    for (++argv; --argc; ++argv)
    290    {
    291        UErrorCode localError = U_ZERO_ERROR;
    292        const char *arg = getLongPathname(*argv);
    293 
    294        const char* sourcedir = options[OPT_SOURCEDIR].value;
    295        if (sourcedir != nullptr && *sourcedir != 0 && uprv_strcmp(sourcedir, ".") != 0) {
    296            pathBuf.clear();
    297            pathBuf.appendPathPart(sourcedir, localError);
    298            pathBuf.appendPathPart(arg, localError);
    299            arg = pathBuf.data();
    300        }
    301 
    302        /*produces the right destination path for display*/
    303        outFileName.truncate(outBasenameStart);
    304        if (outBasenameStart != 0)
    305        {
    306            /* find the last file sepator */
    307            const char *basename = findBasename(arg);
    308            outFileName.append(basename, localError);
    309        }
    310        else
    311        {
    312            outFileName.append(arg, localError);
    313        }
    314        if (U_FAILURE(localError)) {
    315            return localError;
    316        }
    317 
    318        /*removes the extension if any is found*/
    319        int32_t lastDotIndex = outFileName.lastIndexOf('.');
    320        if (lastDotIndex >= outBasenameStart) {
    321            outFileName.truncate(lastDotIndex);
    322        }
    323 
    324        /* the basename without extension is the converter name */
    325        if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
    326            fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
    327            return U_BUFFER_OVERFLOW_ERROR;
    328        }
    329        uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
    330 
    331        /*Adds the target extension*/
    332        outFileName.append(CONVERTER_FILE_EXTENSION, localError);
    333        if (U_FAILURE(localError)) {
    334            return localError;
    335        }
    336 
    337 #if DEBUG
    338        printf("makeconv: processing %s  ...\n", arg);
    339        fflush(stdout);
    340 #endif
    341        initConvData(&data);
    342        createConverter(&data, arg, &localError);
    343 
    344        if (U_FAILURE(localError))
    345        {
    346            /* if an error is found, print out an error msg and keep going */
    347            fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
    348                    outFileName.data(), arg, u_errorName(localError));
    349            if(U_SUCCESS(err)) {
    350                err = localError;
    351            }
    352        }
    353        else
    354        {
    355            /* Insure the static data name matches the  file name */
    356            /* Changed to ignore directory and only compare base name
    357             LDH 1/2/08*/
    358            char *p;
    359            p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
    360 
    361            if(p == nullptr)            /* OK, try alternate */
    362            {
    363                p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
    364                if(p == nullptr)
    365                {
    366                    p=cnvName; /* If no separators, no problem */
    367                }
    368            }
    369            else
    370            {
    371                p++;   /* If found separator, don't include it in compare */
    372            }
    373            if(uprv_stricmp(p,data.staticData.name) && !QUIET)
    374            {
    375                fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
    376                    cnvName,  CONVERTER_FILE_EXTENSION,
    377                    data.staticData.name);
    378            }
    379 
    380            if (strlen(cnvName) + 1 > UPRV_LENGTHOF(data.staticData.name)) {
    381                fprintf(stderr, "converter name %s too long\n", cnvName);
    382                return U_BUFFER_OVERFLOW_ERROR;
    383            }
    384            uprv_strcpy((char*)data.staticData.name, cnvName);
    385 
    386            if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
    387                fprintf(stderr,
    388                    "Error: A converter name must contain only invariant characters.\n"
    389                    "%s is not a valid converter name.\n",
    390                    data.staticData.name);
    391                if(U_SUCCESS(err)) {
    392                    err = U_INVALID_TABLE_FORMAT;
    393                }
    394            }
    395 
    396            localError = U_ZERO_ERROR;
    397            writeConverterData(&data, cnvName, destdir, &localError);
    398 
    399            if(U_FAILURE(localError))
    400            {
    401                /* if an error is found, print out an error msg and keep going*/
    402                fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
    403                    u_errorName(localError));
    404                if(U_SUCCESS(err)) {
    405                    err = localError;
    406                }
    407            }
    408            else if (printFilename)
    409            {
    410                puts(outFileName.data() + outBasenameStart);
    411            }
    412        }
    413        fflush(stdout);
    414        fflush(stderr);
    415 
    416        cleanupConvData(&data);
    417    }
    418 
    419    return err;
    420 }
    421 
    422 static void
    423 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
    424    if( (name[0]=='i' || name[0]=='I') &&
    425        (name[1]=='b' || name[1]=='B') &&
    426        (name[2]=='m' || name[2]=='M')
    427    ) {
    428        name+=3;
    429        if(*name=='-') {
    430            ++name;
    431        }
    432        *pPlatform=UCNV_IBM;
    433        *pCCSID = static_cast<int32_t>(uprv_strtoul(name, nullptr, 10));
    434    } else {
    435        *pPlatform=UCNV_UNKNOWN;
    436        *pCCSID=0;
    437    }
    438 }
    439 
    440 static void
    441 readHeader(ConvData *data,
    442           FileStream* convFile,
    443           UErrorCode *pErrorCode) {
    444    char line[1024];
    445    char *s, *key, *value;
    446    const UConverterStaticData *prototype;
    447    UConverterStaticData *staticData;
    448 
    449    if(U_FAILURE(*pErrorCode)) {
    450        return;
    451    }
    452 
    453    staticData=&data->staticData;
    454    staticData->platform=UCNV_IBM;
    455    staticData->subCharLen=0;
    456 
    457    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    458        /* basic parsing and handling of state-related items */
    459        if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
    460            continue;
    461        }
    462 
    463        /* stop at the beginning of the mapping section */
    464        if(uprv_strcmp(line, "CHARMAP")==0) {
    465            break;
    466        }
    467 
    468        /* collect the information from the header field, ignore unknown keys */
    469        if(uprv_strcmp(key, "code_set_name")==0) {
    470            if(*value!=0) {
    471                uprv_strcpy((char *)staticData->name, value);
    472                getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
    473            }
    474        } else if(uprv_strcmp(key, "subchar")==0) {
    475            uint8_t bytes[UCNV_EXT_MAX_BYTES];
    476            int8_t length;
    477 
    478            s=value;
    479            length=ucm_parseBytes(bytes, line, (const char **)&s);
    480            if(1<=length && length<=4 && *s==0) {
    481                staticData->subCharLen=length;
    482                uprv_memcpy(staticData->subChar, bytes, length);
    483            } else {
    484                fprintf(stderr, "error: illegal <subchar> %s\n", value);
    485                *pErrorCode=U_INVALID_TABLE_FORMAT;
    486                return;
    487            }
    488        } else if(uprv_strcmp(key, "subchar1")==0) {
    489            uint8_t bytes[UCNV_EXT_MAX_BYTES];
    490 
    491            s=value;
    492            if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
    493                staticData->subChar1=bytes[0];
    494            } else {
    495                fprintf(stderr, "error: illegal <subchar1> %s\n", value);
    496                *pErrorCode=U_INVALID_TABLE_FORMAT;
    497                return;
    498            }
    499        }
    500    }
    501 
    502    /* copy values from the UCMFile to the static data */
    503    staticData->maxBytesPerChar = static_cast<int8_t>(data->ucm->states.maxCharLength);
    504    staticData->minBytesPerChar = static_cast<int8_t>(data->ucm->states.minCharLength);
    505    staticData->conversionType=data->ucm->states.conversionType;
    506 
    507    if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
    508        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
    509        *pErrorCode=U_INVALID_TABLE_FORMAT;
    510        return;
    511    }
    512 
    513    /*
    514     * Now that we know the type, copy any 'default' values from the table.
    515     * We need not check the type any further because the parser only
    516     * recognizes what we have prototypes for.
    517     *
    518     * For delta (extension-only) tables, copy values from the base file
    519     * instead, see createConverter().
    520     */
    521    if(data->ucm->baseName[0]==0) {
    522        prototype=ucnv_converterStaticData[staticData->conversionType];
    523        if(prototype!=nullptr) {
    524            if(staticData->name[0]==0) {
    525                uprv_strcpy((char *)staticData->name, prototype->name);
    526            }
    527 
    528            if(staticData->codepage==0) {
    529                staticData->codepage=prototype->codepage;
    530            }
    531 
    532            if(staticData->platform==0) {
    533                staticData->platform=prototype->platform;
    534            }
    535 
    536            if(staticData->minBytesPerChar==0) {
    537                staticData->minBytesPerChar=prototype->minBytesPerChar;
    538            }
    539 
    540            if(staticData->maxBytesPerChar==0) {
    541                staticData->maxBytesPerChar=prototype->maxBytesPerChar;
    542            }
    543 
    544            if(staticData->subCharLen==0) {
    545                staticData->subCharLen=prototype->subCharLen;
    546                if(prototype->subCharLen>0) {
    547                    uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
    548                }
    549            }
    550        }
    551    }
    552 
    553    if(data->ucm->states.outputType<0) {
    554        data->ucm->states.outputType = static_cast<int8_t>(data->ucm->states.maxCharLength) - 1;
    555    }
    556 
    557    if( staticData->subChar1!=0 &&
    558            (staticData->minBytesPerChar>1 ||
    559                (staticData->conversionType!=UCNV_MBCS &&
    560                 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
    561    ) {
    562        fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
    563        *pErrorCode=U_INVALID_TABLE_FORMAT;
    564    }
    565 }
    566 
    567 /* return true if a base table was read, false for an extension table */
    568 static UBool
    569 readFile(ConvData *data, const char* converterName,
    570         UErrorCode *pErrorCode) {
    571    char line[1024];
    572    char *end;
    573    FileStream *convFile;
    574 
    575    UCMStates *baseStates;
    576    UBool dataIsBase;
    577 
    578    if(U_FAILURE(*pErrorCode)) {
    579        return false;
    580    }
    581 
    582    data->ucm=ucm_open();
    583 
    584    convFile=T_FileStream_open(converterName, "r");
    585    if(convFile==nullptr) {
    586        *pErrorCode=U_FILE_ACCESS_ERROR;
    587        return false;
    588    }
    589 
    590    readHeader(data, convFile, pErrorCode);
    591    if(U_FAILURE(*pErrorCode)) {
    592        return false;
    593    }
    594 
    595    if(data->ucm->baseName[0]==0) {
    596        dataIsBase=true;
    597        baseStates=&data->ucm->states;
    598        ucm_processStates(baseStates, IGNORE_SISO_CHECK);
    599    } else {
    600        dataIsBase=false;
    601        baseStates=nullptr;
    602    }
    603 
    604    /* read the base table */
    605    ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
    606    if(U_FAILURE(*pErrorCode)) {
    607        return false;
    608    }
    609 
    610    /* read an extension table if there is one */
    611    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    612        end=uprv_strchr(line, 0);
    613        while(line<end &&
    614              (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
    615            --end;
    616        }
    617        *end=0;
    618 
    619        if(line[0]=='#' || u_skipWhitespace(line)==end) {
    620            continue; /* ignore empty and comment lines */
    621        }
    622 
    623        if(0==uprv_strcmp(line, "CHARMAP")) {
    624            /* read the extension table */
    625            ucm_readTable(data->ucm, convFile, false, baseStates, pErrorCode);
    626        } else {
    627            fprintf(stderr, "unexpected text after the base mapping table\n");
    628        }
    629        break;
    630    }
    631 
    632    T_FileStream_close(convFile);
    633 
    634    if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
    635        fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
    636        *pErrorCode=U_INVALID_TABLE_FORMAT;
    637    }
    638 
    639    return dataIsBase;
    640 }
    641 
    642 static void
    643 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
    644    ConvData baseData;
    645    UBool dataIsBase;
    646 
    647    UConverterStaticData *staticData;
    648    UCMStates *states, *baseStates;
    649 
    650    if(U_FAILURE(*pErrorCode)) {
    651        return;
    652    }
    653 
    654    initConvData(data);
    655 
    656    dataIsBase=readFile(data, converterName, pErrorCode);
    657    if(U_FAILURE(*pErrorCode)) {
    658        return;
    659    }
    660 
    661    staticData=&data->staticData;
    662    states=&data->ucm->states;
    663 
    664    if(dataIsBase) {
    665        /*
    666         * Build a normal .cnv file with a base table
    667         * and an optional extension table.
    668         */
    669        data->cnvData=MBCSOpen(data->ucm);
    670        if(data->cnvData==nullptr) {
    671            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    672 
    673        } else if(!data->cnvData->isValid(data->cnvData,
    674                            staticData->subChar, staticData->subCharLen)
    675        ) {
    676            fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    677            *pErrorCode=U_INVALID_TABLE_FORMAT;
    678 
    679        } else if(staticData->subChar1!=0 &&
    680                    !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
    681        ) {
    682            fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    683            *pErrorCode=U_INVALID_TABLE_FORMAT;
    684 
    685        } else if(
    686            data->ucm->ext->mappingsLength>0 &&
    687            !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, 0)
    688        ) {
    689            *pErrorCode=U_INVALID_TABLE_FORMAT;
    690        } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
    691            /* sort the table so that it can be turned into UTF-8-friendly data */
    692            ucm_sortTable(data->ucm->base);
    693        }
    694 
    695        if(U_SUCCESS(*pErrorCode)) {
    696            if(
    697                /* add the base table after ucm_checkBaseExt()! */
    698                !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
    699            ) {
    700                *pErrorCode=U_INVALID_TABLE_FORMAT;
    701            } else {
    702                /*
    703                 * addTable() may have requested moving more mappings to the extension table
    704                 * if they fit into the base toUnicode table but not into the
    705                 * base fromUnicode table.
    706                 * (Especially for UTF-8-friendly fromUnicode tables.)
    707                 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
    708                 * to be excluded from the extension toUnicode data.
    709                 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
    710                 * the base fromUnicode table.
    711                 */
    712                ucm_moveMappings(data->ucm->base, data->ucm->ext);
    713                ucm_sortTable(data->ucm->ext);
    714                if(data->ucm->ext->mappingsLength>0) {
    715                    /* prepare the extension table, if there is one */
    716                    data->extData=CnvExtOpen(data->ucm);
    717                    if(data->extData==nullptr) {
    718                        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    719                    } else if(
    720                        !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
    721                    ) {
    722                        *pErrorCode=U_INVALID_TABLE_FORMAT;
    723                    }
    724                }
    725            }
    726        }
    727    } else {
    728        /* Build an extension-only .cnv file. */
    729        char baseFilename[500];
    730        char *basename;
    731 
    732        initConvData(&baseData);
    733 
    734        /* assemble a path/filename for data->ucm->baseName */
    735        uprv_strcpy(baseFilename, converterName);
    736        basename = const_cast<char*>(findBasename(baseFilename));
    737        uprv_strcpy(basename, data->ucm->baseName);
    738        uprv_strcat(basename, ".ucm");
    739 
    740        /* read the base table */
    741        dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
    742        if(U_FAILURE(*pErrorCode)) {
    743            return;
    744        } else if(!dataIsBase) {
    745            fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
    746            *pErrorCode=U_INVALID_TABLE_FORMAT;
    747        } else {
    748            /* prepare the extension table */
    749            data->extData=CnvExtOpen(data->ucm);
    750            if(data->extData==nullptr) {
    751                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    752            } else {
    753                /* fill in gaps in extension file header fields */
    754                UCMapping *m, *mLimit;
    755                uint8_t fallbackFlags;
    756 
    757                baseStates=&baseData.ucm->states;
    758                if(states->conversionType==UCNV_DBCS) {
    759                    staticData->minBytesPerChar = static_cast<int8_t>(states->minCharLength = 2);
    760                } else if(states->minCharLength==0) {
    761                    staticData->minBytesPerChar = static_cast<int8_t>(states->minCharLength = baseStates->minCharLength);
    762                }
    763                if(states->maxCharLength<states->minCharLength) {
    764                    staticData->maxBytesPerChar = static_cast<int8_t>(states->maxCharLength = baseStates->maxCharLength);
    765                }
    766 
    767                if(staticData->subCharLen==0) {
    768                    uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
    769                    staticData->subCharLen=baseData.staticData.subCharLen;
    770                }
    771                /*
    772                 * do not copy subChar1 -
    773                 * only use what is explicitly specified
    774                 * because it cannot be unset in the extension file header
    775                 */
    776 
    777                /* get the fallback flags */
    778                fallbackFlags=0;
    779                for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    780                    m<mLimit && fallbackFlags!=3;
    781                    ++m
    782                ) {
    783                    if(m->f==1) {
    784                        fallbackFlags|=1;
    785                    } else if(m->f==3) {
    786                        fallbackFlags|=2;
    787                    }
    788                }
    789 
    790                if(fallbackFlags&1) {
    791                    staticData->hasFromUnicodeFallback=true;
    792                }
    793                if(fallbackFlags&2) {
    794                    staticData->hasToUnicodeFallback=true;
    795                }
    796 
    797                if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
    798                    fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    799                    *pErrorCode=U_INVALID_TABLE_FORMAT;
    800 
    801                } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
    802                    fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    803                    *pErrorCode=U_INVALID_TABLE_FORMAT;
    804 
    805                } else if(
    806                    !ucm_checkValidity(data->ucm->ext, baseStates) ||
    807                    !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, 0)
    808                ) {
    809                    *pErrorCode=U_INVALID_TABLE_FORMAT;
    810                } else {
    811                    if(states->maxCharLength>1) {
    812                        /*
    813                         * When building a normal .cnv file with a base table
    814                         * for an MBCS (not SBCS) table with explicit precision flags,
    815                         * the MBCSAddTable() function marks some mappings for moving
    816                         * to the extension table.
    817                         * They fit into the base toUnicode table but not into the
    818                         * base fromUnicode table.
    819                         * (Note: We do have explicit precision flags because they are
    820                         * required for extension table generation, and
    821                         * ucm_checkBaseExt() verified it.)
    822                         *
    823                         * We do not call MBCSAddTable() here (we probably could)
    824                         * so we need to do the analysis before building the extension table.
    825                         * We assume that MBCSAddTable() will build a UTF-8-friendly table.
    826                         * Redundant mappings in the extension table are ok except they cost some size.
    827                         *
    828                         * Do this after ucm_checkBaseExt().
    829                         */
    830                        const MBCSData *mbcsData=MBCSGetDummy();
    831                        int32_t needsMove=0;
    832                        for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    833                            m<mLimit;
    834                            ++m
    835                        ) {
    836                            if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
    837                                m->f|=MBCS_FROM_U_EXT_FLAG;
    838                                m->moveFlag=UCM_MOVE_TO_EXT;
    839                                ++needsMove;
    840                            }
    841                        }
    842 
    843                        if(needsMove!=0) {
    844                            ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
    845                            ucm_sortTable(data->ucm->ext);
    846                        }
    847                    }
    848                    if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
    849                        *pErrorCode=U_INVALID_TABLE_FORMAT;
    850                    }
    851                }
    852            }
    853        }
    854 
    855        cleanupConvData(&baseData);
    856    }
    857 }
    858 
    859 /*
    860 * Hey, Emacs, please set the following:
    861 *
    862 * Local Variables:
    863 * indent-tabs-mode: nil
    864 * End:
    865 *
    866 */