[ tor-browser ].git.dasho

uconv.cpp (47330B)
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*****************************************************************************
      4 *
      5 *   Copyright (C) 1999-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *
      8 ******************************************************************************/
      9 
     10 /*
     11 * uconv(1): an iconv(1)-like converter using ICU.
     12 *
     13 * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
     14 * contributed in 1999.
     15 *
     16 * Conversion to the C conversion API and many improvements by
     17 * Yves Arrouye <yves@realnames.com>, current maintainer.
     18 *
     19 * Markus Scherer maintainer from 2003.
     20 * See source code repository history for changes.
     21 */
     22 
     23 #include <unicode/utypes.h>
     24 #include <unicode/putil.h>
     25 #include <unicode/ucnv.h>
     26 #include <unicode/uenum.h>
     27 #include <unicode/unistr.h>
     28 #include <unicode/translit.h>
     29 #include <unicode/uset.h>
     30 #include <unicode/uclean.h>
     31 #include <unicode/utf16.h>
     32 
     33 #include <stdio.h>
     34 #include <errno.h>
     35 #include <string.h>
     36 #include <stdlib.h>
     37 
     38 #include "cmemory.h"
     39 #include "cstring.h"
     40 #include "ustrfmt.h"
     41 
     42 #include "unicode/uwmsg.h"
     43 
     44 U_NAMESPACE_USE
     45 
     46 #if U_PLATFORM_USES_ONLY_WIN32_API && !defined(__STRICT_ANSI__)
     47 #include <io.h>
     48 #include <fcntl.h>
     49 #if U_PLATFORM_USES_ONLY_WIN32_API
     50 #define USE_FILENO_BINARY_MODE 1
     51 /* Windows likes to rename Unix-like functions */
     52 #ifndef fileno
     53 #define fileno _fileno
     54 #endif
     55 #ifndef setmode
     56 #define setmode _setmode
     57 #endif
     58 #ifndef O_BINARY
     59 #define O_BINARY _O_BINARY
     60 #endif
     61 #endif
     62 #endif
     63 
     64 #ifdef UCONVMSG_LINK
     65 /* below from the README */
     66 #include "unicode/utypes.h"
     67 #include "unicode/udata.h"
     68 U_CFUNC char uconvmsg_dat[];
     69 #endif
     70 
     71 #define DEFAULT_BUFSZ   4096
     72 #define UCONVMSG "uconvmsg"
     73 
     74 static UResourceBundle *gBundle = nullptr; /* Bundle containing messages. */
     75 
     76 /*
     77 * Initialize the message bundle so that message strings can be fetched
     78 * by u_wmsg().
     79 *
     80 */
     81 
     82 static void initMsg(const char *pname) {
     83    static int ps = 0;
     84 
     85    if (!ps) {
     86        char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
     87        UErrorCode err = U_ZERO_ERROR;
     88 
     89        ps = 1;
     90 
     91        /* Set up our static data - if any */
     92 #if defined(UCONVMSG_LINK) && U_PLATFORM != U_PF_OS390 /* On z/OS, this is failing. */
     93        udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
     94        if (U_FAILURE(err)) {
     95          fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
     96                  pname, u_errorName(err));
     97          err = U_ZERO_ERROR; /* It may still fail */
     98        }
     99 #endif
    100 
    101        /* Get messages. */
    102        gBundle = u_wmsg_setPath(UCONVMSG, &err);
    103        if (U_FAILURE(err)) {
    104            fprintf(stderr,
    105                    "%s: warning: couldn't open bundle %s: %s\n",
    106                    pname, UCONVMSG, u_errorName(err));
    107 #ifdef UCONVMSG_LINK
    108            fprintf(stderr,
    109                    "%s: setAppData was called, internal data %s failed to load\n",
    110                        pname, UCONVMSG);
    111 #endif
    112 
    113            err = U_ZERO_ERROR;
    114            /* that was try #1, try again with a path */
    115            uprv_strcpy(dataPath, u_getDataDirectory());
    116            uprv_strcat(dataPath, U_FILE_SEP_STRING);
    117            uprv_strcat(dataPath, UCONVMSG);
    118 
    119            gBundle = u_wmsg_setPath(dataPath, &err);
    120            if (U_FAILURE(err)) {
    121                fprintf(stderr,
    122                    "%s: warning: still couldn't open bundle %s: %s\n",
    123                    pname, dataPath, u_errorName(err));
    124                fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
    125            }
    126        }
    127    }
    128 }
    129 
    130 /* Mapping of callback names to the callbacks passed to the converter
    131   API. */
    132 
    133 static struct callback_ent {
    134    const char *name;
    135    UConverterFromUCallback fromu;
    136    const void *fromuctxt;
    137    UConverterToUCallback tou;
    138    const void *touctxt;
    139 } transcode_callbacks[] = {
    140    { "substitute",
    141      UCNV_FROM_U_CALLBACK_SUBSTITUTE, nullptr,
    142      UCNV_TO_U_CALLBACK_SUBSTITUTE, nullptr },
    143    { "skip",
    144      UCNV_FROM_U_CALLBACK_SKIP, nullptr,
    145      UCNV_TO_U_CALLBACK_SKIP, nullptr },
    146    { "stop",
    147      UCNV_FROM_U_CALLBACK_STOP, nullptr,
    148      UCNV_TO_U_CALLBACK_STOP, nullptr },
    149    { "escape",
    150      UCNV_FROM_U_CALLBACK_ESCAPE, nullptr,
    151      UCNV_TO_U_CALLBACK_ESCAPE, nullptr },
    152    { "escape-icu",
    153      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
    154      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
    155    { "escape-java",
    156      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
    157      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
    158    { "escape-c",
    159      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
    160      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
    161    { "escape-xml",
    162      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
    163      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    164    { "escape-xml-hex",
    165      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
    166      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    167    { "escape-xml-dec",
    168      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
    169      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
    170    { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
    171      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
    172 };
    173 
    174 /* Return a pointer to a callback record given its name. */
    175 
    176 static const struct callback_ent *findCallback(const char *name) {
    177    int i, count =
    178        UPRV_LENGTHOF(transcode_callbacks);
    179 
    180    /* We'll do a linear search, there aren't many of them and bsearch()
    181       may not be that portable. */
    182 
    183    for (i = 0; i < count; ++i) {
    184        if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
    185            return &transcode_callbacks[i];
    186        }
    187    }
    188 
    189    return nullptr;
    190 }
    191 
    192 /* Print converter information. If lookfor is set, only that converter will
    193   be printed, otherwise all converters will be printed. If canon is non
    194   zero, tags and aliases for each converter are printed too, in the format
    195   expected for convrters.txt(5). */
    196 
    197 static int printConverters(const char *pname, const char *lookfor,
    198    UBool canon)
    199 {
    200    UErrorCode err = U_ZERO_ERROR;
    201    int32_t num;
    202    uint16_t num_stds;
    203    const char **stds;
    204 
    205    /* If there is a specified name, just handle that now. */
    206 
    207    if (lookfor) {
    208        if (!canon) {
    209            printf("%s\n", lookfor);
    210            return 0;
    211        } else {
    212        /*  Because we are printing a canonical name, we need the
    213            true converter name. We've done that already except for
    214            the default name (because we want to print the exact
    215            name one would get when calling ucnv_getDefaultName()
    216            in non-canon mode). But since we do not know at this
    217            point if we have the default name or something else, we
    218            need to normalize again to the canonical converter
    219            name. */
    220 
    221            const char *truename = ucnv_getAlias(lookfor, 0, &err);
    222            if (U_SUCCESS(err)) {
    223                lookfor = truename;
    224            } else {
    225                err = U_ZERO_ERROR;
    226            }
    227        }
    228    }
    229 
    230    /* Print converter names. We come here for one of two reasons: we
    231       are printing all the names (lookfor was null), or we have a
    232       single converter to print but in canon mode, hence we need to
    233       get to it in order to print everything. */
    234 
    235    num = ucnv_countAvailable();
    236    if (num <= 0) {
    237        initMsg(pname);
    238        u_wmsg(stderr, "cantGetNames");
    239        return -1;
    240    }
    241    if (lookfor) {
    242        num = 1;                /* We know where we want to be. */
    243    }
    244 
    245    num_stds = ucnv_countStandards();
    246    stds = static_cast<const char**>(uprv_malloc(num_stds * sizeof(*stds)));
    247    if (!stds) {
    248        u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
    249        return -1;
    250    } else {
    251        uint16_t s;
    252 
    253        if (canon) {
    254            printf("{ ");
    255        }
    256        for (s = 0; s < num_stds; ++s) {
    257            stds[s] = ucnv_getStandard(s, &err);
    258            if (canon) {
    259                printf("%s ", stds[s]);
    260            }
    261            if (U_FAILURE(err)) {
    262                u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
    263                goto error_cleanup;
    264            }
    265        }
    266        if (canon) {
    267            puts("}");
    268        }
    269    }
    270 
    271    for (int32_t i = 0; i < num; i++) {
    272        const char *name;
    273        uint16_t num_aliases;
    274 
    275        /* Set the name either to what we are looking for, or
    276        to the current converter name. */
    277 
    278        if (lookfor) {
    279            name = lookfor;
    280        } else {
    281            name = ucnv_getAvailableName(i);
    282        }
    283 
    284        /* Get all the aliases associated to the name. */
    285 
    286        err = U_ZERO_ERROR;
    287        num_aliases = ucnv_countAliases(name, &err);
    288        if (U_FAILURE(err)) {
    289            printf("%s", name);
    290 
    291            UnicodeString str(name, "");
    292            putchar('\t');
    293            u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    294                u_wmsg_errorName(err));
    295            goto error_cleanup;
    296        } else {
    297            uint16_t a, s, t;
    298 
    299            /* Write all the aliases and their tags. */
    300 
    301            for (a = 0; a < num_aliases; ++a) {
    302                const char *alias = ucnv_getAlias(name, a, &err);
    303 
    304                if (U_FAILURE(err)) {
    305                    UnicodeString str(name, "");
    306                    putchar('\t');
    307                    u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    308                        u_wmsg_errorName(err));
    309                    goto error_cleanup;
    310                }
    311 
    312                /* Print the current alias so that it looks right. */
    313                printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
    314                                 alias,
    315                                 (canon ? "" : " "));
    316 
    317                /* Look (slowly, linear searching) for a tag. */
    318 
    319                if (canon) {
    320                    /* -1 to skip the last standard */
    321                    for (s = t = 0; s < num_stds-1; ++s) {
    322                        UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
    323                        if (U_SUCCESS(err)) {
    324                            /* List the standard tags */
    325                            const char *standardName;
    326                            UBool isFirst = true;
    327                            UErrorCode enumError = U_ZERO_ERROR;
    328                            while ((standardName = uenum_next(nameEnum, nullptr, &enumError))) {
    329                                /* See if this alias is supported by this standard. */
    330                                if (!strcmp(standardName, alias)) {
    331                                    if (!t) {
    332                                        printf(" {");
    333                                        t = 1;
    334                                    }
    335                                    /* Print a * after the default standard name */
    336                                    printf(" %s%s", stds[s], (isFirst ? "*" : ""));
    337                                }
    338                                isFirst = false;
    339                            }
    340                        }
    341                    }
    342                    if (t) {
    343                        printf(" }");
    344                    }
    345                }
    346                /* Terminate this entry. */
    347                if (canon) {
    348                    puts("");
    349                }
    350 
    351                /* Move on. */
    352            }
    353            /* Terminate this entry. */
    354            if (!canon) {
    355                puts("");
    356            }
    357        }
    358    }
    359 
    360    /* Free temporary data. */
    361 
    362    uprv_free(stds);
    363 
    364    /* Success. */
    365 
    366    return 0;
    367 error_cleanup:
    368    uprv_free(stds);
    369    return -1;
    370 }
    371 
    372 /* Print all available transliterators. If canon is non zero, print
    373   one transliterator per line. */
    374 
    375 static int printTransliterators(UBool canon)
    376 {
    377 #if UCONFIG_NO_TRANSLITERATION
    378    printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
    379    return 1;
    380 #else
    381    UErrorCode status = U_ZERO_ERROR;
    382    UEnumeration *ids = utrans_openIDs(&status);
    383    int32_t i, numtrans = uenum_count(ids, &status);
    384 
    385    char sepchar = canon ? '\n' : ' ';
    386 
    387    for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
    388    	int32_t len;
    389    	const char *nextTrans = uenum_next(ids, &len, &status);
    390 
    391        printf("%s", nextTrans);
    392        if (i < numtrans - 1) {
    393            putchar(sepchar);
    394        }
    395    }
    396 
    397    uenum_close(ids);
    398 
    399    /* Add a terminating newline if needed. */
    400 
    401    if (sepchar != '\n') {
    402        putchar('\n');
    403    }
    404 
    405    /* Success. */
    406 
    407    return 0;
    408 #endif
    409 }
    410 
    411 enum {
    412    uSP = 0x20,         // space
    413    uCR = 0xd,          // carriage return
    414    uLF = 0xa,          // line feed
    415    uNL = 0x85,         // newline
    416    uLS = 0x2028,       // line separator
    417    uPS = 0x2029,       // paragraph separator
    418    uSig = 0xfeff       // signature/BOM character
    419 };
    420 
    421 static inline int32_t
    422 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
    423    // find one of
    424    // CR, LF, CRLF, NL, LS, PS
    425    // for paragraph ends (see UAX #13/Unicode 4)
    426    // and include it in the chunk
    427    // all of these characters are on the BMP
    428    // do not include FF or VT in case they are part of a paragraph
    429    // (important for bidi contexts)
    430    static const char16_t paraEnds[] = {
    431        0xd, 0xa, 0x85, 0x2028, 0x2029
    432    };
    433    enum {
    434        iCR, iLF, iNL, iLS, iPS, iCount
    435    };
    436 
    437    // first, see if there is a CRLF split between prev and s
    438    if (prev.endsWith(paraEnds + iCR, 1)) {
    439        if (s.startsWith(paraEnds + iLF, 1)) {
    440            return 1; // split CRLF, include the LF
    441        } else if (!s.isEmpty()) {
    442            return 0; // complete the last chunk
    443        } else {
    444            return -1; // wait for actual further contents to arrive
    445        }
    446    }
    447 
    448    const char16_t *u = s.getBuffer(), *limit = u + s.length();
    449    char16_t c;
    450 
    451    while (u < limit) {
    452        c = *u++;
    453        if (
    454            ((c < uSP) && (c == uCR || c == uLF)) ||
    455            (c == uNL) ||
    456            ((c & uLS) == uLS)
    457        ) {
    458            if (c == uCR) {
    459                // check for CRLF
    460                if (u == limit) {
    461                    return -1; // LF may be in the next chunk
    462                } else if (*u == uLF) {
    463                    ++u; // include the LF in this chunk
    464                }
    465            }
    466            return static_cast<int32_t>(u - s.getBuffer());
    467        }
    468    }
    469 
    470    return -1; // continue collecting the chunk
    471 }
    472 
    473 enum {
    474    CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
    475    CNV_WITH_FEFF,  // can convert the U+FEFF signature character
    476    CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
    477 };
    478 
    479 static inline char16_t
    480 nibbleToHex(uint8_t n) {
    481    n &= 0xf;
    482    return
    483        n <= 9 ?
    484            static_cast<char16_t>(0x30 + n) :
    485            static_cast<char16_t>((0x61 - 10) + n);
    486 }
    487 
    488 // check the converter's Unicode signature properties;
    489 // the fromUnicode side of the converter must be in its initial state
    490 // and will be reset again if it was used
    491 static int32_t
    492 cnvSigType(UConverter *cnv) {
    493    UErrorCode err;
    494    int32_t result;
    495 
    496    // test if the output charset can convert U+FEFF
    497    USet *set = uset_open(1, 0);
    498    err = U_ZERO_ERROR;
    499    ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
    500    if (U_SUCCESS(err) && uset_contains(set, uSig)) {
    501        result = CNV_WITH_FEFF;
    502    } else {
    503        result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
    504    }
    505    uset_close(set);
    506 
    507    if (result == CNV_WITH_FEFF) {
    508        // test if the output charset emits a signature anyway
    509        const char16_t a[1] = { 0x61 }; // "a"
    510        const char16_t *in;
    511 
    512        char buffer[20];
    513        char *out;
    514 
    515        in = a;
    516        out = buffer;
    517        err = U_ZERO_ERROR;
    518        ucnv_fromUnicode(cnv,
    519            &out, buffer + sizeof(buffer),
    520            &in, a + 1,
    521            nullptr, true, &err);
    522        ucnv_resetFromUnicode(cnv);
    523 
    524        if (nullptr != ucnv_detectUnicodeSignature(buffer, static_cast<int32_t>(out - buffer), nullptr, &err) &&
    525            U_SUCCESS(err)
    526        ) {
    527            result = CNV_ADDS_FEFF;
    528        }
    529    }
    530 
    531    return result;
    532 }
    533 
    534 class ConvertFile {
    535 public:
    536    ConvertFile() :
    537        buf(nullptr), outbuf(nullptr), fromoffsets(nullptr),
    538        bufsz(0), signature(0) {}
    539 
    540    void
    541    setBufferSize(size_t bufferSize) {
    542        bufsz = bufferSize;
    543 
    544        buf = new char[2 * bufsz];
    545        outbuf = buf + bufsz;
    546 
    547        // +1 for an added U+FEFF in the intermediate Unicode buffer
    548        fromoffsets = new int32_t[bufsz + 1];
    549    }
    550 
    551    ~ConvertFile() {
    552        delete [] buf;
    553        delete [] fromoffsets;
    554    }
    555 
    556    UBool convertFile(const char *pname,
    557                      const char *fromcpage,
    558                      UConverterToUCallback toucallback,
    559                      const void *touctxt,
    560                      const char *tocpage,
    561                      UConverterFromUCallback fromucallback,
    562                      const void *fromuctxt,
    563                      UBool fallback,
    564                      const char *translit,
    565                      const char *infilestr,
    566                      FILE * outfile, int verbose);
    567 private:
    568    friend int main(int argc, char **argv);
    569 
    570    char *buf, *outbuf;
    571    int32_t *fromoffsets;
    572 
    573    size_t bufsz;
    574    int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
    575 };
    576 
    577 // Convert a file from one encoding to another
    578 UBool
    579 ConvertFile::convertFile(const char *pname,
    580                         const char *fromcpage,
    581                         UConverterToUCallback toucallback,
    582                         const void *touctxt,
    583                         const char *tocpage,
    584                         UConverterFromUCallback fromucallback,
    585                         const void *fromuctxt,
    586                         UBool fallback,
    587                         const char *translit,
    588                         const char *infilestr,
    589                         FILE * outfile, int verbose)
    590 {
    591    FILE *infile;
    592    UBool ret = true;
    593    UConverter *convfrom = nullptr;
    594    UConverter *convto = nullptr;
    595    UErrorCode err = U_ZERO_ERROR;
    596    UBool flush;
    597    UBool closeFile = false;
    598    const char *cbufp, *prevbufp;
    599    char *bufp;
    600 
    601    uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
    602 
    603    const char16_t *unibuf, *unibufbp;
    604    char16_t *unibufp;
    605 
    606    size_t rd, wr;
    607 
    608 #if !UCONFIG_NO_TRANSLITERATION
    609    Transliterator *t = nullptr;// Transliterator acting on Unicode data.
    610    UnicodeString chunk;        // One chunk of the text being collected for transformation.
    611 #endif
    612    UnicodeString u;            // String to do the transliteration.
    613    int32_t ulen;
    614 
    615    // use conversion offsets for error messages
    616    // unless a transliterator is used -
    617    // a text transformation will reorder characters in unpredictable ways
    618    UBool useOffsets = true;
    619 
    620    // Open the correct input file or connect to stdin for reading input
    621 
    622    if (infilestr != nullptr && strcmp(infilestr, "-")) {
    623        infile = fopen(infilestr, "rb");
    624        if (infile == nullptr) {
    625            UnicodeString str1(infilestr, "");
    626            str1.append(static_cast<UChar32>(0));
    627            UnicodeString str2(strerror(errno), "");
    628            str2.append(static_cast<UChar32>(0));
    629            initMsg(pname);
    630            u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
    631            return false;
    632        }
    633        closeFile = true;
    634    } else {
    635        infilestr = "-";
    636        infile = stdin;
    637 #ifdef USE_FILENO_BINARY_MODE
    638        if (setmode(fileno(stdin), O_BINARY) == -1) {
    639            initMsg(pname);
    640            u_wmsg(stderr, "cantSetInBinMode");
    641            return false;
    642        }
    643 #endif
    644    }
    645 
    646    if (verbose) {
    647        fprintf(stderr, "%s:\n", infilestr);
    648    }
    649 
    650 #if !UCONFIG_NO_TRANSLITERATION
    651    // Create transliterator as needed.
    652 
    653    if (translit != nullptr && *translit) {
    654        UParseError parse;
    655        UnicodeString str(translit), pestr;
    656 
    657        /* Create from rules or by ID as needed. */
    658 
    659        parse.line = -1;
    660 
    661        if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
    662            t = Transliterator::createFromRules(UNICODE_STRING_SIMPLE("Uconv"), str, UTRANS_FORWARD, parse, err);
    663        } else {
    664            t = Transliterator::createInstance(UnicodeString(translit, -1, US_INV), UTRANS_FORWARD, err);
    665        }
    666 
    667        if (U_FAILURE(err)) {
    668            str.append(static_cast<UChar32>(0));
    669            initMsg(pname);
    670 
    671            if (parse.line >= 0) {
    672                char16_t linebuf[20], offsetbuf[20];
    673                uprv_itou(linebuf, 20, parse.line, 10, 0);
    674                uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
    675                u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
    676                    u_wmsg_errorName(err), linebuf, offsetbuf);
    677            } else {
    678                u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
    679                    u_wmsg_errorName(err));
    680            }
    681 
    682            if (t) {
    683                delete t;
    684                t = nullptr;
    685            }
    686            goto error_exit;
    687        }
    688 
    689        useOffsets = false;
    690    }
    691 #endif
    692 
    693    // Create codepage converter. If the codepage or its aliases weren't
    694    // available, it returns nullptr and a failure code. We also set the
    695    // callbacks, and return errors in the same way.
    696 
    697    convfrom = ucnv_open(fromcpage, &err);
    698    if (U_FAILURE(err)) {
    699        UnicodeString str(fromcpage, "");
    700        initMsg(pname);
    701        u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
    702            u_wmsg_errorName(err));
    703        goto error_exit;
    704    }
    705    ucnv_setToUCallBack(convfrom, toucallback, touctxt, nullptr, nullptr, &err);
    706    if (U_FAILURE(err)) {
    707        initMsg(pname);
    708        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    709        goto error_exit;
    710    }
    711 
    712    convto = ucnv_open(tocpage, &err);
    713    if (U_FAILURE(err)) {
    714        UnicodeString str(tocpage, "");
    715        initMsg(pname);
    716        u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
    717            u_wmsg_errorName(err));
    718        goto error_exit;
    719    }
    720    ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, nullptr, nullptr, &err);
    721    if (U_FAILURE(err)) {
    722        initMsg(pname);
    723        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    724        goto error_exit;
    725    }
    726    ucnv_setFallback(convto, fallback);
    727 
    728    UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
    729    int8_t sig;
    730 
    731    // OK, we can convert now.
    732    sig = signature;
    733    rd = 0;
    734 
    735    do {
    736        willexit = false;
    737 
    738        // input file offset at the beginning of the next buffer
    739        infoffset += static_cast<uint32_t>(rd);
    740 
    741        rd = fread(buf, 1, bufsz, infile);
    742        if (ferror(infile) != 0) {
    743            UnicodeString str(strerror(errno));
    744            initMsg(pname);
    745            u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
    746            goto error_exit;
    747        }
    748 
    749        // Convert the read buffer into the new encoding via Unicode.
    750        // After the call 'unibufp' will be placed behind the last
    751        // character that was converted in the 'unibuf'.
    752        // Also the 'cbufp' is positioned behind the last converted
    753        // character.
    754        // At the last conversion in the file, flush should be set to
    755        // true so that we get all characters converted.
    756        //
    757        // The converter must be flushed at the end of conversion so
    758        // that characters on hold also will be written.
    759 
    760        cbufp = buf;
    761        flush = static_cast<UBool>(rd != bufsz);
    762 
    763        // convert until the input is consumed
    764        do {
    765            // remember the start of the current byte-to-Unicode conversion
    766            prevbufp = cbufp;
    767 
    768            unibuf = unibufp = u.getBuffer(static_cast<int32_t>(bufsz));
    769 
    770            // Use bufsz instead of u.getCapacity() for the targetLimit
    771            // so that we don't overflow fromoffsets[].
    772            ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
    773                buf + rd, useOffsets ? fromoffsets : nullptr, flush, &err);
    774 
    775            ulen = static_cast<int32_t>(unibufp - unibuf);
    776            u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
    777 
    778            // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
    779            // converting all of the input bytes.
    780            // It works like this because ucnv_toUnicode() returns only under the
    781            // following conditions:
    782            // - an error occurred during conversion (an error code is set)
    783            // - the target buffer is filled (the error code indicates an overflow)
    784            // - the source is consumed
    785            // That is, if the error code does not indicate a failure,
    786            // not even an overflow, then the source must be consumed entirely.
    787            fromSawEndOfBytes = U_SUCCESS(err);
    788 
    789            if (err == U_BUFFER_OVERFLOW_ERROR) {
    790                err = U_ZERO_ERROR;
    791            } else if (U_FAILURE(err)) {
    792                char pos[32], errorBytes[32];
    793                int8_t i, length, errorLength;
    794 
    795                UErrorCode localError = U_ZERO_ERROR;
    796                errorLength = static_cast<int8_t>(sizeof(errorBytes));
    797                ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
    798                if (U_FAILURE(localError) || errorLength == 0) {
    799                    errorLength = 1;
    800                }
    801 
    802                // print the input file offset of the start of the error bytes:
    803                // input file offset of the current byte buffer +
    804                // length of the just consumed bytes -
    805                // length of the error bytes
    806                length =
    807                    static_cast<int8_t>(snprintf(pos, sizeof(pos), "%d",
    808                        static_cast<int>(infoffset + (cbufp - buf) - errorLength)));
    809 
    810                // output the bytes that caused the error
    811                UnicodeString str;
    812                for (i = 0; i < errorLength; ++i) {
    813                    if (i > 0) {
    814                        str.append(static_cast<char16_t>(uSP));
    815                    }
    816                    str.append(nibbleToHex(static_cast<uint8_t>(errorBytes[i]) >> 4));
    817                    str.append(nibbleToHex(static_cast<uint8_t>(errorBytes[i])));
    818                }
    819 
    820                initMsg(pname);
    821                u_wmsg(stderr, "problemCvtToU",
    822                        UnicodeString(pos, length, "").getTerminatedBuffer(),
    823                        str.getTerminatedBuffer(),
    824                        u_wmsg_errorName(err));
    825 
    826                willexit = true;
    827                err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
    828            }
    829 
    830            // Replaced a check for whether the input was consumed by
    831            // looping until it is; message key "premEndInput" now obsolete.
    832 
    833            if (ulen == 0) {
    834                continue;
    835            }
    836 
    837            // remove a U+FEFF Unicode signature character if requested
    838            if (sig < 0) {
    839                if (u.charAt(0) == uSig) {
    840                    u.remove(0, 1);
    841 
    842                    // account for the removed char16_t and offset
    843                    --ulen;
    844 
    845                    if (useOffsets) {
    846                        // remove an offset from fromoffsets[] as well
    847                        // to keep the array parallel with the UChars
    848                        memmove(fromoffsets, fromoffsets + 1, ulen * 4);
    849                    }
    850 
    851                }
    852                sig = 0;
    853            }
    854 
    855 #if !UCONFIG_NO_TRANSLITERATION
    856            // Transliterate/transform if needed.
    857 
    858            // For transformation, we use chunking code -
    859            // collect Unicode input until, for example, an end-of-line,
    860            // then transform and output-convert that and continue collecting.
    861            // This makes the transformation result independent of the buffer size
    862            // while avoiding the slower keyboard mode.
    863            // The end-of-chunk characters are completely included in the
    864            // transformed string in case they are to be transformed themselves.
    865            if (t != nullptr) {
    866                UnicodeString out;
    867                int32_t chunkLimit;
    868 
    869                do {
    870                    chunkLimit = getChunkLimit(chunk, u);
    871                    if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
    872                        // use all of the rest at the end of the text
    873                        chunkLimit = u.length();
    874                    }
    875                    if (chunkLimit >= 0) {
    876                        // complete the chunk and transform it
    877                        chunk.append(u, 0, chunkLimit);
    878                        u.remove(0, chunkLimit);
    879                        t->transliterate(chunk);
    880 
    881                        // append the transformation result to the result and empty the chunk
    882                        out.append(chunk);
    883                        chunk.remove();
    884                    } else {
    885                        // continue collecting the chunk
    886                        chunk.append(u);
    887                        break;
    888                    }
    889                } while (!u.isEmpty());
    890 
    891                u = out;
    892                ulen = u.length();
    893            }
    894 #endif
    895 
    896            // add a U+FEFF Unicode signature character if requested
    897            // and possible/necessary
    898            if (sig > 0) {
    899                if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
    900                    u.insert(0, static_cast<char16_t>(uSig));
    901 
    902                    if (useOffsets) {
    903                        // insert a pseudo-offset into fromoffsets[] as well
    904                        // to keep the array parallel with the UChars
    905                        memmove(fromoffsets + 1, fromoffsets, ulen * 4);
    906                        fromoffsets[0] = -1;
    907                    }
    908 
    909                    // account for the additional char16_t and offset
    910                    ++ulen;
    911                }
    912                sig = 0;
    913            }
    914 
    915            // Convert the Unicode buffer into the destination codepage
    916            // Again 'bufp' will be placed behind the last converted character
    917            // And 'unibufp' will be placed behind the last converted unicode character
    918            // At the last conversion flush should be set to true to ensure that
    919            // all characters left get converted
    920 
    921            unibuf = unibufbp = u.getBuffer();
    922 
    923            do {
    924                bufp = outbuf;
    925 
    926                // Use fromSawEndOfBytes in addition to the flush flag -
    927                // it indicates whether the intermediate Unicode string
    928                // contains the very last UChars for the very last input bytes.
    929                ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
    930                                 &unibufbp,
    931                                 unibuf + ulen,
    932                                 nullptr, static_cast<UBool>(flush && fromSawEndOfBytes), &err);
    933 
    934                // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
    935                // converting all of the intermediate UChars.
    936                // See comment for fromSawEndOfBytes.
    937                toSawEndOfUnicode = U_SUCCESS(err);
    938 
    939                if (err == U_BUFFER_OVERFLOW_ERROR) {
    940                    err = U_ZERO_ERROR;
    941                } else if (U_FAILURE(err)) {
    942                    char16_t errorUChars[4];
    943                    const char *errtag;
    944                    char pos[32];
    945                    UChar32 c;
    946                    int8_t i, length, errorLength;
    947 
    948                    UErrorCode localError = U_ZERO_ERROR;
    949                    errorLength = UPRV_LENGTHOF(errorUChars);
    950                    ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
    951                    if (U_FAILURE(localError) || errorLength == 0) {
    952                        // need at least 1 so that we don't access beyond the length of fromoffsets[]
    953                        errorLength = 1;
    954                    }
    955 
    956                    int32_t ferroffset;
    957 
    958                    if (useOffsets) {
    959                        // Unicode buffer offset of the start of the error UChars
    960                        ferroffset = static_cast<int32_t>((unibufbp - unibuf) - errorLength);
    961                        if (ferroffset < 0) {
    962                            // approximation - the character started in the previous Unicode buffer
    963                            ferroffset = 0;
    964                        }
    965 
    966                        // get the corresponding byte offset out of fromoffsets[]
    967                        // go back if the offset is not known for some of the UChars
    968                        int32_t fromoffset;
    969                        do {
    970                            fromoffset = fromoffsets[ferroffset];
    971                        } while (fromoffset < 0 && --ferroffset >= 0);
    972 
    973                        // total input file offset =
    974                        // input file offset of the current byte buffer +
    975                        // byte buffer offset of where the current Unicode buffer is converted from +
    976                        // fromoffsets[Unicode offset]
    977                        ferroffset = static_cast<int32_t>(infoffset + (prevbufp - buf) + fromoffset);
    978                        errtag = "problemCvtFromU";
    979                    } else {
    980                        // Do not use fromoffsets if (t != nullptr) because the Unicode text may
    981                        // be different from what the offsets refer to.
    982 
    983                        // output file offset
    984                        ferroffset = static_cast<int32_t>(outfoffset + (bufp - outbuf));
    985                        errtag = "problemCvtFromUOut";
    986                    }
    987 
    988                    length = static_cast<int8_t>(snprintf(pos, sizeof(pos), "%u", static_cast<int>(ferroffset)));
    989 
    990                    // output the code points that caused the error
    991                    UnicodeString str;
    992                    for (i = 0; i < errorLength;) {
    993                        if (i > 0) {
    994                            str.append(static_cast<char16_t>(uSP));
    995                        }
    996                        U16_NEXT(errorUChars, i, errorLength, c);
    997                        if (c >= 0x100000) {
    998                            str.append(nibbleToHex(static_cast<uint8_t>(c >> 20)));
    999                        }
   1000                        if (c >= 0x10000) {
   1001                            str.append(nibbleToHex(static_cast<uint8_t>(c >> 16)));
   1002                        }
   1003                        str.append(nibbleToHex(static_cast<uint8_t>(c >> 12)));
   1004                        str.append(nibbleToHex(static_cast<uint8_t>(c >> 8)));
   1005                        str.append(nibbleToHex(static_cast<uint8_t>(c >> 4)));
   1006                        str.append(nibbleToHex(static_cast<uint8_t>(c)));
   1007                    }
   1008 
   1009                    initMsg(pname);
   1010                    u_wmsg(stderr, errtag,
   1011                            UnicodeString(pos, length, "").getTerminatedBuffer(),
   1012                            str.getTerminatedBuffer(),
   1013                           u_wmsg_errorName(err));
   1014                    u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
   1015 
   1016                    willexit = true;
   1017                    err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
   1018                }
   1019 
   1020                // Replaced a check for whether the intermediate Unicode characters were all consumed by
   1021                // looping until they are; message key "premEnd" now obsolete.
   1022 
   1023                // Finally, write the converted buffer to the output file
   1024                size_t outlen = static_cast<size_t>(bufp - outbuf);
   1025                outfoffset += static_cast<int32_t>(wr = fwrite(outbuf, 1, outlen, outfile));
   1026                if (wr != outlen) {
   1027                    UnicodeString str(strerror(errno));
   1028                    initMsg(pname);
   1029                    u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
   1030                    willexit = true;
   1031                }
   1032 
   1033                if (willexit) {
   1034                    goto error_exit;
   1035                }
   1036            } while (!toSawEndOfUnicode);
   1037        } while (!fromSawEndOfBytes);
   1038    } while (!flush);           // Stop when we have flushed the
   1039                                // converters (this means that it's
   1040                                // the end of output)
   1041 
   1042    goto normal_exit;
   1043 
   1044 error_exit:
   1045    ret = false;
   1046 
   1047 normal_exit:
   1048    // Cleanup.
   1049 
   1050    ucnv_close(convfrom);
   1051    ucnv_close(convto);
   1052 
   1053 #if !UCONFIG_NO_TRANSLITERATION
   1054    delete t;
   1055 #endif
   1056 
   1057    if (closeFile) {
   1058        fclose(infile);
   1059    }
   1060 
   1061    return ret;
   1062 }
   1063 
   1064 static void usage(const char *pname, int ecode) {
   1065    const char16_t *msg;
   1066    int32_t msgLen;
   1067    UErrorCode err = U_ZERO_ERROR;
   1068    FILE *fp = ecode ? stderr : stdout;
   1069    int res;
   1070 
   1071    initMsg(pname);
   1072    msg =
   1073        ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
   1074                            &msgLen, &err);
   1075    UnicodeString upname(pname, static_cast<int32_t>(uprv_strlen(pname) + 1));
   1076    UnicodeString mname(msg, msgLen + 1);
   1077 
   1078    res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
   1079    if (!ecode) {
   1080        if (!res) {
   1081            fputc('\n', fp);
   1082        }
   1083        if (!u_wmsg(fp, "help")) {
   1084            /* Now dump callbacks and finish. */
   1085 
   1086            int i, count =
   1087                UPRV_LENGTHOF(transcode_callbacks);
   1088            for (i = 0; i < count; ++i) {
   1089                fprintf(fp, " %s", transcode_callbacks[i].name);
   1090            }
   1091            fputc('\n', fp);
   1092        }
   1093    }
   1094 
   1095    exit(ecode);
   1096 }
   1097 
   1098 extern int
   1099 main(int argc, char **argv)
   1100 {
   1101    FILE *outfile;
   1102    int ret = 0;
   1103 
   1104    size_t bufsz = DEFAULT_BUFSZ;
   1105 
   1106    const char *fromcpage = nullptr;
   1107    const char *tocpage = nullptr;
   1108    const char *translit = nullptr;
   1109    const char *outfilestr = nullptr;
   1110    UBool fallback = false;
   1111 
   1112    UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
   1113    const void *fromuctxt = nullptr;
   1114    UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
   1115    const void *touctxt = nullptr;
   1116 
   1117    char **iter, **remainArgv, **remainArgvLimit;
   1118    char **end = argv + argc;
   1119 
   1120    const char *pname;
   1121 
   1122    UBool printConvs = false, printCanon = false, printTranslits = false;
   1123    const char *printName = nullptr;
   1124 
   1125    UBool verbose = false;
   1126    UErrorCode status = U_ZERO_ERROR;
   1127 
   1128    ConvertFile cf;
   1129 
   1130    /* Initialize ICU */
   1131    u_init(&status);
   1132    if (U_FAILURE(status)) {
   1133        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
   1134            argv[0], u_errorName(status));
   1135        exit(1);
   1136    }
   1137 
   1138    // Get and prettify pname.
   1139    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
   1140 #if U_PLATFORM_USES_ONLY_WIN32_API
   1141    if (!pname) {
   1142        pname = uprv_strrchr(*argv, '/');
   1143    }
   1144 #endif
   1145    if (!pname) {
   1146        pname = *argv;
   1147    } else {
   1148        ++pname;
   1149    }
   1150 
   1151    // First, get the arguments from command-line
   1152    // to know the codepages to convert between
   1153 
   1154    remainArgv = remainArgvLimit = argv + 1;
   1155    for (iter = argv + 1; iter != end; iter++) {
   1156        // Check for from charset
   1157        if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
   1158            iter++;
   1159            if (iter != end)
   1160                fromcpage = *iter;
   1161            else
   1162                usage(pname, 1);
   1163        } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
   1164            iter++;
   1165            if (iter != end)
   1166                tocpage = *iter;
   1167            else
   1168                usage(pname, 1);
   1169        } else if (strcmp("-x", *iter) == 0) {
   1170            iter++;
   1171            if (iter != end)
   1172                translit = *iter;
   1173            else
   1174                usage(pname, 1);
   1175        } else if (!strcmp("--fallback", *iter)) {
   1176            fallback = true;
   1177        } else if (!strcmp("--no-fallback", *iter)) {
   1178            fallback = false;
   1179        } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
   1180            iter++;
   1181            if (iter != end) {
   1182                bufsz = atoi(*iter);
   1183                if (static_cast<int>(bufsz) <= 0) {
   1184                    initMsg(pname);
   1185                    UnicodeString str(*iter);
   1186                    initMsg(pname);
   1187                    u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
   1188                    return 3;
   1189                }
   1190            } else {
   1191                usage(pname, 1);
   1192            }
   1193        } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
   1194            if (printTranslits) {
   1195                usage(pname, 1);
   1196            }
   1197            printConvs = true;
   1198        } else if (strcmp("--default-code", *iter) == 0) {
   1199            if (printTranslits) {
   1200                usage(pname, 1);
   1201            }
   1202            printName = ucnv_getDefaultName();
   1203        } else if (strcmp("--list-code", *iter) == 0) {
   1204            if (printTranslits) {
   1205                usage(pname, 1);
   1206            }
   1207 
   1208            iter++;
   1209            if (iter != end) {
   1210                UErrorCode e = U_ZERO_ERROR;
   1211                printName = ucnv_getAlias(*iter, 0, &e);
   1212                if (U_FAILURE(e) || !printName) {
   1213                    UnicodeString str(*iter);
   1214                    initMsg(pname);
   1215                    u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
   1216                    return 2;
   1217                }
   1218            } else
   1219                usage(pname, 1);
   1220        } else if (strcmp("--canon", *iter) == 0) {
   1221            printCanon = true;
   1222        } else if (strcmp("-L", *iter) == 0
   1223            || !strcmp("--list-transliterators", *iter)) {
   1224            if (printConvs) {
   1225                usage(pname, 1);
   1226            }
   1227            printTranslits = true;
   1228        } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
   1229            || !strcmp("--help", *iter)) {
   1230            usage(pname, 0);
   1231        } else if (!strcmp("-c", *iter)) {
   1232            fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
   1233        } else if (!strcmp("--to-callback", *iter)) {
   1234            iter++;
   1235            if (iter != end) {
   1236                const struct callback_ent *cbe = findCallback(*iter);
   1237                if (cbe) {
   1238                    fromucallback = cbe->fromu;
   1239                    fromuctxt = cbe->fromuctxt;
   1240                } else {
   1241                    UnicodeString str(*iter);
   1242                    initMsg(pname);
   1243                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1244                    return 4;
   1245                }
   1246            } else {
   1247                usage(pname, 1);
   1248            }
   1249        } else if (!strcmp("--from-callback", *iter)) {
   1250            iter++;
   1251            if (iter != end) {
   1252                const struct callback_ent *cbe = findCallback(*iter);
   1253                if (cbe) {
   1254                    toucallback = cbe->tou;
   1255                    touctxt = cbe->touctxt;
   1256                } else {
   1257                    UnicodeString str(*iter);
   1258                    initMsg(pname);
   1259                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1260                    return 4;
   1261                }
   1262            } else {
   1263                usage(pname, 1);
   1264            }
   1265        } else if (!strcmp("-i", *iter)) {
   1266            toucallback = UCNV_TO_U_CALLBACK_SKIP;
   1267        } else if (!strcmp("--callback", *iter)) {
   1268            iter++;
   1269            if (iter != end) {
   1270                const struct callback_ent *cbe = findCallback(*iter);
   1271                if (cbe) {
   1272                    fromucallback = cbe->fromu;
   1273                    fromuctxt = cbe->fromuctxt;
   1274                    toucallback = cbe->tou;
   1275                    touctxt = cbe->touctxt;
   1276                } else {
   1277                    UnicodeString str(*iter);
   1278                    initMsg(pname);
   1279                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1280                    return 4;
   1281                }
   1282            } else {
   1283                usage(pname, 1);
   1284            }
   1285        } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
   1286            verbose = false;
   1287        } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
   1288            verbose = true;
   1289        } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
   1290            printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
   1291            return 0;
   1292        } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
   1293            ++iter;
   1294            if (iter != end && !outfilestr) {
   1295                outfilestr = *iter;
   1296            } else {
   1297                usage(pname, 1);
   1298            }
   1299        } else if (0 == strcmp("--add-signature", *iter)) {
   1300            cf.signature = 1;
   1301        } else if (0 == strcmp("--remove-signature", *iter)) {
   1302            cf.signature = -1;
   1303        } else if (**iter == '-' && (*iter)[1]) {
   1304            usage(pname, 1);
   1305        } else {
   1306            // move a non-option up in argv[]
   1307            *remainArgvLimit++ = *iter;
   1308        }
   1309    }
   1310 
   1311    if (printConvs || printName) {
   1312        return printConverters(pname, printName, printCanon) ? 2 : 0;
   1313    } else if (printTranslits) {
   1314        return printTransliterators(printCanon) ? 3 : 0;
   1315    }
   1316 
   1317    if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
   1318        fromcpage = ucnv_getDefaultName();
   1319    }
   1320    if (!tocpage || !uprv_strcmp(tocpage, "-")) {
   1321        tocpage = ucnv_getDefaultName();
   1322    }
   1323 
   1324    // Open the correct output file or connect to stdout for reading input
   1325    if (outfilestr != nullptr && strcmp(outfilestr, "-")) {
   1326        outfile = fopen(outfilestr, "wb");
   1327        if (outfile == nullptr) {
   1328            UnicodeString str1(outfilestr, "");
   1329            UnicodeString str2(strerror(errno), "");
   1330            initMsg(pname);
   1331            u_wmsg(stderr, "cantCreateOutputF",
   1332                str1.getBuffer(), str2.getBuffer());
   1333            return 1;
   1334        }
   1335    } else {
   1336        outfilestr = "-";
   1337        outfile = stdout;
   1338 #ifdef USE_FILENO_BINARY_MODE
   1339        if (setmode(fileno(outfile), O_BINARY) == -1) {
   1340            u_wmsg(stderr, "cantSetOutBinMode");
   1341            exit(-1);
   1342        }
   1343 #endif
   1344    }
   1345 
   1346    /* Loop again on the arguments to find all the input files, and
   1347    convert them. */
   1348 
   1349    cf.setBufferSize(bufsz);
   1350 
   1351    if(remainArgv < remainArgvLimit) {
   1352        for (iter = remainArgv; iter != remainArgvLimit; iter++) {
   1353            if (!cf.convertFile(
   1354                    pname, fromcpage, toucallback, touctxt, tocpage,
   1355                    fromucallback, fromuctxt, fallback, translit, *iter,
   1356                    outfile, verbose)
   1357            ) {
   1358                goto error_exit;
   1359            }
   1360        }
   1361    } else {
   1362        if (!cf.convertFile(
   1363                pname, fromcpage, toucallback, touctxt, tocpage,
   1364                fromucallback, fromuctxt, fallback, translit, nullptr,
   1365                outfile, verbose)
   1366        ) {
   1367            goto error_exit;
   1368        }
   1369    }
   1370 
   1371    goto normal_exit;
   1372 error_exit:
   1373 #if !UCONFIG_NO_LEGACY_CONVERSION
   1374    ret = 1;
   1375 #else 
   1376    fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
   1377 #endif
   1378 normal_exit:
   1379 
   1380    if (outfile != stdout) {
   1381        fclose(outfile);
   1382    }
   1383 
   1384    u_cleanup();
   1385 
   1386    return ret;
   1387 }
   1388 
   1389 
   1390 /*
   1391 * Hey, Emacs, please set the following:
   1392 *
   1393 * Local Variables:
   1394 * indent-tabs-mode: nil
   1395 * End:
   1396 *
   1397 */
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE