tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

icupkg.cpp (20694B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2005-2014, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  icupkg.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2005jul29
     16 *   created by: Markus W. Scherer
     17 *
     18 *   This tool operates on ICU data (.dat package) files.
     19 *   It takes one as input, or creates an empty one, and can remove, add, and
     20 *   extract data pieces according to command-line options.
     21 *   At the same time, it swaps each piece to a consistent set of platform
     22 *   properties as desired.
     23 *   Useful as an install-time tool for shipping only one flavor of ICU data
     24 *   and preparing data files for the target platform.
     25 *   Also for customizing ICU data (pruning, augmenting, replacing) and for
     26 *   taking it apart.
     27 *   Subsumes functionality and implementation code from
     28 *   gencmn, decmn, and icuswap tools.
     29 *   Will not work with data DLLs (shared libraries).
     30 */
     31 
     32 #include "unicode/utypes.h"
     33 #include "unicode/putil.h"
     34 #include "cstring.h"
     35 #include "toolutil.h"
     36 #include "uoptions.h"
     37 #include "uparse.h"
     38 #include "filestrm.h"
     39 #include "package.h"
     40 #include "pkg_icu.h"
     41 
     42 #include <stdio.h>
     43 #include <stdlib.h>
     44 #include <string.h>
     45 
     46 U_NAMESPACE_USE
     47 
     48 // TODO: add --matchmode=regex for using the ICU regex engine for item name pattern matching?
     49 
     50 // general definitions ----------------------------------------------------- ***
     51 
     52 // main() ------------------------------------------------------------------ ***
     53 
     54 static void
     55 printUsage(const char *pname, UBool isHelp) {
     56    FILE *where=isHelp ? stdout : stderr;
     57 
     58    fprintf(where,
     59            "%csage: %s [-h|-?|--help ] [-tl|-tb|-te] [-c] [-C comment]\n"
     60            "\t[-a list] [-r list] [-x list] [-l [-o outputListFileName]]\n"
     61            "\t[-s path] [-d path] [-w] [-m mode]\n"
     62            "\t[--ignore-deps]\n"
     63            "\t[--auto_toc_prefix] [--auto_toc_prefix_with_type] [--toc_prefix]\n"
     64            "\tinfilename [outfilename]\n",
     65            isHelp ? 'U' : 'u', pname);
     66    if(isHelp) {
     67        fprintf(where,
     68            "\n"
     69            "Read the input ICU .dat package file, modify it according to the options,\n"
     70            "swap it to the desired platform properties (charset & endianness),\n"
     71            "and optionally write the resulting ICU .dat package to the output file.\n"
     72            "Items are removed, then added, then extracted and listed.\n"
     73            "An ICU .dat package is written if items are removed or added,\n"
     74            "or if the input and output filenames differ,\n"
     75            "or if the --writepkg (-w) option is set.\n");
     76        fprintf(where,
     77            "\n"
     78            "If the input filename is \"new\" then an empty package is created.\n"
     79            "If the output filename is missing, then it is automatically generated\n"
     80            "from the input filename: If the input filename ends with an l, b, or e\n"
     81            "matching its platform properties, then the output filename will\n"
     82            "contain the letter from the -t (--type) option.\n");
     83        fprintf(where,
     84            "\n"
     85            "This tool can also be used to just swap a single ICU data file, replacing the\n"
     86            "former icuswap tool. For this mode, provide the infilename (and optional\n"
     87            "outfilename) for a non-package ICU data file.\n"
     88            "Allowed options include -t, -w, -s and -d.\n"
     89            "The filenames can be absolute, or relative to the source/dest dir paths.\n"
     90            "Other options are not allowed in this mode.\n");
     91        fprintf(where,
     92            "\n"
     93            "Options:\n"
     94            "\t(Only the last occurrence of an option is used.)\n"
     95            "\n"
     96            "\t-h or -? or --help    print this message and exit\n");
     97        fprintf(where,
     98            "\n"
     99            "\t-tl or --type l   output for little-endian/ASCII charset family\n"
    100            "\t-tb or --type b   output for big-endian/ASCII charset family\n"
    101            "\t-te or --type e   output for big-endian/EBCDIC charset family\n"
    102            "\t                  The output type defaults to the input type.\n"
    103            "\n"
    104            "\t-c or --copyright include the ICU copyright notice\n"
    105            "\t-C comment or --comment comment   include a comment string\n");
    106        fprintf(where,
    107            "\n"
    108            "\t-a list or --add list      add items to the package\n"
    109            "\t-r list or --remove list   remove items from the package\n"
    110            "\t-x list or --extract list  extract items from the package\n"
    111            "\tThe list can be a single item's filename,\n"
    112            "\tor a .txt filename with a list of item filenames,\n"
    113            "\tor an ICU .dat package filename.\n");
    114        fprintf(where,
    115            "\n"
    116            "\t-w or --writepkg  write the output package even if no items are removed\n"
    117            "\t                  or added (e.g., for only swapping the data)\n");
    118        fprintf(where,
    119            "\n"
    120            "\t-m mode or --matchmode mode  set the matching mode for item names with\n"
    121            "\t                             wildcards\n"
    122            "\t        noslash: the '*' wildcard does not match the '/' tree separator\n");
    123        fprintf(where,
    124            "\n"
    125            "\t--ignore-deps     Do not fail if not all resource dependencies are met. Use this\n"
    126            "\t                  option if the missing resources come from another source.");
    127        fprintf(where,
    128            "\n"
    129            "\tIn the .dat package, the Table of Contents (ToC) contains an entry\n"
    130            "\tfor each item of the form prefix/tree/itemname .\n"
    131            "\tThe prefix normally matches the package basename, and icupkg checks that,\n"
    132            "\tbut this is not necessary when ICU need not find and load the package by filename.\n"
    133            "\tICU package names end with the platform type letter, and thus differ\n"
    134            "\tbetween platform types. This is not required for user data packages.\n");
    135        fprintf(where,
    136            "\n"
    137            "\t--auto_toc_prefix            automatic ToC entries prefix\n"
    138            "\t                             Uses the prefix of the first entry of the\n"
    139            "\t                             input package, rather than its basename.\n"
    140            "\t                             Requires a non-empty input package.\n"
    141            "\t--auto_toc_prefix_with_type  auto_toc_prefix + adjust platform type\n"
    142            "\t                             Same as auto_toc_prefix but also checks that\n"
    143            "\t                             the prefix ends with the input platform\n"
    144            "\t                             type letter, and modifies it to the output\n"
    145            "\t                             platform type letter.\n"
    146            "\t                At most one of the auto_toc_prefix options\n"
    147            "\t                can be used at a time.\n"
    148            "\t--toc_prefix prefix          ToC prefix to be used in the output package\n"
    149            "\t                             Overrides the package basename\n"
    150            "\t                             and --auto_toc_prefix.\n"
    151            "\t                             Cannot be combined with --auto_toc_prefix_with_type.\n");
    152        /*
    153         * Usage text columns, starting after the initial TAB.
    154         *      1         2         3         4         5         6         7         8
    155         *     901234567890123456789012345678901234567890123456789012345678901234567890
    156         */
    157        fprintf(where,
    158            "\n"
    159            "\tList file syntax: Items are listed on one or more lines and separated\n"
    160            "\tby whitespace (space+tab).\n"
    161            "\tComments begin with # and are ignored. Empty lines are ignored.\n"
    162            "\tLines where the first non-whitespace character is one of %s\n"
    163            "\tare also ignored, to reserve for future syntax.\n",
    164            U_PKG_RESERVED_CHARS);
    165        fprintf(where,
    166            "\tItems for removal or extraction may contain a single '*' wildcard\n"
    167            "\tcharacter. The '*' matches zero or more characters.\n"
    168            "\tIf --matchmode noslash (-m noslash) is set, then the '*'\n"
    169            "\tdoes not match '/'.\n");
    170        fprintf(where,
    171            "\n"
    172            "\tItems must be listed relative to the package, and the --sourcedir or\n"
    173            "\tthe --destdir path will be prepended.\n"
    174            "\tThe paths are only prepended to item filenames while adding or\n"
    175            "\textracting items, not to ICU .dat package or list filenames.\n"
    176            "\t\n"
    177            "\tPaths may contain '/' instead of the platform's\n"
    178            "\tfile separator character, and are converted as appropriate.\n");
    179        fprintf(where,
    180            "\n"
    181            "\t-s path or --sourcedir path  directory for the --add items\n"
    182            "\t-d path or --destdir path    directory for the --extract items\n"
    183            "\n"
    184            "\t-l or --list                 list the package items\n"
    185            "\t                             (after modifying the package)\n"
    186            "\t                             to stdout or to output list file\n"
    187            "\t-o path or --outlist path    path/filename for the --list output\n");
    188    }
    189 }
    190 
    191 static UOption options[]={
    192    UOPTION_HELP_H,
    193    UOPTION_HELP_QUESTION_MARK,
    194    UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG),
    195 
    196    UOPTION_COPYRIGHT,
    197    UOPTION_DEF("comment", 'C', UOPT_REQUIRES_ARG),
    198 
    199    UOPTION_SOURCEDIR,
    200    UOPTION_DESTDIR,
    201 
    202    UOPTION_DEF("writepkg", 'w', UOPT_NO_ARG),
    203 
    204    UOPTION_DEF("matchmode", 'm', UOPT_REQUIRES_ARG),
    205 
    206    UOPTION_DEF("ignore-deps", '\1', UOPT_NO_ARG),
    207 
    208    UOPTION_DEF("add", 'a', UOPT_REQUIRES_ARG),
    209    UOPTION_DEF("remove", 'r', UOPT_REQUIRES_ARG),
    210    UOPTION_DEF("extract", 'x', UOPT_REQUIRES_ARG),
    211 
    212    UOPTION_DEF("list", 'l', UOPT_NO_ARG),
    213    UOPTION_DEF("outlist", 'o', UOPT_REQUIRES_ARG),
    214 
    215    UOPTION_DEF("auto_toc_prefix", '\1', UOPT_NO_ARG),
    216    UOPTION_DEF("auto_toc_prefix_with_type", '\1', UOPT_NO_ARG),
    217    UOPTION_DEF("toc_prefix", '\1', UOPT_REQUIRES_ARG)
    218 };
    219 
    220 enum {
    221    OPT_HELP_H,
    222    OPT_HELP_QUESTION_MARK,
    223    OPT_OUT_TYPE,
    224 
    225    OPT_COPYRIGHT,
    226    OPT_COMMENT,
    227 
    228    OPT_SOURCEDIR,
    229    OPT_DESTDIR,
    230 
    231    OPT_WRITEPKG,
    232 
    233    OPT_MATCHMODE,
    234 
    235    OPT_IGNORE_DEPS,
    236 
    237    OPT_ADD_LIST,
    238    OPT_REMOVE_LIST,
    239    OPT_EXTRACT_LIST,
    240 
    241    OPT_LIST_ITEMS,
    242    OPT_LIST_FILE,
    243 
    244    OPT_AUTO_TOC_PREFIX,
    245    OPT_AUTO_TOC_PREFIX_WITH_TYPE,
    246    OPT_TOC_PREFIX,
    247 
    248    OPT_COUNT
    249 };
    250 
    251 static UBool
    252 isPackageName(const char *filename) {
    253    int32_t len;
    254 
    255    len = static_cast<int32_t>(strlen(filename)) - 4; /* -4: subtract the length of ".dat" */
    256    return len > 0 && 0 == strcmp(filename + len, ".dat");
    257 }
    /*
     * Needed for MinGW builds: MinGW globs the command-line arguments on its own,
     * so an escaped \* would arrive as a list of file names rather than as the
     * literal "*" that the item name patterns of this tool expect.
     */
    int _CRT_glob = 0;
    263 
    264 extern int
    265 main(int argc, char *argv[]) {
    266    const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outComment;
    267    char outType;
    268    UBool isHelp, isModified, isPackage;
    269    int result = 0;
    270 
    271    Package *pkg, *listPkg, *addListPkg;
    272 
    273    U_MAIN_INIT_ARGS(argc, argv);
    274 
    275    /* get the program basename */
    276    pname=findBasename(argv[0]);
    277 
    278    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
    279    isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
    280    if(isHelp) {
    281        printUsage(pname, true);
    282        return U_ZERO_ERROR;
    283    }
    284 
    285    pkg=new Package;
    286    if(pkg==nullptr) {
    287        fprintf(stderr, "icupkg: not enough memory\n");
    288        return U_MEMORY_ALLOCATION_ERROR;
    289    }
    290    isModified=false;
    291 
    292    int autoPrefix=0;
    293    if(options[OPT_AUTO_TOC_PREFIX].doesOccur) {
    294        pkg->setAutoPrefix();
    295        ++autoPrefix;
    296    }
    297    if(options[OPT_AUTO_TOC_PREFIX_WITH_TYPE].doesOccur) {
    298        if(options[OPT_TOC_PREFIX].doesOccur) {
    299            fprintf(stderr, "icupkg: --auto_toc_prefix_with_type and also --toc_prefix\n");
    300            printUsage(pname, false);
    301            return U_ILLEGAL_ARGUMENT_ERROR;
    302        }
    303        pkg->setAutoPrefixWithType();
    304        ++autoPrefix;
    305    }
    306    if(argc<2 || 3<argc || autoPrefix>1) {
    307        printUsage(pname, false);
    308        return U_ILLEGAL_ARGUMENT_ERROR;
    309    }
    310 
    311    if(options[OPT_SOURCEDIR].doesOccur) {
    312        sourcePath=options[OPT_SOURCEDIR].value;
    313    } else {
    314        // work relative to the current working directory
    315        sourcePath=nullptr;
    316    }
    317    if(options[OPT_DESTDIR].doesOccur) {
    318        destPath=options[OPT_DESTDIR].value;
    319    } else {
    320        // work relative to the current working directory
    321        destPath=nullptr;
    322    }
    323 
    324    if(0==strcmp(argv[1], "new")) {
    325        if(autoPrefix) {
    326            fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but no input package\n");
    327            printUsage(pname, false);
    328            return U_ILLEGAL_ARGUMENT_ERROR;
    329        }
    330        inFilename=nullptr;
    331        isPackage=true;
    332    } else {
    333        inFilename=argv[1];
    334        if(isPackageName(inFilename)) {
    335            pkg->readPackage(inFilename);
    336            isPackage=true;
    337        } else {
    338            /* swap a single file (icuswap replacement) rather than work on a package */
    339            pkg->addFile(sourcePath, inFilename);
    340            isPackage=false;
    341        }
    342    }
    343 
    344    if(argc>=3) {
    345        outFilename=argv[2];
    346        if(0!=strcmp(argv[1], argv[2])) {
    347            isModified=true;
    348        }
    349    } else if(isPackage) {
    350        outFilename=nullptr;
    351    } else /* !isPackage */ {
    352        outFilename=inFilename;
    353        isModified = static_cast<UBool>(sourcePath != destPath);
    354    }
    355 
    356    /* parse the output type option */
    357    if(options[OPT_OUT_TYPE].doesOccur) {
    358        const char *type=options[OPT_OUT_TYPE].value;
    359        if(type[0]==0 || type[1]!=0) {
    360            /* the type must be exactly one letter */
    361            printUsage(pname, false);
    362            return U_ILLEGAL_ARGUMENT_ERROR;
    363        }
    364        outType=type[0];
    365        switch(outType) {
    366        case 'l':
    367        case 'b':
    368        case 'e':
    369            break;
    370        default:
    371            printUsage(pname, false);
    372            return U_ILLEGAL_ARGUMENT_ERROR;
    373        }
    374 
    375        /*
    376         * Set the isModified flag if the output type differs from the
    377         * input package type.
    378         * If we swap a single file, just assume that we are modifying it.
    379         * The Package class does not give us access to the item and its type.
    380         */
    381        isModified |= static_cast<UBool>(!isPackage || outType != pkg->getInType());
    382    } else if(isPackage) {
    383        outType=pkg->getInType(); // default to input type
    384    } else /* !isPackage: swap single file */ {
    385        outType=0; /* tells extractItem() to not swap */
    386    }
    387 
    388    if(options[OPT_WRITEPKG].doesOccur) {
    389        isModified=true;
    390    }
    391 
    392    if(!isPackage) {
    393        /*
    394         * icuswap tool replacement: Only swap a single file.
    395         * Check that irrelevant options are not set.
    396         */
    397        if( options[OPT_COMMENT].doesOccur ||
    398            options[OPT_COPYRIGHT].doesOccur ||
    399            options[OPT_MATCHMODE].doesOccur ||
    400            options[OPT_REMOVE_LIST].doesOccur ||
    401            options[OPT_ADD_LIST].doesOccur ||
    402            options[OPT_EXTRACT_LIST].doesOccur ||
    403            options[OPT_LIST_ITEMS].doesOccur
    404        ) {
    405            printUsage(pname, false);
    406            return U_ILLEGAL_ARGUMENT_ERROR;
    407        }
    408        if(isModified) {
    409            pkg->extractItem(destPath, outFilename, 0, outType);
    410        }
    411 
    412        delete pkg;
    413        return result;
    414    }
    415 
    416    /* Work with a package. */
    417 
    418    if(options[OPT_COMMENT].doesOccur) {
    419        outComment=options[OPT_COMMENT].value;
    420    } else if(options[OPT_COPYRIGHT].doesOccur) {
    421        outComment=U_COPYRIGHT_STRING;
    422    } else {
    423        outComment=nullptr;
    424    }
    425 
    426    if(options[OPT_MATCHMODE].doesOccur) {
    427        if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) {
    428            pkg->setMatchMode(Package::MATCH_NOSLASH);
    429        } else {
    430            printUsage(pname, false);
    431            return U_ILLEGAL_ARGUMENT_ERROR;
    432        }
    433    }
    434 
    435    /* remove items */
    436    if(options[OPT_REMOVE_LIST].doesOccur) {
    437        listPkg=new Package();
    438        if(listPkg==nullptr) {
    439            fprintf(stderr, "icupkg: not enough memory\n");
    440            exit(U_MEMORY_ALLOCATION_ERROR);
    441        }
    442        if(readList(nullptr, options[OPT_REMOVE_LIST].value, false, listPkg)) {
    443            pkg->removeItems(*listPkg);
    444            delete listPkg;
    445            isModified=true;
    446        } else {
    447            printUsage(pname, false);
    448            return U_ILLEGAL_ARGUMENT_ERROR;
    449        }
    450    }
    451 
    452    /*
    453     * add items
    454     * use a separate Package so that its memory and items stay around
    455     * as long as the main Package
    456     */
    457    addListPkg=nullptr;
    458    if(options[OPT_ADD_LIST].doesOccur) {
    459        addListPkg=new Package();
    460        if(addListPkg==nullptr) {
    461            fprintf(stderr, "icupkg: not enough memory\n");
    462            exit(U_MEMORY_ALLOCATION_ERROR);
    463        }
    464        if(readList(sourcePath, options[OPT_ADD_LIST].value, true, addListPkg)) {
    465            pkg->addItems(*addListPkg);
    466            // delete addListPkg; deferred until after writePackage()
    467            isModified=true;
    468        } else {
    469            printUsage(pname, false);
    470            return U_ILLEGAL_ARGUMENT_ERROR;
    471        }
    472    }
    473 
    474    /* extract items */
    475    if(options[OPT_EXTRACT_LIST].doesOccur) {
    476        listPkg=new Package();
    477        if(listPkg==nullptr) {
    478            fprintf(stderr, "icupkg: not enough memory\n");
    479            exit(U_MEMORY_ALLOCATION_ERROR);
    480        }
    481        if(readList(nullptr, options[OPT_EXTRACT_LIST].value, false, listPkg)) {
    482            pkg->extractItems(destPath, *listPkg, outType);
    483            delete listPkg;
    484        } else {
    485            printUsage(pname, false);
    486            return U_ILLEGAL_ARGUMENT_ERROR;
    487        }
    488    }
    489 
    490    /* list items */
    491    if(options[OPT_LIST_ITEMS].doesOccur) {
    492        int32_t i;
    493        if (options[OPT_LIST_FILE].doesOccur) {
    494            FileStream *out;
    495            out = T_FileStream_open(options[OPT_LIST_FILE].value, "w");
    496            if (out != nullptr) {
    497                for(i=0; i<pkg->getItemCount(); ++i) {
    498                    T_FileStream_writeLine(out, pkg->getItem(i)->name);
    499                    T_FileStream_writeLine(out, "\n");
    500                }
    501                T_FileStream_close(out);
    502            } else {
    503                return U_ILLEGAL_ARGUMENT_ERROR;
    504            }
    505        } else {
    506            for(i=0; i<pkg->getItemCount(); ++i) {
    507                fprintf(stdout, "%s\n", pkg->getItem(i)->name);
    508            }
    509        }
    510    }
    511 
    512    /* check dependencies between items */
    513    if(!options[OPT_IGNORE_DEPS].doesOccur && !pkg->checkDependencies()) {
    514        /* some dependencies are not fulfilled */
    515        return U_MISSING_RESOURCE_ERROR;
    516    }
    517 
    518    /* write the output .dat package if there are any modifications */
    519    if(isModified) {
    520        char outFilenameBuffer[1024]; // for auto-generated output filename, if necessary
    521 
    522        if(outFilename==nullptr || outFilename[0]==0) {
    523            if(inFilename==nullptr || inFilename[0]==0) {
    524                fprintf(stderr, "icupkg: unable to auto-generate an output filename if there is no input filename\n");
    525                exit(U_ILLEGAL_ARGUMENT_ERROR);
    526            }
    527 
    528            /*
    529             * auto-generate a filename:
    530             * copy the inFilename,
    531             * and if the last basename character matches the input file's type,
    532             * then replace it with the output file's type
    533             */
    534            char suffix[6]="?.dat";
    535            char *s;
    536 
    537            suffix[0]=pkg->getInType();
    538            strcpy(outFilenameBuffer, inFilename);
    539            s=strchr(outFilenameBuffer, 0);
    540            if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) {
    541                *(s-5)=outType;
    542            }
    543            outFilename=outFilenameBuffer;
    544        }
    545        if(options[OPT_TOC_PREFIX].doesOccur) {
    546            pkg->setPrefix(options[OPT_TOC_PREFIX].value);
    547        }
    548        result = writePackageDatFile(outFilename, outComment, nullptr, nullptr, pkg, outType);
    549    }
    550 
    551    delete addListPkg;
    552    delete pkg;
    553    return result;
    554 }
    555 
    556 /*
    557 * Hey, Emacs, please set the following:
    558 *
    559 * Local Variables:
    560 * indent-tabs-mode: nil
    561 * End:
    562 *
    563 */