tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pkg_gencmn.cpp (18750B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /******************************************************************************
      4 *   Copyright (C) 2008-2012, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *******************************************************************************
      7 */
      8 #include "unicode/utypes.h"
      9 
     10 #include <stdio.h>
     11 #include <stdlib.h>
     12 #include "unicode/utypes.h"
     13 #include "unicode/putil.h"
     14 #include "cmemory.h"
     15 #include "cstring.h"
     16 #include "filestrm.h"
     17 #include "toolutil.h"
     18 #include "unicode/uclean.h"
     19 #include "unewdata.h"
     20 #include "putilimp.h"
     21 #include "pkg_gencmn.h"
     22 
     23 #define STRING_STORE_SIZE 200000
     24 
     25 #define COMMON_DATA_NAME U_ICUDATA_NAME
     26 #define DATA_TYPE "dat"
     27 
     28 /* ICU package data file format (.dat files) ------------------------------- ***
     29 
     30 Description of the data format after the usual ICU data file header
     31 (UDataInfo etc.).
     32 
     33 Format version 1
     34 
     35 A .dat package file contains a simple Table of Contents of item names,
     36 followed by the items themselves:
     37 
     38 1. ToC table
     39 
     40 uint32_t count; - number of items
     41 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
     42    uint32_t nameOffset; - offset of the item name
     43    uint32_t dataOffset; - offset of the item data
     44 both are byte offsets from the beginning of the data
     45 
     46 2. item name strings
     47 
     48 All item names are stored as char * strings in one block between the ToC table
     49 and the data items.
     50 
     51 3. data items
     52 
     53 The data items are stored following the item names block.
     54 Each data item is 16-aligned.
     55 The data items are stored in the sorted order of their names.
     56 
     57 Therefore, the top of the name strings block is the offset of the first item,
     58 the length of the last item is the difference between its offset and
     59 the .dat file length, and the length of all previous items is the difference
     60 between its offset and the next one.
     61 
     62 ----------------------------------------------------------------------------- */
     63 
     64 /* UDataInfo cf. udata.h */
     65 static const UDataInfo dataInfo={
     66    sizeof(UDataInfo),
     67    0,
     68 
     69    U_IS_BIG_ENDIAN,
     70    U_CHARSET_FAMILY,
     71    sizeof(char16_t),
     72    0,
     73 
     74    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
     75    {1, 0, 0, 0},                 /* formatVersion */
     76    {3, 0, 0, 0}                  /* dataVersion */
     77 };
     78 
     79 static uint32_t maxSize;
     80 
     81 static char stringStore[STRING_STORE_SIZE];
     82 static uint32_t stringTop=0, basenameTotal=0;
     83 
     84 typedef struct {
     85    char *pathname, *basename;
     86    uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
     87 } File;
     88 
     89 #define CHUNK_FILE_COUNT 256
     90 static File *files = nullptr;
     91 static uint32_t fileCount=0;
     92 static uint32_t fileMax = 0;
     93 
     94 
     95 static char *symPrefix = nullptr;
     96 
     97 #define LINE_BUFFER_SIZE 512
     98 /* prototypes --------------------------------------------------------------- */
     99 
    100 static void
    101 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
    102 
    103 static char *
    104 allocString(uint32_t length);
    105 
    106 U_CDECL_BEGIN
    107 static int
    108 compareFiles(const void *file1, const void *file2);
    109 U_CDECL_END
    110 
    111 static char *
    112 pathToFullPath(const char *path, const char *source);
    113 
    114 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
    115 static void
    116 fixDirToTreePath(char *s);
    117 /* -------------------------------------------------------------------------- */
    118 
    119 U_CAPI void U_EXPORT2
    120 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
    121                     const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
    122    static char buffer[4096];
    123    char *line;
    124    char *linePtr;
    125    char *s = nullptr;
    126    UErrorCode errorCode=U_ZERO_ERROR;
    127    uint32_t i, fileOffset, basenameOffset, length, nread;
    128    FileStream *in, *file;
    129 
    130    line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
    131    if (line == nullptr) {
    132        fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
    133        exit(U_MEMORY_ALLOCATION_ERROR);
    134    }
    135 
    136    linePtr = line;
    137 
    138    maxSize = max_size;
    139 
    140    if (destDir == nullptr) {
    141        destDir = u_getDataDirectory();
    142    }
    143    if (name == nullptr) {
    144        name = COMMON_DATA_NAME;
    145    }
    146    if (type == nullptr) {
    147        type = DATA_TYPE;
    148    }
    149    if (source == nullptr) {
    150        source = ".";
    151    }
    152 
    153    if (dataFile == nullptr) {
    154        in = T_FileStream_stdin();
    155    } else {
    156        in = T_FileStream_open(dataFile, "r");
    157        if(in == nullptr) {
    158            fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
    159            exit(U_FILE_ACCESS_ERROR);
    160        }
    161    }
    162 
    163    if (verbose) {
    164        if(sourceTOC) {
    165            printf("generating %s_%s.c (table of contents source file)\n", name, type);
    166        } else {
    167            printf("generating %s.%s (common data file with table of contents)\n", name, type);
    168        }
    169    }
    170 
    171    /* read the list of files and get their lengths */
    172    while((s != nullptr && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
    173                                                             LINE_BUFFER_SIZE))!=nullptr) {
    174        /* remove trailing newline characters and parse space separated items */
    175        if (s != nullptr && *s != 0) {
    176            line=s;
    177        } else {
    178            s=line;
    179        }
    180        while(*s!=0) {
    181            if(*s==' ') {
    182                *s=0;
    183                ++s;
    184                break;
    185            } else if(*s=='\r' || *s=='\n') {
    186                *s=0;
    187                break;
    188            }
    189            ++s;
    190        }
    191 
    192        /* check for comment */
    193 
    194        if (*line == '#') {
    195            continue;
    196        }
    197 
    198        /* add the file */
    199 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
    200        {
    201          char *t;
    202          while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
    203            *t = U_FILE_SEP_CHAR;
    204          }
    205        }
    206 #endif
    207        addFile(getLongPathname(line), name, source, sourceTOC, verbose);
    208    }
    209 
    210    uprv_free(linePtr);
    211 
    212    if(in!=T_FileStream_stdin()) {
    213        T_FileStream_close(in);
    214    }
    215 
    216    if(fileCount==0) {
    217        fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == nullptr ? "<stdin>" : dataFile);
    218        return;
    219    }
    220 
    221    /* sort the files by basename */
    222    qsort(files, fileCount, sizeof(File), compareFiles);
    223 
    224    if(!sourceTOC) {
    225        UNewDataMemory *out;
    226 
    227        /* determine the offsets of all basenames and files in this common one */
    228        basenameOffset=4+8*fileCount;
    229        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
    230        for(i=0; i<fileCount; ++i) {
    231            files[i].fileOffset=fileOffset;
    232            fileOffset+=(files[i].fileSize+15)&~0xf;
    233            files[i].basenameOffset=basenameOffset;
    234            basenameOffset+=files[i].basenameLength;
    235        }
    236 
    237        /* create the output file */
    238        out=udata_create(destDir, type, name,
    239                         &dataInfo,
    240                         copyRight == nullptr ? U_COPYRIGHT_STRING : copyRight,
    241                         &errorCode);
    242        if(U_FAILURE(errorCode)) {
    243            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
    244                destDir, name, type,
    245                u_errorName(errorCode));
    246            exit(errorCode);
    247        }
    248 
    249        /* write the table of contents */
    250        udata_write32(out, fileCount);
    251        for(i=0; i<fileCount; ++i) {
    252            udata_write32(out, files[i].basenameOffset);
    253            udata_write32(out, files[i].fileOffset);
    254        }
    255 
    256        /* write the basenames */
    257        for(i=0; i<fileCount; ++i) {
    258            udata_writeString(out, files[i].basename, files[i].basenameLength);
    259        }
    260        length=4+8*fileCount+basenameTotal;
    261 
    262        /* copy the files */
    263        for(i=0; i<fileCount; ++i) {
    264            /* pad to 16-align the next file */
    265            length&=0xf;
    266            if(length!=0) {
    267                udata_writePadding(out, 16-length);
    268            }
    269 
    270            if (verbose) {
    271                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
    272            }
    273 
    274            /* copy the next file */
    275            file=T_FileStream_open(files[i].pathname, "rb");
    276            if(file==nullptr) {
    277                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
    278                exit(U_FILE_ACCESS_ERROR);
    279            }
    280            for(nread = 0;;) {
    281                length=T_FileStream_read(file, buffer, sizeof(buffer));
    282                if(length <= 0) {
    283                    break;
    284                }
    285                nread += length;
    286                udata_writeBlock(out, buffer, length);
    287            }
    288            T_FileStream_close(file);
    289            length=files[i].fileSize;
    290 
    291            if (nread != files[i].fileSize) {
    292              fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
    293                exit(U_FILE_ACCESS_ERROR);
    294            }
    295        }
    296 
    297        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
    298        length&=0xf;
    299        if(length!=0) {
    300            udata_writePadding(out, 16-length);
    301        }
    302 
    303        /* finish */
    304        udata_finish(out, &errorCode);
    305        if(U_FAILURE(errorCode)) {
    306            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
    307            exit(errorCode);
    308        }
    309    } else {
    310        /* write a .c source file with the table of contents */
    311        char *filename;
    312        FileStream *out;
    313 
    314        /* create the output filename */
    315        filename=s=buffer;
    316        uprv_strcpy(filename, destDir);
    317        s=filename+uprv_strlen(filename);
    318        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
    319            *s++=U_FILE_SEP_CHAR;
    320        }
    321        uprv_strcpy(s, name);
    322        if(*(type)!=0) {
    323            s+=uprv_strlen(s);
    324            *s++='_';
    325            uprv_strcpy(s, type);
    326        }
    327        s+=uprv_strlen(s);
    328        uprv_strcpy(s, ".c");
    329 
    330        /* open the output file */
    331        out=T_FileStream_open(filename, "w");
    332        if (gencmnFileName != nullptr) {
    333            uprv_strcpy(gencmnFileName, filename);
    334        }
    335        if(out==nullptr) {
    336            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
    337            exit(U_FILE_ACCESS_ERROR);
    338        }
    339 
    340        /* write the source file */
    341        snprintf(buffer, sizeof(buffer),
    342            "/*\n"
    343            " * ICU common data table of contents for %s.%s\n"
    344            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
    345            " */\n\n"
    346            "#include \"unicode/utypes.h\"\n"
    347            "#include \"unicode/udata.h\"\n"
    348            "\n"
    349            "/* external symbol declarations for data (%d files) */\n",
    350                name, type, fileCount);
    351        T_FileStream_writeLine(out, buffer);
    352 
    353        snprintf(buffer, sizeof(buffer), "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
    354        T_FileStream_writeLine(out, buffer);
    355        for(i=1; i<fileCount; ++i) {
    356            snprintf(buffer, sizeof(buffer), ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
    357            T_FileStream_writeLine(out, buffer);
    358        }
    359        T_FileStream_writeLine(out, ";\n\n");
    360 
    361        snprintf(
    362            buffer, sizeof(buffer),
    363            "U_EXPORT const struct {\n"
    364            "    uint16_t headerSize;\n"
    365            "    uint8_t magic1, magic2;\n"
    366            "    UDataInfo info;\n"
    367            "    char padding[%lu];\n"
    368            "    uint32_t count, reserved;\n"
    369            "    struct {\n"
    370            "        const char *name;\n"
    371            "        const void *data;\n"
    372            "    } toc[%lu];\n"
    373            "} U_EXPORT2 %s_dat = {\n"
    374            "    32, 0xda, 0x27, {\n"
    375            "        %lu, 0,\n"
    376            "        %u, %u, %u, 0,\n"
    377            "        {0x54, 0x6f, 0x43, 0x50},\n"
    378            "        {1, 0, 0, 0},\n"
    379            "        {0, 0, 0, 0}\n"
    380            "    },\n"
    381            "    \"\", %lu, 0, {\n",
    382            static_cast<unsigned long>(32-4-sizeof(UDataInfo)),
    383            static_cast<unsigned long>(fileCount),
    384            entrypointName,
    385            static_cast<unsigned long>(sizeof(UDataInfo)),
    386            U_IS_BIG_ENDIAN,
    387            U_CHARSET_FAMILY,
    388            U_SIZEOF_UCHAR,
    389            static_cast<unsigned long>(fileCount)
    390        );
    391        T_FileStream_writeLine(out, buffer);
    392 
    393        snprintf(buffer, sizeof(buffer), "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
    394        T_FileStream_writeLine(out, buffer);
    395        for(i=1; i<fileCount; ++i) {
    396            snprintf(buffer, sizeof(buffer), ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
    397            T_FileStream_writeLine(out, buffer);
    398        }
    399 
    400        T_FileStream_writeLine(out, "\n    }\n};\n");
    401        T_FileStream_close(out);
    402 
    403        uprv_free(symPrefix);
    404    }
    405 }
    406 
    407 static void
    408 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
    409    char *s;
    410    uint32_t length;
    411    char *fullPath = nullptr;
    412 
    413    if(fileCount==fileMax) {
    414      fileMax += CHUNK_FILE_COUNT;
    415      files = static_cast<File*>(uprv_realloc(files, fileMax * sizeof(files[0]))); /* note: never freed. */
    416      if(files==nullptr) {
    417        fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", static_cast<unsigned int>(fileMax * sizeof(files[0])), fileCount);
    418        exit(U_MEMORY_ALLOCATION_ERROR);
    419      }
    420    }
    421 
    422    if(!sourceTOC) {
    423        FileStream *file;
    424 
    425        if(uprv_pathIsAbsolute(filename)) {
    426            fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
    427            exit(U_ILLEGAL_ARGUMENT_ERROR);
    428        }
    429        fullPath = pathToFullPath(filename, source);
    430        /* store the pathname */
    431        length = static_cast<uint32_t>(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
    432        s=allocString(length);
    433        uprv_strcpy(s, name);
    434        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
    435        uprv_strcat(s, filename);
    436 
    437        /* get the basename */
    438        fixDirToTreePath(s);
    439        files[fileCount].basename=s;
    440        files[fileCount].basenameLength=length;
    441 
    442        files[fileCount].pathname=fullPath;
    443 
    444        basenameTotal+=length;
    445 
    446        /* try to open the file */
    447        file=T_FileStream_open(fullPath, "rb");
    448        if(file==nullptr) {
    449            fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
    450            exit(U_FILE_ACCESS_ERROR);
    451        }
    452 
    453        /* get the file length */
    454        length=T_FileStream_size(file);
    455        if(T_FileStream_error(file) || length<=20) {
    456            fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
    457            exit(U_FILE_ACCESS_ERROR);
    458        }
    459 
    460        T_FileStream_close(file);
    461 
    462        /* do not add files that are longer than maxSize */
    463        if(maxSize && length>maxSize) {
    464            if (verbose) {
    465                printf("%s ignored (size %ld > %ld)\n", fullPath, static_cast<long>(length), static_cast<long>(maxSize));
    466            }
    467            return;
    468        }
    469        files[fileCount].fileSize=length;
    470    } else {
    471        char *t;
    472        /* get and store the basename */
    473        /* need to include the package name */
    474        length = static_cast<uint32_t>(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
    475        s=allocString(length);
    476        uprv_strcpy(s, name);
    477        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
    478        uprv_strcat(s, filename);
    479        fixDirToTreePath(s);
    480        files[fileCount].basename=s;
    481        /* turn the basename into an entry point name and store in the pathname field */
    482        t=files[fileCount].pathname=allocString(length);
    483        while(--length>0) {
    484            if(*s=='.' || *s=='-' || *s=='/') {
    485                *t='_';
    486            } else {
    487                *t=*s;
    488            }
    489            ++s;
    490            ++t;
    491        }
    492        *t=0;
    493    }
    494    ++fileCount;
    495 }
    496 
    497 static char *
    498 allocString(uint32_t length) {
    499    uint32_t top=stringTop+length;
    500    char *p;
    501 
    502    if(top>STRING_STORE_SIZE) {
    503        fprintf(stderr, "gencmn: out of memory\n");
    504        exit(U_MEMORY_ALLOCATION_ERROR);
    505    }
    506    p=stringStore+stringTop;
    507    stringTop=top;
    508    return p;
    509 }
    510 
    511 static char *
    512 pathToFullPath(const char *path, const char *source) {
    513    int32_t length;
    514    int32_t newLength;
    515    char *fullPath;
    516    int32_t n;
    517 
    518    length = static_cast<uint32_t>(uprv_strlen(path) + 1);
    519    newLength = (length + 1 + static_cast<int32_t>(uprv_strlen(source)));
    520    fullPath = static_cast<char*>(uprv_malloc(newLength));
    521    if(source != nullptr) {
    522        uprv_strcpy(fullPath, source);
    523        uprv_strcat(fullPath, U_FILE_SEP_STRING);
    524    } else {
    525        fullPath[0] = 0;
    526    }
    527    n = static_cast<int32_t>(uprv_strlen(fullPath));
    528    fullPath[n] = 0;       /* Suppress compiler warning for unused variable n    */
    529                           /*  when conditional code below is not compiled.      */
    530    uprv_strcat(fullPath, path);
    531 
    532 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    533 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
    534    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
    535    for(;fullPath[n];n++) {
    536        if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
    537            fullPath[n] = U_FILE_SEP_CHAR;
    538        }
    539    }
    540 #endif
    541 #endif
    542 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    543    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
    544    for(;fullPath[n];n++) {
    545        if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
    546            fullPath[n] = U_FILE_SEP_CHAR;
    547        }
    548    }
    549 #endif
    550    return fullPath;
    551 }
    552 
    553 U_CDECL_BEGIN
    554 static int
    555 compareFiles(const void *file1, const void *file2) {
    556    /* sort by basename */
    557    return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
    558 }
    559 U_CDECL_END
    560 
    561 static void
    562 fixDirToTreePath(char *s)
    563 {
    564    (void)s;
    565 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    566    char *t;
    567 #endif
    568 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    569    for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) {
    570        *t = U_TREE_ENTRY_SEP_CHAR;
    571    }
    572 #endif
    573 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    574    for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) {
    575        *t = U_TREE_ENTRY_SEP_CHAR;
    576    }
    577 #endif
    578 }