tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

writesrc.cpp (16312B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2005-2012, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  writesrc.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2005apr23
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Helper functions for writing source code for data.
     19 */
     20 
     21 #include <stdio.h>
     22 #include <time.h>
     23 
     24 // The C99 standard suggested that C++ implementations not define PRId64 etc. constants
     25 // unless this macro is defined.
     26 // See the Notes at https://en.cppreference.com/w/cpp/types/integer .
     27 // Similar to defining __STDC_LIMIT_MACROS in unicode/ptypes.h .
     28 #ifndef __STDC_FORMAT_MACROS
     29 #   define __STDC_FORMAT_MACROS
     30 #endif
     31 #include <cinttypes>
     32 
     33 #include "unicode/utypes.h"
     34 #include "unicode/putil.h"
     35 #include "unicode/ucptrie.h"
     36 #include "unicode/errorcode.h"
     37 #include "unicode/uniset.h"
     38 #include "unicode/usetiter.h"
     39 #include "unicode/utf16.h"
     40 #include "utrie2.h"
     41 #include "cstring.h"
     42 #include "writesrc.h"
     43 #include "util.h"
     44 
     45 U_NAMESPACE_BEGIN
     46 
     47 ValueNameGetter::~ValueNameGetter() {}
     48 
     49 U_NAMESPACE_END
     50 
     51 U_NAMESPACE_USE
     52 
     53 static FILE *
     54 usrc_createWithoutHeader(const char *path, const char *filename) {
     55    char buffer[1024];
     56    const char *p;
     57    char *q;
     58    FILE *f;
     59    char c;
     60 
     61    if(path==nullptr) {
     62        p=filename;
     63    } else {
     64        /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
     65        uprv_strcpy(buffer, path);
     66        q=buffer+uprv_strlen(buffer);
     67        if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
     68            *q++=U_FILE_SEP_CHAR;
     69        }
     70        uprv_strcpy(q, filename);
     71        p=buffer;
     72    }
     73 
     74    f=fopen(p, "w");
     75    if (f==nullptr) {
     76        fprintf(
     77            stderr,
     78            "usrc_create(%s, %s): unable to create file\n",
     79            path!=nullptr ? path : "", filename);
     80    }
     81    return f;
     82 }
     83 
     84 U_CAPI FILE * U_EXPORT2
     85 usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
     86    FILE *f = usrc_createWithoutHeader(path, filename);
     87    if (f == nullptr) {
     88        return f;
     89    }
     90    usrc_writeCopyrightHeader(f, "//", copyrightYear);
     91    usrc_writeFileNameGeneratedBy(f, "//", filename, generator);
     92    return f;
     93 }
     94 
     95 U_CAPI FILE * U_EXPORT2
     96 usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
     97    FILE *f = usrc_createWithoutHeader(path, filename);
     98    if (f == nullptr) {
     99        return f;
    100    }
    101    usrc_writeCopyrightHeader(f, "#", copyrightYear);
    102    usrc_writeFileNameGeneratedBy(f, "#", filename, generator);
    103    return f;
    104 }
    105 
    106 U_CAPI void U_EXPORT2
    107 usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) {
    108    fprintf(f,
    109        "%s Copyright (C) %d and later: Unicode, Inc. and others.\n"
    110        "%s License & terms of use: http://www.unicode.org/copyright.html\n",
    111        prefix, copyrightYear, prefix);
    112    if (copyrightYear <= 2016) {
    113        fprintf(f,
    114            "%s Copyright (C) 1999-2016, International Business Machines\n"
    115            "%s Corporation and others.  All Rights Reserved.\n",
    116            prefix, prefix);
    117    }
    118 }
    119 
    120 U_CAPI void U_EXPORT2
    121 usrc_writeFileNameGeneratedBy(
    122        FILE *f,
    123        const char *prefix,
    124        const char *filename,
    125        const char *generator) {
    126    char buffer[1024];
    127    const struct tm *lt;
    128    time_t t;
    129 
    130    const char *pattern = 
    131        "%s\n"
    132        "%s file name: %s\n"
    133        "%s\n"
    134        "%s machine-generated by: %s\n"
    135        "\n";
    136 
    137    time(&t);
    138    lt=localtime(&t);
    139    if(generator==nullptr && lt!=nullptr) {
    140        strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
    141        fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer);
    142    } else {
    143        fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator);
    144    }
    145 }
    146 
    147 U_CAPI void U_EXPORT2
    148 usrc_writeArray(FILE *f,
    149                const char *prefix,
    150                const void *p, int32_t width, int32_t length,
    151                const char *indent,
    152                const char *postfix) {
    153    const uint8_t *p8;
    154    const uint16_t *p16;
    155    const uint32_t *p32;
    156    const int64_t *p64; // Signed due to TOML!
    157    int64_t value; // Signed due to TOML!
    158    int32_t i, col;
    159 
    160    p8=nullptr;
    161    p16=nullptr;
    162    p32=nullptr;
    163    p64=nullptr;
    164    switch(width) {
    165    case 1:
    166    case 8:
    167        p8=(const uint8_t *)p;
    168        break;
    169    case 16:
    170        p16=(const uint16_t *)p;
    171        break;
    172    case 32:
    173        p32=(const uint32_t *)p;
    174        break;
    175    case 64:
    176        p64=(const int64_t *)p;
    177        break;
    178    default:
    179        fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
    180        return;
    181    }
    182    if(prefix!=nullptr) {
    183        fprintf(f, prefix, (long)length);
    184    }
    185    for(i=col=0; i<length; ++i, ++col) {
    186        if(i>0) {
    187            if(col<16) {
    188                fputc(',', f);
    189            } else {
    190                fputs(",\n", f);
    191                fputs(indent, f);
    192                col=0;
    193            }
    194        }
    195        switch(width) {
    196        case 1:
    197        case 8:
    198            value=p8[i];
    199            break;
    200        case 16:
    201            value=p16[i];
    202            break;
    203        case 32:
    204            value=p32[i];
    205            break;
    206        case 64:
    207            value=p64[i];
    208            break;
    209        default:
    210            value=0; /* unreachable */
    211            break;
    212        }
    213        if (width == 1) {
    214            fprintf(f, value ? "true" : "false");
    215        } else {
    216            fprintf(f, value<=9 ? "%" PRId64 : "0x%" PRIx64, value);
    217        }
    218    }
    219    if(postfix!=nullptr) {
    220        fputs(postfix, f);
    221    }
    222 }
    223 
    224 U_CAPI void U_EXPORT2
    225 usrc_writeUTrie2Arrays(FILE *f,
    226                       const char *indexPrefix, const char *data32Prefix,
    227                       const UTrie2 *pTrie,
    228                       const char *postfix) {
    229    if(pTrie->data32==nullptr) {
    230        /* 16-bit trie */
    231        usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix);
    232    } else {
    233        /* 32-bit trie */
    234        usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix);
    235        usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix);
    236    }
    237 }
    238 
    239 U_CAPI void U_EXPORT2
    240 usrc_writeUTrie2Struct(FILE *f,
    241                       const char *prefix,
    242                       const UTrie2 *pTrie,
    243                       const char *indexName, const char *data32Name,
    244                       const char *postfix) {
    245    if(prefix!=nullptr) {
    246        fputs(prefix, f);
    247    }
    248    if(pTrie->data32==nullptr) {
    249        /* 16-bit trie */
    250        fprintf(
    251            f,
    252            "    %s,\n"         /* index */
    253            "    %s+%ld,\n"     /* data16 */
    254            "    nullptr,\n",      /* data32 */
    255            indexName,
    256            indexName, 
    257            (long)pTrie->indexLength);
    258    } else {
    259        /* 32-bit trie */
    260        fprintf(
    261            f,
    262            "    %s,\n"         /* index */
    263            "    nullptr,\n"       /* data16 */
    264            "    %s,\n",        /* data32 */
    265            indexName,
    266            data32Name);
    267    }
    268    fprintf(
    269        f,
    270        "    %ld,\n"            /* indexLength */
    271        "    %ld,\n"            /* dataLength */
    272        "    0x%hx,\n"          /* index2NullOffset */
    273        "    0x%hx,\n"          /* dataNullOffset */
    274        "    0x%lx,\n"          /* initialValue */
    275        "    0x%lx,\n"          /* errorValue */
    276        "    0x%lx,\n"          /* highStart */
    277        "    0x%lx,\n"          /* highValueIndex */
    278        "    nullptr, 0, false, false, 0, nullptr\n",
    279        (long)pTrie->indexLength, (long)pTrie->dataLength,
    280        (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
    281        (long)pTrie->initialValue, (long)pTrie->errorValue,
    282        (long)pTrie->highStart, (long)pTrie->highValueIndex);
    283    if(postfix!=nullptr) {
    284        fputs(postfix, f);
    285    }
    286 }
    287 
    288 U_CAPI void U_EXPORT2
    289 usrc_writeUCPTrieArrays(FILE *f,
    290                        const char *indexPrefix, const char *dataPrefix,
    291                        const UCPTrie *pTrie,
    292                        const char *postfix,
    293                        UTargetSyntax syntax) {
    294    const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? "  " : "";
    295    usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix);
    296    int32_t width=
    297        pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
    298        pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
    299        pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
    300    usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix);
    301 }
    302 
    303 U_CAPI void U_EXPORT2
    304 usrc_writeUCPTrieStruct(FILE *f,
    305                        const char *prefix,
    306                        const UCPTrie *pTrie,
    307                        const char *indexName, const char *dataName,
    308                        const char *postfix,
    309                        UTargetSyntax syntax) {
    310    if(prefix!=nullptr) {
    311        fputs(prefix, f);
    312    }
    313    if (syntax == UPRV_TARGET_SYNTAX_CCODE) {
    314        fprintf(
    315            f,
    316            "    %s,\n"             // index
    317            "    { %s },\n",        // data (union)
    318            indexName,
    319            dataName);
    320    }
    321    const char* pattern =
    322        (syntax == UPRV_TARGET_SYNTAX_CCODE) ?
    323        "    %ld, %ld,\n"       // indexLength, dataLength
    324        "    0x%lx, 0x%x,\n"    // highStart, shifted12HighStart
    325        "    %d, %d,\n"         // type, valueWidth
    326        "    0, 0,\n"           // reserved32, reserved16
    327        "    0x%x, 0x%lx,\n"    // index3NullOffset, dataNullOffset
    328        "    0x%lx,\n"          // nullValue
    329        :
    330        "indexLength = %ld\n"
    331        "dataLength = %ld\n"
    332        "highStart = 0x%lx\n"
    333        "shifted12HighStart = 0x%x\n"
    334        "type = %d\n"
    335        "valueWidth = %d\n"
    336        "index3NullOffset = 0x%x\n"
    337        "dataNullOffset = 0x%lx\n"
    338        "nullValue = 0x%lx\n"
    339        ;
    340    fprintf(
    341        f,
    342        pattern,
    343        (long)pTrie->indexLength, (long)pTrie->dataLength,
    344        (long)pTrie->highStart, pTrie->shifted12HighStart,
    345        pTrie->type, pTrie->valueWidth,
    346        pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
    347        (long)pTrie->nullValue);
    348    if(postfix!=nullptr) {
    349        fputs(postfix, f);
    350    }
    351 }
    352 
    353 U_CAPI void U_EXPORT2
    354 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) {
    355    int32_t width=
    356        pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
    357        pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
    358        pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
    359    char line[100], line2[100], line3[100], line4[100];
    360 
    361    switch (syntax) {
    362    case UPRV_TARGET_SYNTAX_CCODE:
    363        snprintf(line, sizeof(line), "static const uint16_t %s_trieIndex[%%ld]={\n", name);
    364        snprintf(line2, sizeof(line2), "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
    365        snprintf(line3, sizeof(line3), "\n};\n\n");
    366        break;
    367    case UPRV_TARGET_SYNTAX_TOML:
    368        snprintf(line, sizeof(line), "index = [\n  ");
    369        snprintf(line2, sizeof(line2), "data_%d = [\n  ", (int)width);
    370        snprintf(line3, sizeof(line3), "\n]\n");
    371        break;
    372    default:
    373        UPRV_UNREACHABLE_EXIT;
    374    }
    375    usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax);
    376 
    377    switch (syntax) {
    378    case UPRV_TARGET_SYNTAX_CCODE:
    379        snprintf(line, sizeof(line), "static const UCPTrie %s_trie={\n", name);
    380        snprintf(line2, sizeof(line2), "%s_trieIndex", name);
    381        snprintf(line3, sizeof(line3), "%s_trieData", name);
    382        snprintf(line4, sizeof(line4), "};\n\n");
    383        break;
    384    case UPRV_TARGET_SYNTAX_TOML:
    385        line[0] = 0;
    386        line2[0] = 0;
    387        line3[0] = 0;
    388        line4[0] = 0;
    389        break;
    390    default:
    391        UPRV_UNREACHABLE_EXIT;
    392    }
    393    usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax);
    394 }
    395 
    396 U_CAPI void U_EXPORT2
    397 usrc_writeUnicodeSet(
    398        FILE *f,
    399        const USet *pSet,
    400        UTargetSyntax syntax) {
    401    // ccode is not yet supported
    402    U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
    403 
    404    // Write out a list of ranges
    405    const UnicodeSet* set = UnicodeSet::fromUSet(pSet);
    406    UnicodeSetIterator it(*set);
    407    fprintf(f, "# Inclusive ranges of the code points in the set.\n");
    408    fprintf(f, "ranges = [\n");
    409    bool seenFirstString = false;
    410    while (it.nextRange()) {
    411        if (it.isString()) {
    412            if (!seenFirstString) {
    413                seenFirstString = true;
    414                fprintf(f, "]\nstrings = [\n");
    415            }
    416            const UnicodeString& str = it.getString();
    417            fprintf(f, "  ");
    418            usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax);
    419            fprintf(f, ",\n");
    420        } else {
    421            U_ASSERT(!seenFirstString);
    422            UChar32 start = it.getCodepoint();
    423            UChar32 end = it.getCodepointEnd();
    424            fprintf(f, "  [0x%x, 0x%x],\n", start, end);
    425        }
    426    }
    427    fprintf(f, "]\n");
    428 }
    429 
    430 U_CAPI void U_EXPORT2
    431 usrc_writeUCPMap(
    432        FILE *f,
    433        const UCPMap *pMap,
    434        icu::ValueNameGetter *valueNameGetter,
    435        UTargetSyntax syntax) {
    436    // ccode is not yet supported
    437    U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
    438    (void) syntax; // silence unused variable errors
    439 
    440    // Print out list of ranges
    441    UChar32 start = 0, end;
    442    uint32_t value;
    443    fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n");
    444    fprintf(f, "ranges = [\n");
    445    while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) {
    446        if (valueNameGetter != nullptr) {
    447            const char *name = valueNameGetter->getName(value);
    448            fprintf(f, "  {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name);
    449        } else {
    450            fprintf(f, "  {a=0x%x, b=0x%x, v=%u},\n", start, end, value);
    451        }
    452        start = end + 1;
    453    }
    454    fprintf(f, "]\n");
    455 }
    456 
    457 U_CAPI void U_EXPORT2
    458 usrc_writeArrayOfMostlyInvChars(FILE *f,
    459                                const char *prefix,
    460                                const char *p, int32_t length,
    461                                const char *postfix) {
    462    int32_t i, col;
    463    int prev2, prev, c;
    464 
    465    if(prefix!=nullptr) {
    466        fprintf(f, prefix, (long)length);
    467    }
    468    prev2=prev=-1;
    469    for(i=col=0; i<length; ++i, ++col) {
    470        c=(uint8_t)p[i];
    471        if(i>0) {
    472            /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
    473            if( 
    474                /* Very long line. */
    475                col>=32 ||
    476                /* Long line, break after terminating NUL. */
    477                (col>=24 && prev2>=0x20 && prev==0) ||
    478                /* Medium-long line, break before non-NUL, non-character byte. */
    479                (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
    480            ) {
    481                fputs(",\n", f);
    482                col=0;
    483            } else {
    484                fputc(',', f);
    485            }
    486        }
    487        fprintf(f, c<0x20 ? "%u" : "'%c'", c);
    488        prev2=prev;
    489        prev=c;
    490    }
    491    if(postfix!=nullptr) {
    492        fputs(postfix, f);
    493    }
    494 }
    495 
    496 U_CAPI void U_EXPORT2
    497 usrc_writeStringAsASCII(FILE *f,
    498        const char16_t* ptr, int32_t length,
    499        UTargetSyntax) {
    500    // For now, assume all UTargetSyntax values are valid here.
    501    fprintf(f, "\"");
    502    int32_t i = 0;
    503    UChar32 cp;
    504    while (i < length) {
    505        U16_NEXT(ptr, i, length, cp);
    506        if (cp == u'"') {
    507            fprintf(f, "\\\"");
    508        } else if (ICU_Utility::isUnprintable(cp)) {
    509            UnicodeString u16result;
    510            ICU_Utility::escapeUnprintable(u16result, cp);
    511            std::string u8result;
    512            u16result.toUTF8String(u8result);
    513            fprintf(f, "%s", u8result.data());
    514        } else {
    515            U_ASSERT(cp < 0x80);
    516            char s[2] = {static_cast<char>(cp), 0};
    517            fprintf(f, "%s", s);
    518        }
    519    }
    520    fprintf(f, "\"");
    521 }