tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

icuexportdata.cpp (62501B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include <cstddef>
      5 #include <cstdint>
      6 #include <cstdio>
      7 #include <iostream>
      8 #include "unicode/localpointer.h"
      9 #include "unicode/umachine.h"
     10 #include "unicode/unistr.h"
     11 #include "unicode/urename.h"
     12 #include "unicode/uset.h"
     13 #include <vector>
     14 #include <algorithm>
     15 #include "toolutil.h"
     16 #include "uoptions.h"
     17 #include "cmemory.h"
     18 #include "charstr.h"
     19 #include "cstring.h"
     20 #include "unicode/uchar.h"
     21 #include "unicode/errorcode.h"
     22 #include "unicode/uniset.h"
     23 #include "unicode/uscript.h"
     24 #include "unicode/putil.h"
     25 #include "unicode/umutablecptrie.h"
     26 #include "unicode/ucharstriebuilder.h"
     27 #include "ucase.h"
     28 #include "unicode/normalizer2.h"
     29 #include "uprops.h"
     30 #include "normalizer2impl.h"
     31 #include "writesrc.h"
     32 
     33 U_NAMESPACE_USE
     34 
     35 /*
     36 * Global - verbosity
     37 */
     38 UBool VERBOSE = false;
     39 UBool QUIET = false;
     40 
     41 UBool haveCopyright = true;
     42 UCPTrieType trieType = UCPTRIE_TYPE_SMALL;
     43 const char* destdir = "";
     44 
     45 // Mask constants for modified values in the Script CodePointTrie, values are logically 12-bits.
     46 int16_t DATAEXPORT_SCRIPT_X_WITH_COMMON    = 0x0400;
     47 int16_t DATAEXPORT_SCRIPT_X_WITH_INHERITED = 0x0800;
     48 int16_t DATAEXPORT_SCRIPT_X_WITH_OTHER     = 0x0c00;
     49 
     50 void handleError(ErrorCode& status, int line, const char* context) {
     51    if (status.isFailure()) {
     52        std::cerr << "Error[" << line << "]: " << context << ": " << status.errorName() << std::endl;
     53        exit(status.reset());
     54    }
     55 }
     56 
     57 class PropertyValueNameGetter : public ValueNameGetter {
     58 public:
     59    PropertyValueNameGetter(UProperty prop) : property(prop) {}
     60    ~PropertyValueNameGetter() override;
     61    const char *getName(uint32_t value) override {
     62        return u_getPropertyValueName(property, value, U_SHORT_PROPERTY_NAME);
     63    }
     64 
     65 private:
     66    UProperty property;
     67 };
     68 
     69 PropertyValueNameGetter::~PropertyValueNameGetter() {}
     70 
     71 // Dump an aliases = [...] key for properties with aliases
     72 void dumpPropertyAliases(UProperty uproperty, FILE* f) {
     73    int i = U_LONG_PROPERTY_NAME + 1;
     74 
     75    while(true) {
     76        // The API works by having extra names after U_LONG_PROPERTY_NAME, sequentially,
     77        // and returning null after that
     78        const char* alias = u_getPropertyName(uproperty, static_cast<UPropertyNameChoice>(i));
     79        if (!alias) {
     80            break;
     81        }
     82        if (i == U_LONG_PROPERTY_NAME + 1) {
     83            fprintf(f, "aliases = [\"%s\"", alias);
     84        } else {
     85            fprintf(f, ", \"%s\"", alias);
     86        }
     87        i++;
     88    }
     89    if (i != U_LONG_PROPERTY_NAME + 1) {
     90        fprintf(f, "]\n");
     91    }
     92 }
     93 
     94 void dumpBinaryProperty(UProperty uproperty, FILE* f) {
     95    IcuToolErrorCode status("icuexportdata: dumpBinaryProperty");
     96    const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
     97    const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
     98    const USet* uset = u_getBinaryPropertySet(uproperty, status);
     99    handleError(status, __LINE__, fullPropName);
    100 
    101    fputs("[[binary_property]]\n", f);
    102    fprintf(f, "long_name = \"%s\"\n", fullPropName);
    103    if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
    104    fprintf(f, "uproperty_discr = 0x%X\n", uproperty);
    105    dumpPropertyAliases(uproperty, f);
    106    usrc_writeUnicodeSet(f, uset, UPRV_TARGET_SYNTAX_TOML);
    107 }
    108 
    109 // If the value exists, dump an indented entry of the format
    110 // `"  {discr = <discriminant>, long = <longname>, short = <shortname>, aliases = [<aliases>]},"`
    111 void dumpValueEntry(UProperty uproperty, int v, bool is_mask, FILE* f) {
    112    const char* fullValueName = u_getPropertyValueName(uproperty, v, U_LONG_PROPERTY_NAME);
    113    const char* shortValueName = u_getPropertyValueName(uproperty, v, U_SHORT_PROPERTY_NAME);
    114    if (!fullValueName) {
    115        return;
    116    }
    117    if (is_mask) {
    118        fprintf(f, "  {discr = 0x%X", v);
    119    } else {
    120        fprintf(f, "  {discr = %i", v);
    121    }
    122    fprintf(f, ", long = \"%s\"", fullValueName);
    123    if (shortValueName) {
    124        fprintf(f, ", short = \"%s\"", shortValueName);
    125    }
    126    int i = U_LONG_PROPERTY_NAME + 1;
    127    while(true) {
    128        // The API works by having extra names after U_LONG_PROPERTY_NAME, sequentially,
    129        // and returning null after that
    130        const char* alias = u_getPropertyValueName(uproperty, v, static_cast<UPropertyNameChoice>(i));
    131        if (!alias) {
    132            break;
    133        }
    134        if (i == U_LONG_PROPERTY_NAME + 1) {
    135            fprintf(f, ", aliases = [\"%s\"", alias);
    136        } else {
    137            fprintf(f, ", \"%s\"", alias);
    138        }
    139        i++;
    140    }
    141    if (i != U_LONG_PROPERTY_NAME + 1) {
    142        fprintf(f, "]");
    143    }
    144    fprintf(f, "},\n");
    145 }
    146 
    147 void dumpEnumeratedProperty(UProperty uproperty, FILE* f) {
    148    IcuToolErrorCode status("icuexportdata: dumpEnumeratedProperty");
    149    const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
    150    const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
    151    const UCPMap* umap = u_getIntPropertyMap(uproperty, status);
    152    handleError(status, __LINE__, fullPropName);
    153 
    154    fputs("[[enum_property]]\n", f);
    155    fprintf(f, "long_name = \"%s\"\n", fullPropName);
    156    if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
    157    fprintf(f, "uproperty_discr = 0x%X\n", uproperty);
    158    dumpPropertyAliases(uproperty, f);
    159 
    160    int32_t minValue = u_getIntPropertyMinValue(uproperty);
    161    U_ASSERT(minValue >= 0);
    162    int32_t maxValue = u_getIntPropertyMaxValue(uproperty);
    163    U_ASSERT(maxValue >= 0);
    164 
    165    fprintf(f, "values = [\n");
    166    for (int v = minValue; v <= maxValue; v++) {
    167        dumpValueEntry(uproperty, v, false, f);
    168    }
    169    fprintf(f, "]\n");
    170 
    171    PropertyValueNameGetter valueNameGetter(uproperty);
    172    usrc_writeUCPMap(f, umap, &valueNameGetter, UPRV_TARGET_SYNTAX_TOML);
    173    fputs("\n", f);
    174 
    175 
    176    UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_32;
    177    if (maxValue <= 0xff) {
    178        width = UCPTRIE_VALUE_BITS_8;
    179    } else if (maxValue <= 0xffff) {
    180        width = UCPTRIE_VALUE_BITS_16;
    181    }
    182    LocalUMutableCPTriePointer builder(umutablecptrie_fromUCPMap(umap, status));
    183    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
    184        builder.getAlias(),
    185        trieType,
    186        width,
    187        status));
    188    handleError(status, __LINE__, fullPropName);
    189 
    190    fputs("[enum_property.code_point_trie]\n", f);
    191    usrc_writeUCPTrie(f, shortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
    192 }
    193 
    194 /*
    195 * Export Bidi_Mirroring_Glyph values (code points) in a similar way to how enumerated
    196 * properties are dumped to file.
    197 * Note: the data will store 0 for code points without a value defined for
    198 * Bidi_Mirroring_Glyph.
    199 */
    200 void dumpBidiMirroringGlyph(FILE* f) {
    201    UProperty uproperty = UCHAR_BIDI_MIRRORING_GLYPH;
    202    IcuToolErrorCode status("icuexportdata: dumpBidiMirroringGlyph");
    203    const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
    204    const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
    205    handleError(status, __LINE__, fullPropName);
    206 
    207    // Store 21-bit code point as is
    208    UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_32;
    209 
    210    // note: unlike dumpEnumeratedProperty, which can get inversion map data using
    211    // u_getIntPropertyMap(uproperty), the only reliable way to get Bidi_Mirroring_Glyph
    212    // is to use u_charMirror(cp) over the code point space.
    213    LocalUMutableCPTriePointer builder(umutablecptrie_open(0, 0, status));
    214    for(UChar32 c = UCHAR_MIN_VALUE; c <= UCHAR_MAX_VALUE; c++) {
    215        UChar32 mirroringGlyph = u_charMirror(c);
    216        // The trie builder code throws an error when it cannot compress the data sufficiently.
    217        // Therefore, when the value is undefined for a code point, keep a 0 in the trie
    218        // instead of the ICU API behavior of returning the code point value. Using 0
    219        // results in a relatively significant space savings by not including redundant data.
    220        if (c != mirroringGlyph) {
    221            umutablecptrie_set(builder.getAlias(), c, mirroringGlyph, status);
    222        }
    223    }
    224 
    225    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
    226        builder.getAlias(),
    227        trieType,
    228        width,
    229        status));
    230    handleError(status, __LINE__, fullPropName);
    231 
    232    // currently a trie and inversion map are the same (as relied upon in characterproperties.cpp)
    233    const UCPMap* umap = reinterpret_cast<UCPMap *>(utrie.getAlias());
    234 
    235    fputs("[[enum_property]]\n", f);
    236    fprintf(f, "long_name = \"%s\"\n", fullPropName);
    237    if (shortPropName) {
    238        fprintf(f, "short_name = \"%s\"\n", shortPropName);
    239    }
    240    fprintf(f, "uproperty_discr = 0x%X\n", uproperty);
    241    dumpPropertyAliases(uproperty, f);
    242 
    243    usrc_writeUCPMap(f, umap, nullptr, UPRV_TARGET_SYNTAX_TOML);
    244    fputs("\n", f);
    245 
    246    fputs("[enum_property.code_point_trie]\n", f);
    247    usrc_writeUCPTrie(f, shortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
    248 }
    249 
    250 // After printing property value `v`, print `mask` if and only if `mask` comes immediately
    251 // after the property in the listing
    252 void maybeDumpMaskValue(UProperty uproperty, uint32_t v, uint32_t mask, FILE* f) {
    253    if (U_MASK(v) < mask && U_MASK(v + 1) > mask)
    254        dumpValueEntry(uproperty, mask, true, f);
    255 }
    256 
    257 void dumpGeneralCategoryMask(FILE* f) {
    258    IcuToolErrorCode status("icuexportdata: dumpGeneralCategoryMask");
    259    UProperty uproperty = UCHAR_GENERAL_CATEGORY_MASK;
    260 
    261    fputs("[[mask_property]]\n", f);
    262    const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
    263    const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
    264    fprintf(f, "long_name = \"%s\"\n", fullPropName);
    265    if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
    266    fprintf(f, "uproperty_discr = 0x%X\n", uproperty);
    267    dumpPropertyAliases(uproperty, f);
    268 
    269 
    270    fprintf(f, "mask_for = \"General_Category\"\n");
    271    int32_t minValue = u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY);
    272    U_ASSERT(minValue >= 0);
    273    int32_t maxValue = u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY);
    274    U_ASSERT(maxValue >= 0);
    275 
    276    fprintf(f, "values = [\n");
    277    for (int32_t v = minValue; v <= maxValue; v++) {
    278        dumpValueEntry(uproperty, U_MASK(v), true, f);
    279 
    280        // We want to dump these masks "in order", which means they
    281        // should come immediately after every property they contain
    282        maybeDumpMaskValue(uproperty, v, U_GC_L_MASK, f);
    283        maybeDumpMaskValue(uproperty, v, U_GC_LC_MASK, f);
    284        maybeDumpMaskValue(uproperty, v, U_GC_M_MASK, f);
    285        maybeDumpMaskValue(uproperty, v, U_GC_N_MASK, f);
    286        maybeDumpMaskValue(uproperty, v, U_GC_Z_MASK, f);
    287        maybeDumpMaskValue(uproperty, v, U_GC_C_MASK, f);
    288        maybeDumpMaskValue(uproperty, v, U_GC_P_MASK, f);
    289        maybeDumpMaskValue(uproperty, v, U_GC_S_MASK, f);
    290    }
    291    fprintf(f, "]\n");
    292 }
    293 
    294 namespace {
    295 
    296 void U_CALLCONV
    297 set_add(USet *set, UChar32 c) {
    298    UnicodeSet::fromUSet(set)->add(c);
    299 }
    300 
    301 void U_CALLCONV
    302 set_addRange(USet *set, UChar32 start, UChar32 end) {
    303    UnicodeSet::fromUSet(set)->add(start, end);
    304 }
    305 
    306 }
    307 
    308 UnicodeSet getScriptExtensionsCodePoints(IcuToolErrorCode &errorCode) {
    309    UnicodeSet scxCPs;
    310    USetAdder sa = {
    311        scxCPs.toUSet(),
    312        set_add,
    313        set_addRange,
    314        nullptr, // don't need addString,
    315        nullptr, // don't need remove()
    316        nullptr // don't need removeRange()
    317    };
    318    uprv_addScriptExtensionsCodePoints(&sa, errorCode);
    319    return scxCPs;
    320 }
    321 
    322 void dumpScriptExtensions(FILE* f) {
    323    IcuToolErrorCode status("icuexportdata: dumpScriptExtensions");
    324 
    325    fputs("[[script_extensions]]\n", f);
    326    const char* scxFullPropName = u_getPropertyName(UCHAR_SCRIPT_EXTENSIONS, U_LONG_PROPERTY_NAME);
    327    const char* scxShortPropName = u_getPropertyName(UCHAR_SCRIPT_EXTENSIONS, U_SHORT_PROPERTY_NAME);
    328    fprintf(f, "long_name = \"%s\"\n", scxFullPropName);
    329    if (scxShortPropName) fprintf(f, "short_name = \"%s\"\n", scxShortPropName);
    330    fprintf(f, "uproperty_discr = 0x%X\n", UCHAR_SCRIPT_EXTENSIONS);
    331    dumpPropertyAliases(UCHAR_SCRIPT_EXTENSIONS, f);
    332 
    333    // We want to use 16 bits for our exported trie of sc/scx data because we
    334    // need 12 bits to match the 12 bits of data stored for sc/scx in the trie
    335    // in the uprops.icu data file.
    336    UCPTrieValueWidth scWidth = UCPTRIE_VALUE_BITS_16;
    337 
    338    // Create a mutable UCPTrie builder populated with Script property values data.
    339    const UCPMap* scInvMap = u_getIntPropertyMap(UCHAR_SCRIPT, status);
    340    handleError(status, __LINE__, scxFullPropName);
    341    LocalUMutableCPTriePointer builder(umutablecptrie_fromUCPMap(scInvMap, status));
    342    handleError(status, __LINE__, scxFullPropName);
    343 
    344    // The values for the output scx companion array.
    345    // Invariant is that all subvectors are distinct.
    346    std::vector< std::vector<uint16_t> > outputDedupVec;
    347 
    348    // The sc/scx companion array is an array of arrays (of script codes)
    349    fputs("script_code_array = [\n", f);
    350    UnicodeSet scxCodePoints = getScriptExtensionsCodePoints(status);
    351    for(const UChar32 cp : scxCodePoints.codePoints()) {
    352        // Get the Script value
    353        uint32_t scVal = umutablecptrie_get(builder.getAlias(), cp);
    354        // Get the Script_Extensions value (array of Script codes)
    355        const int32_t SCX_ARRAY_CAPACITY = 32;
    356        UScriptCode scxValArray[SCX_ARRAY_CAPACITY];
    357        int32_t numScripts = uscript_getScriptExtensions(cp, scxValArray, SCX_ARRAY_CAPACITY, status);
    358        handleError(status, __LINE__, scxFullPropName);
    359 
    360        // Convert the scx array into a vector
    361        std::vector<uint16_t> scxValVec;
    362        for(int i = 0; i < numScripts; i++) {
    363            scxValVec.push_back(scxValArray[i]);
    364        }
    365        // Ensure that it is sorted
    366        std::sort(scxValVec.begin(), scxValVec.end());
    367        // Copy the Script value into the first position of the scx array only
    368        // if we have the "other" case (Script value is not Common nor Inherited).
    369        // This offers faster access when users want only the Script value.
    370        if (scVal != USCRIPT_COMMON && scVal != USCRIPT_INHERITED) {
    371            scxValVec.insert(scxValVec.begin(), scVal);
    372        }
    373 
    374        // See if there is already an scx value array matching the newly built one.
    375        // If there is, then use its index.
    376        // If not, then append the new value array.
    377        bool isScxValUnique = true;
    378        size_t outputIndex = 0;
    379        for (outputIndex = 0; outputIndex < outputDedupVec.size(); outputIndex++) {
    380            if (outputDedupVec[outputIndex] == scxValVec) {
    381                isScxValUnique = false;
    382                break;
    383            }
    384        }
    385 
    386        if (isScxValUnique) {
    387            outputDedupVec.push_back(scxValVec);
    388            usrc_writeArray(f, "  [", scxValVec.data(), 16, scxValVec.size(), "    ", "],\n");
    389        }
    390 
    391        // We must update the value in the UCPTrie for the code point to contain:
    392        // 9..0 the Script code in the lower 10 bits when 11..10 is 0, else it is
    393        //   the index into the companion array
    394        // 11..10 the same higher-order 2 bits in the trie in uprops.icu indicating whether
    395        //   3: other
    396        //   2: Script=Inherited
    397        //   1: Script=Common
    398        //   0: Script=value in 9..0 (N/A because we are in this loop to create the companion array for non-0 cases)
    399        uint16_t mask = 0;
    400        if (scVal == USCRIPT_COMMON) {
    401            mask = DATAEXPORT_SCRIPT_X_WITH_COMMON;
    402        } else if (scVal == USCRIPT_INHERITED) {
    403            mask = DATAEXPORT_SCRIPT_X_WITH_INHERITED;
    404        } else {
    405            mask = DATAEXPORT_SCRIPT_X_WITH_OTHER;
    406        }
    407 
    408        // The new trie value is the index into the new array with the high order bits set
    409        uint32_t newScVal = outputIndex | mask;
    410 
    411        // Update the code point in the mutable trie builder with the trie value
    412        umutablecptrie_set(builder.getAlias(), cp, newScVal, status);
    413        handleError(status, __LINE__, scxFullPropName);
    414    }
    415    fputs("]\n\n", f);  // Print the TOML close delimiter for the outer array.
    416 
    417    // Convert from mutable trie builder to immutable trie.
    418    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
    419        builder.getAlias(),
    420        trieType,
    421        scWidth,
    422        status));
    423    handleError(status, __LINE__, scxFullPropName);
    424 
    425    fputs("[script_extensions.code_point_trie]\n", f);
    426    usrc_writeUCPTrie(f, scxShortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
    427 }
    428 
    429 FILE* prepareOutputFile(const char* basename) {
    430    IcuToolErrorCode status("icuexportdata");
    431    CharString outFileName;
    432    if (destdir != nullptr && *destdir != 0) {
    433        outFileName.append(destdir, status).ensureEndsWithFileSeparator(status);
    434    }
    435    outFileName.append(basename, status);
    436    outFileName.append(".toml", status);
    437    handleError(status, __LINE__, basename);
    438 
    439    FILE* f = fopen(outFileName.data(), "w");
    440    if (f == nullptr) {
    441        std::cerr << "Unable to open file: " << outFileName.data() << std::endl;
    442        exit(U_FILE_ACCESS_ERROR);
    443    }
    444    if (!QUIET) {
    445        std::cout << "Writing to: " << outFileName.data() << std::endl;
    446    }
    447 
    448    if (haveCopyright) {
    449        usrc_writeCopyrightHeader(f, "#", 2021);
    450    }
    451    usrc_writeFileNameGeneratedBy(f, "#", basename, "icuexportdata.cpp");
    452 
    453    return f;
    454 }
    455 
    456 #if !UCONFIG_NO_NORMALIZATION
    457 
    458 class PendingDescriptor {
    459 public:
    460    UChar32 scalar;
    461    uint32_t descriptorOrFlags;
    462    // If false, we use the above fields only. If true, descriptor only
    463    // contains the two highest-bit flags and the rest is computed later
    464    // from the fields below.
    465    UBool complex;
    466    UBool supplementary;
    467    UBool onlyNonStartersInTrail;
    468    uint32_t len;
    469    uint32_t offset;
    470 
    471    PendingDescriptor(UChar32 scalar, uint32_t descriptor);
    472    PendingDescriptor(UChar32 scalar, uint32_t flags, UBool supplementary, UBool onlyNonStartersInTrail, uint32_t len, uint32_t offset);
    473 };
    474 
    475 PendingDescriptor::PendingDescriptor(UChar32 scalar, uint32_t descriptor)
    476 : scalar(scalar), descriptorOrFlags(descriptor), complex(false), supplementary(false), onlyNonStartersInTrail(false), len(0), offset(0) {}
    477 
    478 PendingDescriptor::PendingDescriptor(UChar32 scalar, uint32_t flags, UBool supplementary, UBool onlyNonStartersInTrail, uint32_t len, uint32_t offset)
    479 : scalar(scalar), descriptorOrFlags(flags), complex(true), supplementary(supplementary), onlyNonStartersInTrail(onlyNonStartersInTrail), len(len), offset(offset) {}
    480 
    481 void writeCanonicalCompositions(USet* backwardCombiningStarters) {
    482    IcuToolErrorCode status("icuexportdata: computeCanonicalCompositions");
    483    const char* basename = "compositions";
    484    FILE* f = prepareOutputFile(basename);
    485 
    486    LocalPointer<UCharsTrieBuilder> backwardBuilder(new UCharsTrieBuilder(status), status);
    487 
    488    const int32_t DECOMPOSITION_BUFFER_SIZE = 20;
    489    UChar32 utf32[DECOMPOSITION_BUFFER_SIZE];
    490 
    491    const Normalizer2* nfc = Normalizer2::getNFCInstance(status);
    492    for (UChar32 c = 0; c <= 0x10FFFF; ++c) {
    493        if (c >= 0xD800 && c < 0xE000) {
    494            // Surrogate
    495            continue;
    496        }
    497        UnicodeString decomposition;
    498        if (!nfc->getRawDecomposition(c, decomposition)) {
    499            continue;
    500        }
    501        int32_t len = decomposition.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
    502        if (len != 2) {
    503            continue;
    504        }
    505        UChar32 starter = utf32[0];
    506        UChar32 second = utf32[1];
    507        UChar32 composite = nfc->composePair(starter, second);
    508        if (composite < 0) {
    509            continue;
    510        }
    511        if (c != composite) {
    512            status.set(U_INTERNAL_PROGRAM_ERROR);
    513            handleError(status, __LINE__, basename);
    514        }
    515        if (!u_getCombiningClass(second)) {
    516            uset_add(backwardCombiningStarters, second);
    517        }
    518        if (composite >= 0xAC00 && composite <= 0xD7A3) {
    519            // Hangul syllable
    520            continue;
    521        }
    522 
    523        UnicodeString backward;
    524        backward.append(second);
    525        backward.append(starter);
    526        backwardBuilder->add(backward, static_cast<int32_t>(composite), status);
    527    }
    528    UnicodeString canonicalCompositionTrie;
    529    backwardBuilder->buildUnicodeString(USTRINGTRIE_BUILD_SMALL, canonicalCompositionTrie, status);
    530 
    531    usrc_writeArray(f, "compositions = [\n  ", canonicalCompositionTrie.getBuffer(), 16, canonicalCompositionTrie.length(), "  ", "\n]\n");
    532    fclose(f);
    533    handleError(status, __LINE__, basename);
    534 }
    535 
    536 void writeDecompositionTables(const char* basename, const uint16_t* ptr16, size_t len16, const uint32_t* ptr32, size_t len32) {
    537    FILE* f = prepareOutputFile(basename);
    538    usrc_writeArray(f, "scalars16 = [\n  ", ptr16, 16, len16, "  ", "\n]\n");
    539    usrc_writeArray(f, "scalars32 = [\n  ", ptr32, 32, len32, "  ", "\n]\n");
    540    fclose(f);
    541 }
    542 
    543 void pendingInsertionsToTrie(const char* basename, UMutableCPTrie* trie, const std::vector<PendingDescriptor>& pendingTrieInsertions, uint32_t baseSize16, uint32_t baseSize32, uint32_t supplementSize16) {
    544    IcuToolErrorCode status("icuexportdata: pendingInsertionsToTrie");
    545    // Iterate backwards to insert lower code points in the trie first in case it matters
    546    // for trie block allocation.
    547    for (int32_t i = pendingTrieInsertions.size() - 1; i >= 0; --i) {
    548        const PendingDescriptor& pending = pendingTrieInsertions[i];
    549        if (pending.complex) {
    550            uint32_t additional = 0;
    551            uint32_t offset = pending.offset;
    552            uint32_t len = pending.len;
    553            if (!pending.supplementary) {
    554                len -= 2;
    555                if (offset >= baseSize16) {
    556                    // This is a offset to supplementary 16-bit data. We have
    557                    // 16-bit base data and 32-bit base data before. However,
    558                    // the 16-bit base data length is already part of offset.
    559                    additional = baseSize32;
    560                }
    561            } else {
    562                len -= 1;
    563                if (offset >= baseSize32) {
    564                    // This is an offset to supplementary 32-bit data. We have 16-bit
    565                    // base data, 32-bit base data, and 16-bit supplementary data before.
    566                    // However, the 32-bit base data length is already part
    567                    // of offset.
    568                    additional = baseSize16 + supplementSize16;
    569                } else {
    570                    // This is an offset to 32-bit base data. We have 16-bit
    571                    // base data before.
    572                    additional = baseSize16;
    573                }
    574            }
    575            // +1 to make offset always non-zero
    576            offset += 1;
    577            if (offset + additional > 0xFFF) {
    578                status.set(U_INTERNAL_PROGRAM_ERROR);
    579                handleError(status, __LINE__, basename);
    580            }
    581            if (len > 7) {
    582                status.set(U_INTERNAL_PROGRAM_ERROR);
    583                handleError(status, __LINE__, basename);
    584            }
    585            umutablecptrie_set(trie, pending.scalar, pending.descriptorOrFlags | (uint32_t(pending.onlyNonStartersInTrail) << 4) | len | (offset + additional) << 16, status);
    586        } else {
    587            umutablecptrie_set(trie, pending.scalar, pending.descriptorOrFlags, status);
    588        }
    589    }
    590 }
    591 
    592 /// Marker that the decomposition does not round trip via NFC.
    593 const uint32_t NON_ROUND_TRIP_MASK = (1 << 30);
    594 
    595 /// Marker that the first character of the decomposition can combine
    596 /// backwards.
    597 const uint32_t BACKWARD_COMBINING_MASK = (1 << 31);
    598 
    599 void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t baseSize32, uint32_t supplementSize16, USet* uset, USet* reference, const std::vector<PendingDescriptor>& pendingTrieInsertions, const std::vector<PendingDescriptor>& nfdPendingTrieInsertions, char16_t passthroughCap) {
    600    IcuToolErrorCode status("icuexportdata: writeDecompositionData");
    601    FILE* f = prepareOutputFile(basename);
    602 
    603    // Zero is a magic number that means the character decomposes to itself.
    604    LocalUMutableCPTriePointer builder(umutablecptrie_open(0, 0, status));
    605 
    606    if (uprv_strcmp(basename, "uts46d") != 0) {
    607        // Make surrogates decompose to U+FFFD. Don't do this for UTS 46, since this
    608        // optimization is only used by the UTF-16 slice mode, and UTS 46 is not
    609        // supported in slice modes (which do not support ignorables).
    610        // Mark these as potentially backward-combining, to make lead surrogates
    611        // for non-BMP characters that are backward-combining count as
    612        // backward-combining just in case, though the backward-combiningness
    613        // is not actually being looked at today.
    614        umutablecptrie_setRange(builder.getAlias(), 0xD800, 0xDFFF, NON_ROUND_TRIP_MASK | BACKWARD_COMBINING_MASK | 0xFFFD, status);
    615    }
    616 
    617    // Add a marker value for Hangul syllables
    618    umutablecptrie_setRange(builder.getAlias(), 0xAC00, 0xD7A3, 1, status);
    619 
    620    // First put the NFD data in the trie, to be partially overwritten in the NFKD and UTS 46 cases.
    621    // This is easier that changing the logic that computes the pending insertions.
    622    pendingInsertionsToTrie(basename, builder.getAlias(), nfdPendingTrieInsertions, baseSize16, baseSize32, supplementSize16);
    623    pendingInsertionsToTrie(basename, builder.getAlias(), pendingTrieInsertions, baseSize16, baseSize32, supplementSize16);
    624    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
    625        builder.getAlias(),
    626        trieType,
    627        UCPTRIE_VALUE_BITS_32,
    628        status));
    629    handleError(status, __LINE__, basename);
    630 
    631    // The ICU4X side has changed enough this whole block of expectation checking might be more appropriate to remove.
    632    if (reference) {
    633        if (uset_contains(reference, 0xFF9E) || uset_contains(reference, 0xFF9F) || !uset_contains(reference, 0x0345)) {
    634            // NFD expectations don't hold. The set must not contain the half-width
    635            // kana voicing marks and must contain iota subscript.
    636            status.set(U_INTERNAL_PROGRAM_ERROR);
    637            handleError(status, __LINE__, basename);
    638        }
    639 
    640        USet* halfWidthVoicing = uset_openEmpty();
    641        uset_add(halfWidthVoicing, 0xFF9E);
    642        uset_add(halfWidthVoicing, 0xFF9F);
    643 
    644        USet* iotaSubscript = uset_openEmpty();
    645        uset_add(iotaSubscript, 0x0345);
    646 
    647        USet* halfWidthCheck = uset_cloneAsThawed(uset);
    648        uset_removeAll(halfWidthCheck, reference);
    649        if (!uset_equals(halfWidthCheck, halfWidthVoicing) && !uset_isEmpty(halfWidthCheck)) {
    650            // The result was neither empty nor contained exactly
    651            // the two half-width voicing marks. The ICU4X
    652            // normalizer doesn't know how to deal with this case.
    653            status.set(U_INTERNAL_PROGRAM_ERROR);
    654            handleError(status, __LINE__, basename);
    655        }
    656        uset_close(halfWidthCheck);
    657 
    658        USet* iotaCheck = uset_cloneAsThawed(reference);
    659        uset_removeAll(iotaCheck, uset);
    660        if (!(uset_equals(iotaCheck, iotaSubscript)) && !uset_isEmpty(iotaCheck)) {
    661            // The result was neither empty nor contained exactly
    662            // the iota subscript. The ICU4X normalizer doesn't
    663            // know how to deal with this case.
    664            status.set(U_INTERNAL_PROGRAM_ERROR);
    665            handleError(status, __LINE__, basename);
    666        }
    667 
    668        uset_close(iotaSubscript);
    669        uset_close(halfWidthVoicing);
    670    }
    671    fprintf(f, "cap = 0x%X\n", passthroughCap);
    672    fprintf(f, "[trie]\n");
    673    usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
    674    fclose(f);
    675    handleError(status, __LINE__, basename);
    676 }
    677 
    678 // Find the slice `needle` within `storage` and return its index, failing which,
    679 // append all elements of `needle` to `storage` and return the index of it at the end.
    680 template<typename T>
    681 size_t findOrAppend(std::vector<T>& storage, const UChar32* needle, size_t needleLen) {
    682    // Last index where we might find the start of the complete needle.
    683    // bounds check is `i + needleLen <= storage.size()` since the inner
    684    // loop will range from `i` to `i + needleLen - 1` (the `-1` is why we use `<=`)
    685    for (size_t i = 0; i + needleLen <= storage.size(); i++) {
    686        for (size_t j = 0;; j++) {
    687            if (j == needleLen) {
    688                return i;  // found a match
    689            }
    690            if (storage[i + j] != static_cast<uint32_t>(needle[j])) {
    691                break;
    692            }
    693        }
    694    }
    695    // We didn't find anything. Append, keeping the append index in mind.
    696    size_t index = storage.size();
    697    for(size_t i = 0; i < needleLen; i++) {
    698        storage.push_back(static_cast<T>(needle[i]));
    699    }
    700 
    701    return index;
    702 }
    703 
    704 
// Computes data for canonical decompositions
// See components/normalizer/trie-value-format.md in the ICU4X repo
// for documentation of the trie value format.
//
// Walks every code point from U+10FFFF down to U+0000 (skipping Hangul
// syllables and surrogates), normalizes it, and queues at most one
// PendingDescriptor per code point in `pendingTrieInsertions`.
//
// basename: selects the normalization. "nfkd" uses the NFKD instance and
//     "uts46d" the "uts46" instance (each followed by NFD); any other value
//     uses plain NFD and additionally writes the "decompositionex" output
//     file. Also passed to handleError as the error context.
// backwardCombiningStarters: read-only; consulted for the first code point of
//     each decomposition.
// storage16 / storage32: decompositions that need out-of-line storage are
//     placed here through findOrAppend.
// decompositionStartsWithNonStarter /
// decompositionStartsWithBackwardCombiningStarter: receive the code points
//     whose decomposition starts with a non-starter, respectively with a
//     member of `backwardCombiningStarters`.
// decompositionPassthroughBound / compositionPassthroughBound: assigned `c`
//     at the points marked below. Because the loop counts downward, each ends
//     up holding the lowest code point that triggered an assignment; a bound
//     that is never assigned keeps the caller's value.
//
// Violated expectations are reported by setting U_INTERNAL_PROGRAM_ERROR and
// calling handleError.
void computeDecompositions(const char* basename,
                           const USet* backwardCombiningStarters,
                           std::vector<uint16_t>& storage16,
                           std::vector<uint32_t>& storage32,
                           USet* decompositionStartsWithNonStarter,
                           USet* decompositionStartsWithBackwardCombiningStarter,
                           std::vector<PendingDescriptor>& pendingTrieInsertions,
                           UChar32& decompositionPassthroughBound,
                           UChar32& compositionPassthroughBound) {
    IcuToolErrorCode status("icuexportdata: computeDecompositions");
    const Normalizer2* mainNormalizer;
    const Normalizer2* nfdNormalizer = Normalizer2::getNFDInstance(status);
    const Normalizer2* nfcNormalizer = Normalizer2::getNFCInstance(status);
    // Only opened for plain NFD; stays null for "nfkd" and "uts46d".
    FILE* f = nullptr;
    // Side table and trie for the non-recursive (raw) decompositions; only
    // populated and written out in the plain NFD case.
    std::vector<uint32_t> nonRecursive32;
    LocalUMutableCPTriePointer nonRecursiveBuilder(umutablecptrie_open(0, 0, status));

    UBool uts46 = false;

    if (uprv_strcmp(basename, "nfkd") == 0) {
        mainNormalizer = Normalizer2::getNFKDInstance(status);
    } else if (uprv_strcmp(basename, "uts46d") == 0) {
        uts46 = true;
        mainNormalizer = Normalizer2::getInstance(nullptr, "uts46", UNORM2_COMPOSE, status);
    } else {
        mainNormalizer = nfdNormalizer;
        f = prepareOutputFile("decompositionex");
    }

    // Max length as of Unicode 14 is 4 for NFD. For NFKD the max
    // is 18 (U+FDFA; special-cased), and the next longest is 8 (U+FDFB).
    const int32_t LONGEST_ENCODABLE_LENGTH_16 = 9;
    const int32_t LONGEST_ENCODABLE_LENGTH_32 = 8;
    const int32_t DECOMPOSITION_BUFFER_SIZE = 20;
    UChar32 utf32[DECOMPOSITION_BUFFER_SIZE];
    const int32_t RAW_DECOMPOSITION_BUFFER_SIZE = 2;
    UChar32 rawUtf32[RAW_DECOMPOSITION_BUFFER_SIZE];

    // Iterate over all scalar values excluding Hangul syllables.
    //
    // We go backwards in order to better find overlapping decompositions.
    //
    // As of Unicode 14:
    // Iterate forward without overlap search:
    // nfd: 16 size: 896, 32 size: 173
    // nfkd: 16 size: 3854, 32 size: 179
    //
    // Iterate forward with overlap search:
    // nfd: 16 size: 888, 32 size: 173
    // nfkd: 16 size: 3266, 32 size: 179
    //
    // Iterate backward with overlap search:
    // nfd: 16 size: 776, 32 size: 173
    // nfkd: 16 size: 2941, 32 size: 179
    //
    // UChar32 is signed!
    for (UChar32 c = 0x10FFFF; c >= 0; --c) {
        if (c >= 0xAC00 && c <= 0xD7A3) {
            // Hangul syllable
            continue;
        }
        if (c >= 0xD800 && c < 0xE000) {
            // Surrogate
            continue;
        }
        if (c == 0xFFFD) {
            // REPLACEMENT CHARACTER
            // This character is a starter that decomposes to self,
            // so without a special case here it would end up as
            // passthrough-eligible in all normalization forms.
            // However, in the potentially-ill-formed UTF-8 case
            // UTF-8 errors return U+FFFD from the iterator, and
            // errors need to be treated as ineligible for
            // passthrough on the slice fast path. Giving
            // U+FFFD a trie value whose flags make it ineligible
            // for passthrough avoids a specific U+FFFD branch on
            // the passthrough fast path.
            pendingTrieInsertions.push_back({c, NON_ROUND_TRIP_MASK | BACKWARD_COMBINING_MASK});
            continue;
        }
        // dst = NFD(mainNormalizer(c)) for the non-NFD forms, NFD(c) otherwise.
        UnicodeString src;
        UnicodeString dst;
        src.append(c);
        if (mainNormalizer != nfdNormalizer) {
            UnicodeString inter;
            mainNormalizer->normalize(src, inter, status);
            nfdNormalizer->normalize(inter, dst, status);
        } else {
            nfdNormalizer->normalize(src, dst, status);
        }

        // Does canonically composing the decomposition give back the
        // original code point?
        UnicodeString nfc;
        nfcNormalizer->normalize(dst, nfc, status);
        UBool roundTripsViaCanonicalComposition = (src == nfc);

        int32_t len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);

        // An empty result, or a result of just U+FFFD, is only tolerated
        // for uts46.
        if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
            if (!uts46) {
                status.set(U_INTERNAL_PROGRAM_ERROR);
                handleError(status, __LINE__, basename);
            }
        }
        if (len > DECOMPOSITION_BUFFER_SIZE) {
            status.set(U_INTERNAL_PROGRAM_ERROR);
            handleError(status, __LINE__, basename);
        }
        uint8_t firstCombiningClass = u_getCombiningClass(utf32[0]);
        bool specialNonStarterDecomposition = false;
        bool startsWithBackwardCombiningStarter = false;
        if (firstCombiningClass) {
            // The decomposition starts with a non-starter.
            decompositionPassthroughBound = c;
            compositionPassthroughBound = c;
            uset_add(decompositionStartsWithNonStarter, c);
            if (src != dst) {
                if (c == 0x0340 || c == 0x0341 || c == 0x0343 || c == 0x0344 || c == 0x0F73 || c == 0x0F75 || c == 0x0F81 || (c == 0xFF9E && utf32[0] == 0x3099) || (c == 0xFF9F && utf32[0] == 0x309A)) {
                    specialNonStarterDecomposition = true;
                } else {
                    // A character whose decomposition starts with a non-starter and isn't the same as the character itself and isn't already hard-coded into ICU4X.
                    status.set(U_INTERNAL_PROGRAM_ERROR);
                    handleError(status, __LINE__, basename);
                }
            }
        } else if (uset_contains(backwardCombiningStarters, utf32[0])) {
            compositionPassthroughBound = c;
            startsWithBackwardCombiningStarter = true;
            uset_add(decompositionStartsWithBackwardCombiningStarter, c);
        }
        if (mainNormalizer != nfdNormalizer) {
            // Skip code points for which this form's result is identical to
            // plain NFD (presumably the NFD data already covers them). Note
            // that the sets and bounds above have already been updated.
            UnicodeString nfd;
            nfdNormalizer->normalize(src, nfd, status);
            if (dst == nfd) {
                continue;
            }
            decompositionPassthroughBound = c;
            compositionPassthroughBound = c;
        }
        if (firstCombiningClass) {
            // Not read again on this path: both branches below end in `continue`.
            len = 1;
            if (specialNonStarterDecomposition) {
                // Special marker
                pendingTrieInsertions.push_back({c, NON_ROUND_TRIP_MASK | BACKWARD_COMBINING_MASK | 0xD900 | u_getCombiningClass(c)});
            } else {
                // Use the surrogate range to store the canonical combining class
                // XXX: Should non-starters that decompose to self be marked as non-round-trippable in
                // case such semantics turn out to be more useful for `NON_ROUND_TRIP_MASK`?
                pendingTrieInsertions.push_back({c, BACKWARD_COMBINING_MASK | 0xD800 | static_cast<uint32_t>(firstCombiningClass)});
            }
            continue;
        } else {
            if (src == dst) {
                // Decomposes to itself: only needs an entry when it is a
                // backward-combining starter.
                if (startsWithBackwardCombiningStarter) {
                    pendingTrieInsertions.push_back({c, BACKWARD_COMBINING_MASK});
                }
                continue;
            }
            decompositionPassthroughBound = c;
            // ICU4X hard-codes ANGSTROM SIGN
            if (c != 0x212B && mainNormalizer == nfdNormalizer) {
                UnicodeString raw;
                if (!nfdNormalizer->getRawDecomposition(c, raw)) {
                    // We're always supposed to have a non-recursive decomposition
                    // if we had a recursive one.
                    status.set(U_INTERNAL_PROGRAM_ERROR);
                    handleError(status, __LINE__, basename);
                }
                // In addition to actual difference, put the whole range that contains characters
                // with oxia into the non-recursive trie in order to catch cases where characters
                // with oxia have singleton decompositions to corresponding characters with tonos.
                // This way, the run-time decision to fall through can be done on the range
                // without checking for individual characters inside the range.
                if (raw != dst || (c >= 0x1F71 && c <= 0x1FFB)) {
                    int32_t rawLen = raw.toUTF32(rawUtf32, RAW_DECOMPOSITION_BUFFER_SIZE, status);
                    if (!rawLen) {
                        status.set(U_INTERNAL_PROGRAM_ERROR);
                        handleError(status, __LINE__, basename);
                    }
                    if (rawLen == 1) {
                        // Singleton: the trie value is the single code point.
                        if (c >= 0xFFFF) {
                            status.set(U_INTERNAL_PROGRAM_ERROR);
                            handleError(status, __LINE__, basename);
                        }
                        umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, static_cast<uint32_t>(rawUtf32[0]), status);
                    } else if (rawUtf32[0] <= 0xFFFF && rawUtf32[1] <= 0xFFFF) {
                        // Two BMP code points: packed into one 32-bit trie value.
                        if (!rawUtf32[0] || !rawUtf32[1]) {
                            status.set(U_INTERNAL_PROGRAM_ERROR);
                            handleError(status, __LINE__, basename);
                        }
                        // Swapped for consistency with the primary trie
                        uint32_t bmpPair = static_cast<uint32_t>(rawUtf32[1]) << 16 | static_cast<uint32_t>(rawUtf32[0]);
                        umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, bmpPair, status);
                    } else {
                        // Otherwise the pair goes into nonRecursive32 and the
                        // trie value carries its (one-based) index in the
                        // upper 16 bits.
                        // Let's add 1 to index to make it always non-zero to distinguish
                        // it from the default zero.
                        uint32_t index = nonRecursive32.size() + 1;
                        nonRecursive32.push_back(static_cast<uint32_t>(rawUtf32[0]));
                        nonRecursive32.push_back(static_cast<uint32_t>(rawUtf32[1]));
                        if (index > 0xFFFF) {
                            status.set(U_INTERNAL_PROGRAM_ERROR);
                            handleError(status, __LINE__, basename);
                        }
                        umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, index << 16, status);
                    }
                }
            }
        }
        if (!roundTripsViaCanonicalComposition) {
            compositionPassthroughBound = c;
        }
        // From here on, pick the trie value encoding based on the length and
        // contents of the decomposition in utf32[0..len).
        if (!len) {
            if (!uts46) {
                status.set(U_INTERNAL_PROGRAM_ERROR);
                handleError(status, __LINE__, basename);
            }
            // Empty result (uts46 only): all-ones marker value.
            pendingTrieInsertions.push_back({c, uint32_t(0xFFFFFFFF)});
        } else if (len == 1 && ((utf32[0] >= 0x1161 && utf32[0] <= 0x1175) || (utf32[0] >= 0x11A8 && utf32[0] <= 0x11C2))) {
            // Singleton decompositions to conjoining jamo.
            if (mainNormalizer == nfdNormalizer) {
                // Not supposed to happen in NFD
                status.set(U_INTERNAL_PROGRAM_ERROR);
                handleError(status, __LINE__, basename);
            }
            pendingTrieInsertions.push_back({c, static_cast<uint32_t>(utf32[0]) | NON_ROUND_TRIP_MASK | (startsWithBackwardCombiningStarter ? BACKWARD_COMBINING_MASK : 0)});
        } else if (!startsWithBackwardCombiningStarter && len == 1 && utf32[0] <= 0xFFFF) {
            // Singleton decomposition to a BMP code point, stored inline.
            // (startsWithBackwardCombiningStarter is false here, so the
            // conditional below contributes 0.)
            pendingTrieInsertions.push_back({c, static_cast<uint32_t>(utf32[0]) | NON_ROUND_TRIP_MASK | (startsWithBackwardCombiningStarter ? BACKWARD_COMBINING_MASK : 0)});
        } else if (c != 0x212B && // ANGSTROM SIGN is special to make the Harfbuzz case branch less in the more common case.
                   !startsWithBackwardCombiningStarter &&
                   len == 2 &&
                   utf32[0] <= 0x7FFF &&
                   utf32[1] <= 0x7FFF &&
                   utf32[0] > 0x1F &&
                   utf32[1] > 0x1F &&
                   !u_getCombiningClass(utf32[0]) &&
                   u_getCombiningClass(utf32[1])) {
            // Starter followed by one non-starter, both at most 15 bits wide:
            // packed inline, second code point shifted left by 15.
            for (int32_t i = 0; i < len; ++i) {
                if (((utf32[i] == 0x0345) && (uprv_strcmp(basename, "uts46d") == 0)) || utf32[i] == 0xFF9E || utf32[i] == 0xFF9F) {
                    // Assert that iota subscript and half-width voicing marks never occur in these
                    // expansions in the normalization forms where they are special.
                    status.set(U_INTERNAL_PROGRAM_ERROR);
                    handleError(status, __LINE__, basename);
                }
            }
            pendingTrieInsertions.push_back({c, static_cast<uint32_t>(utf32[0]) | (static_cast<uint32_t>(utf32[1]) << 15) | (roundTripsViaCanonicalComposition ? 0 : NON_ROUND_TRIP_MASK)});
        } else {
            // Everything else is stored out of line in storage16 or storage32.
            UBool supplementary = false;
            UBool nonInitialStarter = false;
            for (int32_t i = 0; i < len; ++i) {
                if (((utf32[i] == 0x0345) && (uprv_strcmp(basename, "uts46d") == 0)) || utf32[i] == 0xFF9E || utf32[i] == 0xFF9F) {
                    // Assert that iota subscript and half-width voicing marks never occur in these
                    // expansions in the normalization forms where they are special.
                    status.set(U_INTERNAL_PROGRAM_ERROR);
                    handleError(status, __LINE__, basename);
                }

                if (utf32[i] > 0xFFFF) {
                    supplementary = true;
                }
                if (utf32[i] == 0) {
                    status.set(U_INTERNAL_PROGRAM_ERROR);
                    handleError(status, __LINE__, basename);
                }
                if (i != 0 && !u_getCombiningClass(utf32[i])) {
                    nonInitialStarter = true;
                }
            }
            if (len == 1) {
                // The format doesn't allow for length 1 for BMP,
                // so if these ever occur, they need to be promoted
                // to wider storage. As of Unicode 16 alpha, this
                // case does not arise.
                supplementary = true;
            }
            if (!supplementary) {
                if (len > LONGEST_ENCODABLE_LENGTH_16 || !len || len == 1) {
                    if (len == 18 && c == 0xFDFA) {
                        // Special marker for the one character whose decomposition
                        // is too long. (Too long even if we took the fourth bit into use!)
                        pendingTrieInsertions.push_back({c, NON_ROUND_TRIP_MASK | 1});
                        continue;
                    } else {
                        // Note: There's a fourth bit available, but let's error out
                        // if it's ever needed so that it doesn't get used without
                        // updating docs.
                        status.set(U_INTERNAL_PROGRAM_ERROR);
                        handleError(status, __LINE__, basename);
                    }
                }
            } else if (len > LONGEST_ENCODABLE_LENGTH_32 || !len) {
                // Note: There's a fourth bit available, but let's error out
                // if it's ever needed so that it doesn't get used without
                // updating docs.
                status.set(U_INTERNAL_PROGRAM_ERROR);
                handleError(status, __LINE__, basename);
            }

            // Reuse an existing occurrence of the sequence in the chosen
            // storage if there is one; otherwise append it.
            size_t index = 0;
            if (!supplementary) {
                index = findOrAppend(storage16, utf32, len);
            } else {
                index = findOrAppend(storage32, utf32, len);
            }
            pendingTrieInsertions.push_back({c, (startsWithBackwardCombiningStarter ? BACKWARD_COMBINING_MASK : 0) | (roundTripsViaCanonicalComposition ? 0 : NON_ROUND_TRIP_MASK), supplementary, !nonInitialStarter, uint32_t(len), uint32_t(index)});
        }
    }
    // The error set here is only reported by one of the handleError calls
    // further down.
    if (storage16.size() + storage32.size() > 0xFFF) {
        // We actually have 14 bits available, but let's error out so
        // that docs can be updated when taking a reserved bit out of
        // potential future flag usage.
        status.set(U_INTERNAL_PROGRAM_ERROR);
    }
    if (f) {
        // Plain NFD only: write the raw-decomposition side table and trie.
        usrc_writeArray(f, "scalars32 = [\n  ", nonRecursive32.data(), 32, nonRecursive32.size(), "  ", "\n]\n");

        LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
            nonRecursiveBuilder.getAlias(),
            trieType,
            UCPTRIE_VALUE_BITS_32,
            status));
        handleError(status, __LINE__, basename);

        fprintf(f, "[trie]\n");
        usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);

        fclose(f);
    }
    handleError(status, __LINE__, basename);
}
   1035 
   1036 #endif // !UCONFIG_NO_NORMALIZATION
   1037 
// Indices into the `options` array defined below; code in this file looks
// options up as `options[OPT_...]`, so the enumerators must stay in the same
// order as the array's initializers.
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_MODE,
    OPT_TRIE_TYPE,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_ALL,
    OPT_INDEX,
    OPT_COPYRIGHT,
    OPT_VERBOSE,
    OPT_QUIET,

    // Not an option: as the last enumerator it equals the number of options above.
    OPT_COUNT
};
   1053 
// Tool-specific option definitions, in the same form as the predefined
// UOPTION_* entries used below. "mode" and "trie-type" require an argument;
// "all" and "index" take none. Only "mode" has a real short letter ('m');
// the others use '\1' (presumably a placeholder meaning "no short form" —
// the usage text lists them with long names only).
#define UOPTION_MODE UOPTION_DEF("mode", 'm', UOPT_REQUIRES_ARG)
#define UOPTION_TRIE_TYPE UOPTION_DEF("trie-type", '\1', UOPT_REQUIRES_ARG)
#define UOPTION_ALL UOPTION_DEF("all", '\1', UOPT_NO_ARG)
#define UOPTION_INDEX UOPTION_DEF("index", '\1', UOPT_NO_ARG)

// Command-line option table. The entries must stay in the same order as the
// OPT_* enumerators above, because the table is indexed with them.
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_MODE,
    UOPTION_TRIE_TYPE,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_ALL,
    UOPTION_INDEX,
    UOPTION_COPYRIGHT,
    UOPTION_VERBOSE,
    UOPTION_QUIET,
};
   1072 
   1073 void printHelp(FILE* stdfile, const char* program) {
   1074  fprintf(stdfile,
   1075          "usage: %s -m mode [-options] [--all | properties...]\n"
   1076          "\tdump Unicode property data to .toml files\n"
   1077          "options:\n"
   1078          "\t-h or -? or --help  this usage text\n"
   1079          "\t-V or --version     show a version message\n"
   1080          "\t-m or --mode        mode: currently only 'uprops', 'ucase', and 'norm', but more may be added\n"
   1081          "\t      --trie-type   set the trie type (small or fast, default small)\n"
   1082          "\t-d or --destdir     destination directory, followed by the path\n"
   1083          "\t      --all         write out all properties known to icuexportdata\n"
   1084          "\t      --index       write an _index.toml summarizing all data exported\n"
   1085          "\t-c or --copyright   include a copyright notice\n"
   1086          "\t-v or --verbose     Turn on verbose output\n"
   1087          "\t-q or --quiet       do not display warnings and progress\n",
   1088          program);
   1089 }
   1090 
   1091 int exportUprops(int argc, char* argv[]) {
   1092    // Load list of Unicode properties
   1093    std::vector<const char*> propNames;
   1094    for (int i=1; i<argc; i++) {
   1095        propNames.push_back(argv[i]);
   1096    }
   1097    if (options[OPT_ALL].doesOccur) {
   1098        int i = UCHAR_BINARY_START;
   1099        while (true) {
   1100            if (i == UCHAR_BINARY_LIMIT) {
   1101                i = UCHAR_INT_START;
   1102            }
   1103            if (i == UCHAR_INT_LIMIT) {
   1104                i = UCHAR_GENERAL_CATEGORY_MASK;
   1105            }
   1106            if (i == UCHAR_GENERAL_CATEGORY_MASK + 1) {
   1107                i = UCHAR_BIDI_MIRRORING_GLYPH;
   1108            }
   1109            if (i == UCHAR_BIDI_MIRRORING_GLYPH + 1) {
   1110                i = UCHAR_SCRIPT_EXTENSIONS;
   1111            }
   1112            if (i == UCHAR_SCRIPT_EXTENSIONS + 1) {
   1113                break;
   1114            }
   1115            UProperty uprop = static_cast<UProperty>(i);
   1116            const char* propName = u_getPropertyName(uprop, U_SHORT_PROPERTY_NAME);
   1117            if (propName == nullptr) {
   1118                propName = u_getPropertyName(uprop, U_LONG_PROPERTY_NAME);
   1119                if (propName != nullptr && VERBOSE) {
   1120                    std::cerr << "Note: falling back to long name for: " << propName << std::endl;
   1121                }
   1122            }
   1123            if (propName != nullptr) {
   1124                propNames.push_back(propName);
   1125            } else {
   1126                std::cerr << "Warning: Could not find name for: " << uprop << std::endl;
   1127            }
   1128            i++;
   1129        }
   1130    }
   1131 
   1132    if (propNames.empty()
   1133            || options[OPT_HELP_H].doesOccur
   1134            || options[OPT_HELP_QUESTION_MARK].doesOccur
   1135            || !options[OPT_MODE].doesOccur) {
   1136        FILE *stdfile=argc<0 ? stderr : stdout;
   1137        fprintf(stdfile,
   1138            "usage: %s -m uprops [-options] [--all | properties...]\n"
   1139            "\tdump Unicode property data to .toml files\n"
   1140            "options:\n"
   1141            "\t-h or -? or --help  this usage text\n"
   1142            "\t-V or --version     show a version message\n"
   1143            "\t-m or --mode        mode: currently only 'uprops', but more may be added\n"
   1144            "\t      --trie-type   set the trie type (small or fast, default small)\n"
   1145            "\t-d or --destdir     destination directory, followed by the path\n"
   1146            "\t      --all         write out all properties known to icuexportdata\n"
   1147            "\t      --index       write an _index.toml summarizing all data exported\n"
   1148            "\t-c or --copyright   include a copyright notice\n"
   1149            "\t-v or --verbose     Turn on verbose output\n"
   1150            "\t-q or --quiet       do not display warnings and progress\n",
   1151            argv[0]);
   1152        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
   1153    }
   1154 
   1155    const char* mode = options[OPT_MODE].value;
   1156    if (uprv_strcmp(mode, "uprops") != 0) {
   1157        fprintf(stderr, "Invalid option for --mode (must be uprops)\n");
   1158        return U_ILLEGAL_ARGUMENT_ERROR;
   1159    }
   1160 
   1161    if (options[OPT_TRIE_TYPE].doesOccur) {
   1162        if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "fast") == 0) {
   1163            trieType = UCPTRIE_TYPE_FAST;
   1164        } else if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "small") == 0) {
   1165            trieType = UCPTRIE_TYPE_SMALL;
   1166        } else {
   1167            fprintf(stderr, "Invalid option for --trie-type (must be small or fast)\n");
   1168            return U_ILLEGAL_ARGUMENT_ERROR;
   1169        }
   1170    }
   1171 
   1172    for (const char* propName : propNames) {
   1173        UProperty propEnum = u_getPropertyEnum(propName);
   1174        if (propEnum == UCHAR_INVALID_CODE) {
   1175            std::cerr << "Error: Invalid property alias: " << propName << std::endl;
   1176            return U_ILLEGAL_ARGUMENT_ERROR;
   1177        }
   1178 
   1179        FILE* f = prepareOutputFile(propName);
   1180 
   1181        UVersionInfo versionInfo;
   1182        u_getUnicodeVersion(versionInfo);
   1183        char uvbuf[U_MAX_VERSION_STRING_LENGTH];
   1184        u_versionToString(versionInfo, uvbuf);
   1185        fprintf(f, "icu_version = \"%s\"\nunicode_version = \"%s\"\n\n",
   1186            U_ICU_VERSION,
   1187            uvbuf);
   1188 
   1189        if (propEnum < UCHAR_BINARY_LIMIT) {
   1190            dumpBinaryProperty(propEnum, f);
   1191        } else if (UCHAR_INT_START <= propEnum && propEnum <= UCHAR_INT_LIMIT) {
   1192            dumpEnumeratedProperty(propEnum, f);
   1193        } else if (propEnum == UCHAR_GENERAL_CATEGORY_MASK) {
   1194            dumpGeneralCategoryMask(f);
   1195        } else if (propEnum == UCHAR_BIDI_MIRRORING_GLYPH) {
   1196            dumpBidiMirroringGlyph(f);
   1197        } else if (propEnum == UCHAR_SCRIPT_EXTENSIONS) {
   1198            dumpScriptExtensions(f);
   1199        } else {
   1200            std::cerr << "Don't know how to write property: " << propEnum << std::endl;
   1201            return U_INTERNAL_PROGRAM_ERROR;
   1202        }
   1203 
   1204        fclose(f);
   1205    }
   1206 
   1207    if (options[OPT_INDEX].doesOccur) {
   1208        FILE* f = prepareOutputFile("_index");
   1209        fprintf(f, "index = [\n");
   1210        for (const char* propName : propNames) {
   1211            // At this point, propName is a valid property name, so it should be alphanum ASCII
   1212            fprintf(f, "  { filename=\"%s.toml\" },\n", propName);
   1213        }
   1214        fprintf(f, "]\n");
   1215        fclose(f);
   1216    }
   1217 
   1218    return 0;
   1219 }
   1220 
// Context handed to addRangeToUCPTrie through the `const void*` callback
// argument of utrie2_enum.
struct AddRangeHelper {
    // Trie builder that receives the enumerated ranges. exportCase stores an
    // alias here, so this struct is not what releases the builder.
    UMutableCPTrie* ucptrie;
};
   1224 
   1225 static UBool U_CALLCONV
   1226 addRangeToUCPTrie(const void* context, UChar32 start, UChar32 end, uint32_t value) {
   1227    IcuToolErrorCode status("addRangeToUCPTrie");
   1228    UMutableCPTrie* ucptrie = static_cast<const AddRangeHelper*>(context)->ucptrie;
   1229    umutablecptrie_setRange(ucptrie, start, end, value, status);
   1230    handleError(status, __LINE__, "setRange");
   1231 
   1232    return true;
   1233 }
   1234 
   1235 int exportCase(int argc, char* argv[]) {
   1236    if (argc > 1) {
   1237        fprintf(stderr, "ucase mode does not expect additional arguments\n");
   1238        return U_ILLEGAL_ARGUMENT_ERROR;
   1239    }
   1240    (void) argv; // Suppress unused variable warning
   1241 
   1242    IcuToolErrorCode status("icuexportdata");
   1243    LocalUMutableCPTriePointer builder(umutablecptrie_open(0, 0, status));
   1244    handleError(status, __LINE__, "exportCase");
   1245 
   1246    int32_t exceptionsLength, unfoldLength;
   1247    const UCaseProps *caseProps = ucase_getSingleton(&exceptionsLength, &unfoldLength);
   1248    const UTrie2* caseTrie = &caseProps->trie;
   1249 
   1250    AddRangeHelper helper = { builder.getAlias() };
   1251    utrie2_enum(caseTrie, nullptr, addRangeToUCPTrie, &helper);
   1252 
   1253    UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_16;
   1254    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
   1255        builder.getAlias(),
   1256        trieType,
   1257        width,
   1258        status));
   1259    handleError(status, __LINE__, "exportCase");
   1260 
   1261    FILE* f = prepareOutputFile("ucase");
   1262 
   1263    UVersionInfo versionInfo;
   1264    u_getUnicodeVersion(versionInfo);
   1265    char uvbuf[U_MAX_VERSION_STRING_LENGTH];
   1266    u_versionToString(versionInfo, uvbuf);
   1267    fprintf(f, "icu_version = \"%s\"\nunicode_version = \"%s\"\n\n",
   1268            U_ICU_VERSION,
   1269            uvbuf);
   1270 
   1271    fputs("[ucase.code_point_trie]\n", f);
   1272    usrc_writeUCPTrie(f, "case_trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
   1273    fputs("\n", f);
   1274 
   1275    const char* indent = "  ";
   1276    const char* suffix = "\n]\n";
   1277 
   1278    fputs("[ucase.exceptions]\n", f);
   1279    const char* exceptionsPrefix = "exceptions = [\n  ";
   1280    int32_t exceptionsWidth = 16;
   1281    usrc_writeArray(f, exceptionsPrefix, caseProps->exceptions, exceptionsWidth,
   1282                    exceptionsLength, indent, suffix);
   1283    fputs("\n", f);
   1284 
   1285    fputs("[ucase.unfold]\n", f);
   1286    const char* unfoldPrefix = "unfold = [\n  ";
   1287    int32_t unfoldWidth = 16;
   1288    usrc_writeArray(f, unfoldPrefix, caseProps->unfold, unfoldWidth,
   1289                    unfoldLength, indent, suffix);
   1290 
   1291    return 0;
   1292 }
   1293 
   1294 #if !UCONFIG_NO_NORMALIZATION
   1295 
   1296 int exportNorm() {
   1297    IcuToolErrorCode status("icuexportdata: exportNorm");
   1298    USet* backwardCombiningStarters = uset_openEmpty();
   1299    writeCanonicalCompositions(backwardCombiningStarters);
   1300 
   1301    std::vector<uint16_t> storage16;
   1302    std::vector<uint32_t> storage32;
   1303 
   1304    // Note: the USets are not exported. They are only used to check that a new
   1305    // Unicode version doesn't violate expectations that are hard-coded in ICU4X.
   1306    USet* nfdDecompositionStartsWithNonStarter = uset_openEmpty();
   1307    USet* nfdDecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
   1308    std::vector<PendingDescriptor> nfdPendingTrieInsertions;
   1309    UChar32 nfdBound = 0x10FFFF;
   1310    UChar32 nfcBound = 0x10FFFF;
   1311    computeDecompositions("nfd",
   1312                          backwardCombiningStarters,
   1313                          storage16,
   1314                          storage32,
   1315                          nfdDecompositionStartsWithNonStarter,
   1316                          nfdDecompositionStartsWithBackwardCombiningStarter,
   1317                          nfdPendingTrieInsertions,
   1318                          nfdBound,
   1319                          nfcBound);
   1320    if (!(nfdBound == 0xC0 && nfcBound == 0x300)) {
   1321        // Unexpected bounds for NFD/NFC.
   1322        status.set(U_INTERNAL_PROGRAM_ERROR);
   1323        handleError(status, __LINE__, "exportNorm");
   1324    }
   1325 
   1326    uint32_t baseSize16 = storage16.size();
   1327    uint32_t baseSize32 = storage32.size();
   1328 
   1329    USet* nfkdDecompositionStartsWithNonStarter = uset_openEmpty();
   1330    USet* nfkdDecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
   1331    std::vector<PendingDescriptor> nfkdPendingTrieInsertions;
   1332    UChar32 nfkdBound = 0x10FFFF;
   1333    UChar32 nfkcBound = 0x10FFFF;
   1334    computeDecompositions("nfkd",
   1335                          backwardCombiningStarters,
   1336                          storage16,
   1337                          storage32,
   1338                          nfkdDecompositionStartsWithNonStarter,
   1339                          nfkdDecompositionStartsWithBackwardCombiningStarter,
   1340                          nfkdPendingTrieInsertions,
   1341                          nfkdBound,
   1342                          nfkcBound);
   1343    if (!(nfkdBound <= 0xC0 && nfkcBound <= 0x300)) {
   1344        status.set(U_INTERNAL_PROGRAM_ERROR);
   1345        handleError(status, __LINE__, "exportNorm");
   1346    }
   1347    if (nfkcBound > 0xC0) {
   1348        if (nfkdBound != 0xC0) {
   1349            status.set(U_INTERNAL_PROGRAM_ERROR);
   1350            handleError(status, __LINE__, "exportNorm");
   1351        }
   1352    } else {
   1353        if (nfkdBound != nfkcBound) {
   1354            status.set(U_INTERNAL_PROGRAM_ERROR);
   1355            handleError(status, __LINE__, "exportNorm");
   1356        }
   1357    }
   1358 
   1359    USet* uts46DecompositionStartsWithNonStarter = uset_openEmpty();
   1360    USet* uts46DecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
   1361    std::vector<PendingDescriptor> uts46PendingTrieInsertions;
   1362    UChar32 uts46dBound = 0x10FFFF;
   1363    UChar32 uts46Bound = 0x10FFFF;
   1364    computeDecompositions("uts46d",
   1365                          backwardCombiningStarters,
   1366                          storage16,
   1367                          storage32,
   1368                          uts46DecompositionStartsWithNonStarter,
   1369                          uts46DecompositionStartsWithBackwardCombiningStarter,
   1370                          uts46PendingTrieInsertions,
   1371                          uts46dBound,
   1372                          uts46Bound);
   1373    if (!(uts46dBound <= 0xC0 && uts46Bound <= 0x300)) {
   1374        status.set(U_INTERNAL_PROGRAM_ERROR);
   1375        handleError(status, __LINE__, "exportNorm");
   1376    }
   1377    if (uts46Bound > 0xC0) {
   1378        if (uts46dBound != 0xC0) {
   1379            status.set(U_INTERNAL_PROGRAM_ERROR);
   1380            handleError(status, __LINE__, "exportNorm");
   1381        }
   1382    } else {
   1383        if (uts46dBound != uts46Bound) {
   1384            status.set(U_INTERNAL_PROGRAM_ERROR);
   1385            handleError(status, __LINE__, "exportNorm");
   1386        }
   1387    }
   1388 
   1389    uint32_t supplementSize16 = storage16.size() - baseSize16;
   1390    uint32_t supplementSize32 = storage32.size() - baseSize32;
   1391 
   1392    writeDecompositionData("nfd", baseSize16, baseSize32, supplementSize16, nfdDecompositionStartsWithNonStarter, nullptr, nfdPendingTrieInsertions, nfdPendingTrieInsertions, static_cast<char16_t>(nfcBound));
   1393    writeDecompositionData("nfkd", baseSize16, baseSize32, supplementSize16, nfkdDecompositionStartsWithNonStarter, nfdDecompositionStartsWithNonStarter, nfkdPendingTrieInsertions, nfdPendingTrieInsertions, static_cast<char16_t>(nfkcBound));
   1394    writeDecompositionData("uts46d", baseSize16, baseSize32, supplementSize16, uts46DecompositionStartsWithNonStarter, nfdDecompositionStartsWithNonStarter, uts46PendingTrieInsertions, nfdPendingTrieInsertions, static_cast<char16_t>(uts46Bound));
   1395 
   1396    writeDecompositionTables("nfdex", storage16.data(), baseSize16, storage32.data(), baseSize32);
   1397    writeDecompositionTables("nfkdex", storage16.data() + baseSize16, supplementSize16, storage32.data() + baseSize32, supplementSize32);
   1398 
   1399    uset_close(nfdDecompositionStartsWithNonStarter);
   1400    uset_close(nfkdDecompositionStartsWithNonStarter);
   1401    uset_close(uts46DecompositionStartsWithNonStarter);
   1402 
   1403    uset_close(nfdDecompositionStartsWithBackwardCombiningStarter);
   1404    uset_close(nfkdDecompositionStartsWithBackwardCombiningStarter);
   1405    uset_close(uts46DecompositionStartsWithBackwardCombiningStarter);
   1406 
   1407    uset_close(backwardCombiningStarters);
   1408    handleError(status, __LINE__, "exportNorm");
   1409    return 0;
   1410 }
   1411 
   1412 #endif // !UCONFIG_NO_NORMALIZATION
   1413 
   1414 int main(int argc, char* argv[]) {
   1415    U_MAIN_INIT_ARGS(argc, argv);
   1416 
   1417    /* preset then read command line options */
   1418    options[OPT_DESTDIR].value=u_getDataDirectory();
   1419    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
   1420 
   1421    if(options[OPT_VERSION].doesOccur) {
   1422        printf("icuexportdata version %s, ICU tool to dump data files for external consumers\n",
   1423               U_ICU_DATA_VERSION);
   1424        printf("%s\n", U_COPYRIGHT_STRING);
   1425        exit(0);
   1426    }
   1427 
   1428    /* error handling, printing usage message */
   1429    if(argc<0) {
   1430        fprintf(stderr,
   1431            "error in command line argument \"%s\"\n",
   1432            argv[-argc]);
   1433    }
   1434 
   1435    if (argc < 0
   1436            || options[OPT_HELP_H].doesOccur
   1437            || options[OPT_HELP_QUESTION_MARK].doesOccur
   1438            || !options[OPT_MODE].doesOccur) {
   1439        FILE *stdfile=argc<0 ? stderr : stdout;
   1440        printHelp(stdfile, argv[0]);
   1441        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
   1442    }
   1443 
   1444    /* get the options values */
   1445    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
   1446    destdir = options[OPT_DESTDIR].value;
   1447    VERBOSE = options[OPT_VERBOSE].doesOccur;
   1448    QUIET = options[OPT_QUIET].doesOccur;
   1449 
   1450    if (options[OPT_TRIE_TYPE].doesOccur) {
   1451        if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "fast") == 0) {
   1452            trieType = UCPTRIE_TYPE_FAST;
   1453        } else if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "small") == 0) {
   1454            trieType = UCPTRIE_TYPE_SMALL;
   1455        } else {
   1456            fprintf(stderr, "Invalid option for --trie-type (must be small or fast)\n");
   1457            return U_ILLEGAL_ARGUMENT_ERROR;
   1458        }
   1459    }
   1460 
   1461    const char* mode = options[OPT_MODE].value;
   1462    if (uprv_strcmp(mode, "norm") == 0) {
   1463 #if !UCONFIG_NO_NORMALIZATION
   1464        return exportNorm();
   1465 #else
   1466    fprintf(stderr, "Exporting normalization data not supported when compiling without normalization support.\n");
   1467    return U_ILLEGAL_ARGUMENT_ERROR;
   1468 #endif
   1469    }
   1470    if (uprv_strcmp(mode, "uprops") == 0) {
   1471        return exportUprops(argc, argv);
   1472    } else if (uprv_strcmp(mode, "ucase") == 0) {
   1473        return exportCase(argc, argv);
   1474    }
   1475 
   1476    fprintf(stderr, "Invalid option for --mode (must be uprops, ucase, or norm)\n");
   1477    return U_ILLEGAL_ARGUMENT_ERROR;
   1478 }