tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

number_longnames.cpp (80047B)


      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 #include <cstdlib>
      9 
     10 #include "unicode/simpleformatter.h"
     11 #include "unicode/ures.h"
     12 #include "unicode/plurrule.h"
     13 #include "unicode/strenum.h"
     14 #include "ureslocs.h"
     15 #include "charstr.h"
     16 #include "uresimp.h"
     17 #include "measunit_impl.h"
     18 #include "number_longnames.h"
     19 #include "number_microprops.h"
     20 #include <algorithm>
     21 #include "cstring.h"
     22 #include "util.h"
     23 #include "sharedpluralrules.h"
     24 
     25 using namespace icu;
     26 using namespace icu::number;
     27 using namespace icu::number::impl;
     28 
     29 namespace {
     30 
/**
 * Display Name (this format has no placeholder).
 *
 * Used as an index into the LongNameHandler::simpleFormats array. Units
 * resources cover the normal set of PluralRules keys, as well as `dnam` and
 * `per` forms.
 *
 * Slot layout of the ARRAY_LENGTH-sized arrays used in this file: indices
 * [0, StandardPlural::Form::COUNT) hold the per-plural-form patterns, followed
 * by one slot each for DNAM_INDEX, PER_INDEX, GENDER_INDEX and
 * CONSTANT_DENOMINATOR_INDEX. getIndex() maps the resource key "dnam" to this
 * slot.
 */
constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT;
/**
 * "per" form (e.g. "{0} per day" is day's "per" form).
 *
 * Used as an index into the LongNameHandler::simpleFormats array. Units
 * resources cover the normal set of PluralRules keys, as well as `dnam` and
 * `per` forms. getIndex() maps the resource key "per" to this slot.
 */
constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1;
/**
 * Gender of the word, in languages with grammatical gender.
 *
 * getIndex() maps the resource key "gender" to this slot.
 */
constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
/**
 * Denominator constant of the unit.
 *
 * NOTE(review): getIndex() never returns this index, so the slot is presumably
 * filled by code other than the resource sinks - confirm.
 */
constexpr int32_t CONSTANT_DENOMINATOR_INDEX = StandardPlural::Form::COUNT + 3;
// Number of slots in the arrays populated by PluralTableSink and
// InflectedPluralSink: both sinks reset exactly this many entries to bogus in
// their constructors, without any bounds checking.
constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 4;
     57 
// TODO(icu-units#28): load this list from resources, after creating a "&set"
// function for use in ldml2icu rules.
//
// Number of entries in gGenders.
const int32_t GENDER_COUNT = 7;
// Grammatical gender names recognised by getGenderString().
//
// The entries MUST stay sorted in ascending uprv_strcmp() order:
// getGenderString() locates a name with a binary search over this array, so an
// out-of-order entry would silently become unfindable.
const char *gGenders[GENDER_COUNT] = {"animate",   "common", "feminine", "inanimate",
                                      "masculine", "neuter", "personal"};
     63 
     64 // Converts a UnicodeString to a const char*, either pointing to a string in
     65 // gGenders, or pointing to an empty string if an appropriate string was not
     66 // found.
     67 const char *getGenderString(UnicodeString uGender, UErrorCode status) {
     68    if (uGender.length() == 0) {
     69        return "";
     70    }
     71    CharString gender;
     72    gender.appendInvariantChars(uGender, status);
     73    if (U_FAILURE(status)) {
     74        return "";
     75    }
     76    int32_t first = 0;
     77    int32_t last = GENDER_COUNT;
     78    while (first < last) {
     79        int32_t mid = (first + last) / 2;
     80        int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]);
     81        if (cmp == 0) {
     82            return gGenders[mid];
     83        } else if (cmp > 0) {
     84            first = mid + 1;
     85        } else if (cmp < 0) {
     86            last = mid;
     87        }
     88    }
     89    // We don't return an error in case our gGenders list is incomplete in
     90    // production.
     91    //
     92    // TODO(icu-units#28): a unit test checking all locales' genders are covered
     93    // by gGenders? Else load a complete list of genders found in
     94    // grammaticalFeatures in an initOnce.
     95    return "";
     96 }
     97 
     98 // Returns the array index that corresponds to the given pluralKeyword.
     99 int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
    100    // pluralKeyword can also be "dnam", "per", or "gender"
    101    switch (*pluralKeyword) {
    102    case 'd':
    103        if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) {
    104            return DNAM_INDEX;
    105        }
    106        break;
    107    case 'g':
    108        if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) {
    109            return GENDER_INDEX;
    110        }
    111        break;
    112    case 'p':
    113        if (uprv_strcmp(pluralKeyword + 1, "er") == 0) {
    114            return PER_INDEX;
    115        }
    116        break;
    117    default:
    118        break;
    119    }
    120    StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status);
    121    return plural;
    122 }
    123 
    124 // Selects a string out of the `strings` array which corresponds to the
    125 // specified plural form, with fallback to the OTHER form.
    126 //
    127 // The `strings` array must have ARRAY_LENGTH items: one corresponding to each
    128 // of the plural forms, plus a display name ("dnam") and a "per" form.
    129 UnicodeString getWithPlural(
    130        const UnicodeString* strings,
    131        StandardPlural::Form plural,
    132        UErrorCode& status) {
    133    UnicodeString result = strings[plural];
    134    if (result.isBogus()) {
    135        result = strings[StandardPlural::Form::OTHER];
    136    }
    137    if (result.isBogus()) {
    138        // There should always be data in the "other" plural variant.
    139        status = U_INTERNAL_PROGRAM_ERROR;
    140    }
    141    return result;
    142 }
    143 
// Where the "{0}" placeholder sits inside a unit pattern, as reported by
// extractCorePattern(): at the very start (PH_BEGINNING), at the very end
// (PH_END), somewhere in between (PH_MIDDLE), or not present (PH_NONE).
//
// NOTE(review): extractCorePattern() never produces PH_EMPTY; presumably it is
// assigned by other code - confirm.
enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END };
    145 
    146 /**
    147 * Returns three outputs extracted from pattern.
    148 *
    149 * @param coreUnit is extracted as per Extract(...) in the spec:
    150 *   https://unicode.org/reports/tr35/tr35-general.html#compound-units
    151 * @param PlaceholderPosition indicates where in the string the placeholder was
    152 *   found.
    153 * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar
    154 *   contains the space character (if any) that separated the placeholder from
    155 *   the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one
    156 *   space character is considered.
    157 */
    158 void extractCorePattern(const UnicodeString &pattern,
    159                        UnicodeString &coreUnit,
    160                        PlaceholderPosition &placeholderPosition,
    161                        char16_t &joinerChar) {
    162    joinerChar = 0;
    163    int32_t len = pattern.length();
    164    if (pattern.startsWith(u"{0}", 3)) {
    165        placeholderPosition = PH_BEGINNING;
    166        if (u_isJavaSpaceChar(pattern[3])) {
    167            joinerChar = pattern[3];
    168            coreUnit.setTo(pattern, 4, len - 4);
    169        } else {
    170            coreUnit.setTo(pattern, 3, len - 3);
    171        }
    172    } else if (pattern.endsWith(u"{0}", 3)) {
    173        placeholderPosition = PH_END;
    174        if (u_isJavaSpaceChar(pattern[len - 4])) {
    175            coreUnit.setTo(pattern, 0, len - 4);
    176            joinerChar = pattern[len - 4];
    177        } else {
    178            coreUnit.setTo(pattern, 0, len - 3);
    179        }
    180    } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) {
    181        placeholderPosition = PH_NONE;
    182        coreUnit = pattern;
    183    } else {
    184        placeholderPosition = PH_MIDDLE;
    185        coreUnit = pattern;
    186    }
    187 }
    188 
    189 //////////////////////////
    190 /// BEGIN DATA LOADING ///
    191 //////////////////////////
    192 
    193 // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty
    194 // string both in case of unknown gender and in case of unknown unit.
    195 UnicodeString
    196 getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
    197    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
    198    if (U_FAILURE(status)) { return {}; }
    199 
    200    // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
    201    // TODO(ICU-20400): Get duration-*-person data properly with aliases.
    202    StringPiece subtypeForResource;
    203    int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()));
    204    if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) {
    205        subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7};
    206    } else {
    207        subtypeForResource = builtinUnit.getSubtype();
    208    }
    209 
    210    CharString key;
    211    key.append("units/", status);
    212    key.append(builtinUnit.getType(), status);
    213    key.append("/", status);
    214    key.append(subtypeForResource, status);
    215    key.append("/gender", status);
    216 
    217    UErrorCode localStatus = status;
    218    int32_t resultLen = 0;
    219    const char16_t *result =
    220        ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
    221    if (U_SUCCESS(localStatus)) {
    222        status = localStatus;
    223        return UnicodeString(true, result, resultLen);
    224    } else {
    225        // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
    226        // check whether the parent "$unitRes" exists? Then we could return
    227        // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
    228        // being a builtin).
    229        return {};
    230    }
    231 }
    232 
    233 // Loads data from a resource tree with paths matching
    234 // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
    235 // and genders.
    236 //
    237 // An InflectedPluralSink is configured to load data for a specific gender and
    238 // case. It loads all plural forms, because selection between plural forms is
    239 // dependent upon the value being formatted.
    240 //
    241 // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
    242 // units/compound/power2: German has case, French has differences for gender,
    243 // but no case.
    244 //
    245 // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the
    246 // tree structures are different. After homogenizing the structures, we may be
    247 // able to unify the two classes.
    248 //
    249 // TODO: Spec violation: expects presence of "count" - does not fallback to an
    250 // absent "count"! If this fallback were added, getCompoundValue could be
    251 // superseded?
    252 class InflectedPluralSink : public ResourceSink {
    253  public:
    254    // Accepts `char*` rather than StringPiece because
    255    // ResourceTable::findValue(...) requires a null-terminated `char*`.
    256    //
    257    // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
    258    // checking is performed.
    259    explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray)
    260        : gender(gender), caseVariant(caseVariant), outArray(outArray) {
    261        // Initialize the array to bogus strings.
    262        for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
    263            outArray[i].setToBogus();
    264        }
    265    }
    266 
    267    // See ResourceSink::put().
    268    void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
    269        int32_t pluralIndex = getIndex(key, status);
    270        if (U_FAILURE(status)) { return; }
    271        if (!outArray[pluralIndex].isBogus()) {
    272            // We already have a pattern
    273            return;
    274        }
    275        ResourceTable genderTable = value.getTable(status);
    276        ResourceTable caseTable; // This instance has to outlive `value`
    277        if (loadForPluralForm(genderTable, caseTable, value, status)) {
    278            outArray[pluralIndex] = value.getUnicodeString(status);
    279        }
    280    }
    281 
    282  private:
    283    // Tries to load data for the configured gender from `genderTable`. Returns
    284    // true if found, returning the data in `value`. The returned data will be
    285    // for the configured gender if found, falling back to "neuter" and
    286    // no-gender if not. The caseTable parameter holds the intermediate
    287    // ResourceTable for the sake of lifetime management.
    288    bool loadForPluralForm(const ResourceTable &genderTable,
    289                           ResourceTable &caseTable,
    290                           ResourceValue &value,
    291                           UErrorCode &status) {
    292        if (uprv_strcmp(gender, "") != 0) {
    293            if (loadForGender(genderTable, gender, caseTable, value, status)) {
    294                return true;
    295            }
    296            if (uprv_strcmp(gender, "neuter") != 0 &&
    297                loadForGender(genderTable, "neuter", caseTable, value, status)) {
    298                return true;
    299            }
    300        }
    301        if (loadForGender(genderTable, "_", caseTable, value, status)) {
    302            return true;
    303        }
    304        return false;
    305    }
    306 
    307    // Tries to load data for the given gender from `genderTable`. Returns true
    308    // if found, returning the data in `value`. The returned data will be for
    309    // the configured case if found, falling back to "nominative" and no-case if
    310    // not.
    311    bool loadForGender(const ResourceTable &genderTable,
    312                       const char *genderVal,
    313                       ResourceTable &caseTable,
    314                       ResourceValue &value,
    315                       UErrorCode &status) {
    316        if (!genderTable.findValue(genderVal, value)) {
    317            return false;
    318        }
    319        caseTable = value.getTable(status);
    320        if (uprv_strcmp(caseVariant, "") != 0) {
    321            if (loadForCase(caseTable, caseVariant, value)) {
    322                return true;
    323            }
    324            if (uprv_strcmp(caseVariant, "nominative") != 0 &&
    325                loadForCase(caseTable, "nominative", value)) {
    326                return true;
    327            }
    328        }
    329        if (loadForCase(caseTable, "_", value)) {
    330            return true;
    331        }
    332        return false;
    333    }
    334 
    335    // Tries to load data for the given case from `caseTable`. Returns true if
    336    // found, returning the data in `value`.
    337    bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) {
    338        if (!caseTable.findValue(caseValue, value)) {
    339            return false;
    340        }
    341        return true;
    342    }
    343 
    344    const char *gender;
    345    const char *caseVariant;
    346    UnicodeString *outArray;
    347 };
    348 
    349 // Fetches localised formatting patterns for the given subKey. See documentation
    350 // for InflectedPluralSink for details.
    351 //
    352 // Data is loaded for the appropriate unit width, with missing data filled in
    353 // from unitsShort.
    354 void getInflectedMeasureData(StringPiece subKey,
    355                             const Locale &locale,
    356                             const UNumberUnitWidth &width,
    357                             const char *gender,
    358                             const char *caseVariant,
    359                             UnicodeString *outArray,
    360                             UErrorCode &status) {
    361    InflectedPluralSink sink(gender, caseVariant, outArray);
    362    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
    363    if (U_FAILURE(status)) { return; }
    364 
    365    CharString key;
    366    key.append("units", status);
    367    if (width == UNUM_UNIT_WIDTH_NARROW) {
    368        key.append("Narrow", status);
    369    } else if (width == UNUM_UNIT_WIDTH_SHORT) {
    370        key.append("Short", status);
    371    }
    372    key.append("/", status);
    373    key.append(subKey, status);
    374 
    375    UErrorCode localStatus = status;
    376    ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
    377    if (width == UNUM_UNIT_WIDTH_SHORT) {
    378        status = localStatus;
    379        return;
    380    }
    381 }
    382 
    383 class PluralTableSink : public ResourceSink {
    384  public:
    385    // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
    386    // checking is performed.
    387    explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
    388        // Initialize the array to bogus strings.
    389        for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
    390            outArray[i].setToBogus();
    391        }
    392    }
    393 
    394    void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
    395        if (uprv_strcmp(key, "case") == 0) {
    396            return;
    397        }
    398        int32_t index = getIndex(key, status);
    399        if (U_FAILURE(status)) { return; }
    400        if (!outArray[index].isBogus()) {
    401            return;
    402        }
    403        outArray[index] = value.getUnicodeString(status);
    404        if (U_FAILURE(status)) { return; }
    405    }
    406 
    407  private:
    408    UnicodeString *outArray;
    409 };
    410 
/**
 * Populates outArray with `locale`-specific values for `unit` through use of
 * PluralTableSink. Only the set of basic units are supported!
 *
 * The patterns are read from one resource tree selected by `width`:
 * *unitsNarrow* for UNUM_UNIT_WIDTH_NARROW, *unitsShort* for
 * UNUM_UNIT_WIDTH_SHORT, and *units* for every other width. This function
 * itself performs no second pass over another width's tree.
 * NOTE(review): any narrow -> short fallback would have to come from the
 * resource data or from ures_getAllChildrenWithFallback - confirm.
 *
 * Before the lookup, the unit's subtype is replaced by its replacement if the
 * "metadata" alias bundle lists it under "alias/unit/<subtype>/replacement",
 * and a trailing "-person" is stripped.
 *
 * For widths other than UNUM_UNIT_WIDTH_FULL_NAME, outArray[GENDER_INDEX] is
 * additionally loaded from "units/<type>/<subtype>/gender".
 *
 * Error reporting: failures opening the bundles or building the keys go to
 * `status`. The result of the final pattern load is copied into `status` only
 * for UNUM_UNIT_WIDTH_SHORT, and only when it is a failure.
 *
 * @param unit must be a built-in unit, i.e. must have a type and subtype,
 *     listed in gTypes and gSubTypes in measunit.cpp.
 * @param unitDisplayCase only consulted for UNUM_UNIT_WIDTH_FULL_NAME, and
 *     ignored when it is the empty string. Strings for the requested case are
 *     used if found; any pattern still missing afterwards is filled in from
 *     the no-case data.
 * @param outArray must be of fixed length ARRAY_LENGTH.
 */
void getMeasureData(const Locale &locale,
                    const MeasureUnit &unit,
                    const UNumberUnitWidth &width,
                    const char *unitDisplayCase,
                    UnicodeString *outArray,
                    UErrorCode &status) {
    // Constructing the sink resets every outArray entry to bogus, even if the
    // bundle below cannot be opened.
    PluralTableSink sink(outArray);
    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
    if (U_FAILURE(status)) { return; }

    // subKey becomes "/<type>/<subtype>"; the subtype part is appended further
    // down, once aliases and the "-person" suffix have been dealt with.
    CharString subKey;
    subKey.append("/", status);
    subKey.append(unit.getType(), status);
    subKey.append("/", status);

    // TODO(ICU-23226): Refactor LongNameHandler to use gUnitAliases and gUnitReplacements
    // in measunit_extra.cpp instead of a local resource bundle.
    // Check if unitSubType is an alias or not.
    LocalUResourceBundlePointer aliasBundle(ures_open(U_ICUDATA_ALIAS, "metadata", &status));

    // The alias lookup runs on a copy of status, so that "no alias found" does
    // not become an error for the caller.
    UErrorCode aliasStatus = status;
    StackUResourceBundle aliasFillIn;
    CharString aliasKey;
    aliasKey.append("alias/unit/", aliasStatus);
    aliasKey.append(unit.getSubtype(), aliasStatus);
    aliasKey.append("/replacement", aliasStatus);
    ures_getByKeyWithFallback(aliasBundle.getAlias(), aliasKey.data(), aliasFillIn.getAlias(),
                              &aliasStatus);
    CharString unitSubType;
    if (!U_FAILURE(aliasStatus)) {
        // This means the subType is an alias. Then, replace unitSubType with the replacement.
        auto replacement = ures_getUnicodeString(aliasFillIn.getAlias(), &status);
        unitSubType.appendInvariantChars(replacement, status);
    } else {
        // Not an alias: use the unit's own subtype.
        unitSubType.append(unit.getSubtype(), status);
    }

    // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
    // TODO(ICU-20400): Get duration-*-person data properly with aliases.
    int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unitSubType.data()));
    if (subtypeLen > 7 && uprv_strcmp(unitSubType.data() + subtypeLen - 7, "-person") == 0) {
        subKey.append({unitSubType.data(), subtypeLen - 7}, status);
    } else {
        subKey.append({unitSubType.data(), subtypeLen}, status);
    }

    // For every width except FULL_NAME the gender is fetched explicitly from
    // the plain "units" tree. Errors stay in the block-local status.
    // NOTE(review): presumably FULL_NAME needs no extra lookup because the sink
    // picks up the "gender" key while loading the "units" tree below - confirm.
    if (width != UNUM_UNIT_WIDTH_FULL_NAME) {
        UErrorCode localStatus = status;
        CharString genderKey;
        genderKey.append("units", localStatus);
        genderKey.append(subKey, localStatus);
        genderKey.append("/gender", localStatus);
        StackUResourceBundle fillIn;
        ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(),
                                  &localStatus);
        outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus);
    }

    // key is "units", "unitsNarrow" or "unitsShort", followed by subKey.
    CharString key;
    key.append("units", status);
    if (width == UNUM_UNIT_WIDTH_NARROW) {
        key.append("Narrow", status);
    } else if (width == UNUM_UNIT_WIDTH_SHORT) {
        key.append("Short", status);
    }
    key.append(subKey, status);

    // Grab desired case first, if available. Then grab no-case data to fill in
    // the gaps. (The sink never overwrites a slot that is already filled.)
    if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) {
        CharString caseKey;
        caseKey.append(key, status);
        caseKey.append("/case/", status);
        caseKey.append(unitDisplayCase, status);

        // The outcome of the case-specific load is deliberately not reported.
        UErrorCode localStatus = U_ZERO_ERROR;
        // TODO(icu-units#138): our fallback logic is not spec-compliant:
        // lateral fallback should happen before locale fallback. Switch to
        // getInflectedMeasureData after homogenizing data format? Find a unit
        // test case that demonstrates the incorrect fallback logic (via
        // regional variant of an inflected language?)
        ures_getAllChildrenWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
    }

    // TODO(icu-units#138): our fallback logic is not spec-compliant: we
    // check the given case, then go straight to the no-case data. The spec
    // states we should first look for case="nominative". As part of #138,
    // either get the spec changed, or add unit tests that warn us if
    // case="nominative" data differs from no-case data?
    UErrorCode localStatus = U_ZERO_ERROR;
    ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
    // Only the SHORT width reports a failed load to the caller; for the other
    // widths localStatus is dropped.
    if (width == UNUM_UNIT_WIDTH_SHORT) {
        if (U_FAILURE(localStatus)) {
            status = localStatus;
        }
        return;
    }
}
    523 
    524 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH.
    525 void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, UnicodeString *outArray,
    526                             UErrorCode &status) {
    527    // In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
    528    // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
    529    PluralTableSink sink(outArray);
    530    // Here all outArray entries are bogus.
    531    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status));
    532    if (U_FAILURE(status)) { return; }
    533    ures_getAllChildrenWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
    534    if (U_FAILURE(status)) { return; }
    535    // Here the outArray[] entries are filled in with any CurrencyUnitPatterns data for locale,
    536    // or if there is no CurrencyUnitPatterns data for locale since the patterns all inherited
    537    // from the "other" pattern in root (which is true for many locales in CLDR 46), then only
    538    // the "other" entry has a currency pattern. So now what we do is: For all valid plural keywords
    539    // for the locale, if the corresponding outArray[] entry is bogus, fill it in from the "other"
    540    // entry. In the longer run, clients of this should instead consider using CurrencyPluralInfo
    541    // (see i18n/unicode/currpinf.h).
    542    UErrorCode localStatus = U_ZERO_ERROR;
    543    const SharedPluralRules *pr = PluralRules::createSharedInstance(
    544            locale, UPLURAL_TYPE_CARDINAL, localStatus);
    545    if (U_SUCCESS(localStatus)) {
    546        LocalPointer<StringEnumeration> keywords((*pr)->getKeywords(localStatus), localStatus);
    547        if (U_SUCCESS(localStatus)) {
    548            const char* keyword;
    549            while (((keyword = keywords->next(nullptr, localStatus)) != nullptr) && U_SUCCESS(localStatus)) {
    550                int32_t index = StandardPlural::indexOrOtherIndexFromString(keyword);
    551                if (index != StandardPlural::Form::OTHER && outArray[index].isBogus()) {
    552                    outArray[index].setTo(outArray[StandardPlural::Form::OTHER]);
    553                }
    554            }
    555        }
    556        pr->removeRef();
    557    }
    558 
    559    for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
    560        UnicodeString &pattern = outArray[i];
    561        if (pattern.isBogus()) {
    562            continue;
    563        }
    564        int32_t longNameLen = 0;
    565        const char16_t *longName = ucurr_getPluralName(
    566                currency.getISOCurrency(),
    567                locale.getName(),
    568                nullptr /* isChoiceFormat */,
    569                StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
    570                &longNameLen,
    571                &status);
    572        // Example pattern from data: "{0} {1}"
    573        // Example output after find-and-replace: "{0} US dollars"
    574        pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
    575    }
    576 }
    577 
    578 UnicodeString getCompoundValue(StringPiece compoundKey,
    579                               const Locale &locale,
    580                               const UNumberUnitWidth &width,
    581                               UErrorCode &status) {
    582    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
    583    if (U_FAILURE(status)) { return {}; }
    584    CharString key;
    585    key.append("units", status);
    586    if (width == UNUM_UNIT_WIDTH_NARROW) {
    587        key.append("Narrow", status);
    588    } else if (width == UNUM_UNIT_WIDTH_SHORT) {
    589        key.append("Short", status);
    590    }
    591    key.append("/compound/", status);
    592    key.append(compoundKey, status);
    593 
    594    UErrorCode localStatus = status;
    595    int32_t len = 0;
    596    const char16_t *ptr =
    597        ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus);
    598    if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) {
    599        // Fall back to short, which contains more compound data
    600        key.clear();
    601        key.append("unitsShort/compound/", status);
    602        key.append(compoundKey, status);
    603        ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status);
    604    } else {
    605        status = localStatus;
    606    }
    607    if (U_FAILURE(status)) {
    608        return {};
    609    }
    610    return UnicodeString(ptr, len);
    611 }
    612 
    613 /**
    614 * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
    615 *
    616 * Consider a deriveComponent rule that looks like this:
    617 *
    618 *     <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
    619 *
    620 * Instantiating an instance as follows:
    621 *
    622 *     DerivedComponents d(loc, "case", "per");
    623 *
    624 * Applying the rule in the XML element above, `d.value0("foo")` will be "foo",
    625 * and `d.value1("foo")` will be "nominative".
    626 *
    627 * The values returned by value0(...) and value1(...) are valid only while the
    628 * instance exists. In case of any kind of failure, value0(...) and value1(...)
    629 * will return "".
    630 */
    631 class DerivedComponents {
    632  public:
    633    /**
    634     * Constructor.
    635     *
    636     * The feature and structure parameters must be null-terminated. The string
    637     * referenced by compoundValue must exist for longer than the
    638     * DerivedComponents instance.
    639     */
    640    DerivedComponents(const Locale &locale, const char *feature, const char *structure) {
    641        StackUResourceBundle derivationsBundle, stackBundle;
    642        ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status);
    643        ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
    644                      &status);
    645        ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
    646                      &status);
    647        if (U_FAILURE(status)) {
    648            return;
    649        }
    650        UErrorCode localStatus = U_ZERO_ERROR;
    651        // TODO(icu-units#28): use standard normal locale resolution algorithms
    652        // rather than just grabbing language:
    653        ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
    654                      &localStatus);
    655        // TODO(icu-units#28):
    656        // - code currently assumes if the locale exists, the rules are there -
    657        //   instead of falling back to root when the requested rule is missing.
    658        // - investigate ures.h functions, see if one that uses res_findResource()
    659        //   might be better (or use res_findResource directly), or maybe help
    660        //   improve ures documentation to guide function selection?
    661        if (localStatus == U_MISSING_RESOURCE_ERROR) {
    662            ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
    663        } else {
    664            status = localStatus;
    665        }
    666        ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
    667        ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
    668        ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
    669        UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
    670        UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
    671        if (U_SUCCESS(status)) {
    672            if (val0.compare(UnicodeString(u"compound")) == 0) {
    673                compound0_ = true;
    674            } else {
    675                compound0_ = false;
    676                value0_.appendInvariantChars(val0, status);
    677            }
    678            if (val1.compare(UnicodeString(u"compound")) == 0) {
    679                compound1_ = true;
    680            } else {
    681                compound1_ = false;
    682                value1_.appendInvariantChars(val1, status);
    683            }
    684        }
    685    }
    686 
    687    // Returns a StringPiece that is only valid as long as the instance exists.
    688    StringPiece value0(const StringPiece compoundValue) const {
    689        return compound0_ ? compoundValue : value0_.toStringPiece();
    690    }
    691 
    692    // Returns a StringPiece that is only valid as long as the instance exists.
    693    StringPiece value1(const StringPiece compoundValue) const {
    694        return compound1_ ? compoundValue : value1_.toStringPiece();
    695    }
    696 
    697    // Returns a char* that is only valid as long as the instance exists.
    698    const char *value0(const char *compoundValue) const {
    699        return compound0_ ? compoundValue : value0_.data();
    700    }
    701 
    702    // Returns a char* that is only valid as long as the instance exists.
    703    const char *value1(const char *compoundValue) const {
    704        return compound1_ ? compoundValue : value1_.data();
    705    }
    706 
    707  private:
   // Error code shared by the resource lookups performed in the constructor.
   UErrorCode status = U_ZERO_ERROR;

   // True when the corresponding value read by the constructor was the
   // literal "compound"; value0()/value1() then hand back their argument.
   bool compound0_ = false, compound1_ = false;
   // Holds the strings referred to by value0 and value1 when they are not
   // "compound".
   CharString value0_, value1_;
    713 };
    714 
    715 // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding
    716 // testsuite support for testing with synthetic data?
    717 /**
    718 * Loads and returns the value in rules that look like these:
    719 *
    720 * <deriveCompound feature="gender" structure="per" value="0"/>
    721 * <deriveCompound feature="gender" structure="times" value="1"/>
    722 *
    723 * Currently a fake example, but spec compliant:
    724 * <deriveCompound feature="gender" structure="power" value="feminine"/>
    725 *
    726 * NOTE: If U_FAILURE(status), returns an empty string.
    727 */ 
    728 UnicodeString
    729 getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
    730    StackUResourceBundle derivationsBundle, stackBundle;
    731    ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status);
    732    ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
    733                  &status);
    734    ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
    735    // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
    736    ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status);
    737    // TODO:
    738    // - code currently assumes if the locale exists, the rules are there -
    739    //   instead of falling back to root when the requested rule is missing.
    740    // - investigate ures.h functions, see if one that uses res_findResource()
    741    //   might be better (or use res_findResource directly), or maybe help
    742    //   improve ures documentation to guide function selection?
    743    if (status == U_MISSING_RESOURCE_ERROR) {
    744        status = U_ZERO_ERROR;
    745        ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
    746    }
    747    ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
    748    ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
    749    UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
    750    if (U_FAILURE(status)) {
    751        return {};
    752    }
    753    U_ASSERT(!uVal.isBogus());
    754    return uVal;
    755 }
    756 
    757 // Returns the gender string for structures following these rules:
    758 //
    759 // <deriveCompound feature="gender" structure="per" value="0"/>
    760 // <deriveCompound feature="gender" structure="times" value="1"/>
    761 //
    762 // Fake example:
    763 // <deriveCompound feature="gender" structure="power" value="feminine"/>
    764 //
    765 // data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that
    766 // correspond to value="0" and value="1".
    767 //
    768 // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
    769 // "prefix" doesn't).
    770 UnicodeString getDerivedGender(Locale locale,
    771                               const char *structure,
    772                               UnicodeString *data0,
    773                               UnicodeString *data1,
    774                               UErrorCode &status) {
    775    UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status);
    776    if (val.length() == 1) {
    777        switch (val[0]) {
    778        case u'0':
    779            return data0[GENDER_INDEX];
    780        case u'1':
    781            if (data1 == nullptr) {
    782                return {};
    783            }
    784            return data1[GENDER_INDEX];
    785        }
    786    }
    787    return val;
    788 }
    789 
    790 ////////////////////////
    791 /// END DATA LOADING ///
    792 ////////////////////////
    793 
    794 // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace
    795 const char16_t *trimSpaceChars(const char16_t *s, int32_t &length) {
    796    if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) {
    797        return s;
    798    }
    799    int32_t start = 0;
    800    int32_t limit = length;
    801    while (start < limit && u_isJavaSpaceChar(s[start])) {
    802        ++start;
    803    }
    804    if (start < limit) {
    805        // There is non-white space at start; we will not move limit below that,
    806        // so we need not test start<limit in the loop.
    807        while (u_isJavaSpaceChar(s[limit - 1])) {
    808            --limit;
    809        }
    810    }
    811    length = limit - start;
    812    return s + start;
    813 }
    814 
    815 /**
    816 * Calculates the gender of an arbitrary unit: this is the *second*
    817 * implementation of an algorithm to do this:
    818 *
    819 * Gender is also calculated in "processPatternTimes": that code path is "bottom
    820 * up", loading the gender for every component of a compound unit (at the same
    821 * time as loading the Long Names formatting patterns), even if the gender is
    822 * unneeded, then combining the single units' genders into the compound unit's
    823 * gender, according to the rules. This algorithm does a lazier "top-down"
    824 * evaluation, starting with the compound unit, calculating which single unit's
    825 * gender is needed by breaking it down according to the rules, and then loading
    826 * only the gender of the one single unit who's gender is needed.
    827 *
    828 * For future refactorings:
    829 * 1. we could drop processPatternTimes' gender calculation and just call this
    830 *    function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very
    831 *    same table as the formatting patterns, so loading it then may be
    832 *    efficient. For other unit widths however, it needs to be explicitly looked
    833 *    up anyway.
    834 * 2. alternatively, if CLDR is providing all the genders we need such that we
    835 *    don't need to calculate them in ICU anymore, we could drop this function
    836 *    and keep only processPatternTimes' calculation. (And optimise it a bit?)
    837 *
    838 * @param locale The desired locale.
    839 * @param unit The measure unit to calculate the gender for.
    840 * @return The gender string for the unit, or an empty string if unknown or
    841 *     ungendered.
    842 */
    843 UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) {
    844    MeasureUnitImpl impl;
    845    const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status);
    846    int32_t singleUnitIndex = 0;
    847    if (mui.complexity == UMEASURE_UNIT_COMPOUND) {
    848        int32_t startSlice = 0;
    849        // inclusive
    850        int32_t endSlice = mui.singleUnits.length()-1;
    851        U_ASSERT(endSlice > 0); // Else it would not be COMPOUND
    852        if (mui.singleUnits[endSlice]->dimensionality < 0) {
    853            // We have a -per- construct
    854            UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status);
    855            if (perRule.length() != 1) {
    856                // Fixed gender for -per- units
    857                return perRule;
    858            }
    859            if (perRule[0] == u'1') {
    860                // Find the start of the denominator. We already know there is one.
    861                while (mui.singleUnits[startSlice]->dimensionality >= 0) {
    862                    startSlice++;
    863                }
    864            } else {
    865                // Find the end of the numerator
    866                while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) {
    867                    endSlice--;
    868                }
    869                if (endSlice < 0) {
    870                    // We have only a denominator, e.g. "per-second".
    871                    // TODO(icu-units#28): find out what gender to use in the
    872                    // absence of a first value - mentioned in CLDR-14253.
    873                    return {};
    874                }
    875            }
    876        }
    877        if (endSlice > startSlice) {
    878            // We have a -times- construct
    879            UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status);
    880            if (timesRule.length() != 1) {
    881                // Fixed gender for -times- units
    882                return timesRule;
    883            }
    884            if (timesRule[0] == u'0') {
    885                endSlice = startSlice;
    886            } else {
    887                // We assume timesRule[0] == u'1'
    888                startSlice = endSlice;
    889            }
    890        }
    891        U_ASSERT(startSlice == endSlice);
    892        singleUnitIndex = startSlice;
    893    } else if (mui.complexity == UMEASURE_UNIT_MIXED) {
    894        status = U_INTERNAL_PROGRAM_ERROR;
    895        return {};
    896    } else {
    897        U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE);
    898        U_ASSERT(mui.singleUnits.length() == 1);
    899    }
    900 
    901    // Now we know which singleUnit's gender we want
    902    const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex];
    903    // Check for any power-prefix gender override:
    904    if (std::abs(singleUnit->dimensionality) != 1) {
    905        UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status);
    906        if (powerRule.length() != 1) {
    907            // Fixed gender for -powN- units
    908            return powerRule;
    909        }
    910        // powerRule[0] == u'0'; u'1' not currently in spec.
    911    }
    912    // Check for any SI and binary prefix gender override:
    913    if (std::abs(singleUnit->dimensionality) != 1) {
    914        UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status);
    915        if (prefixRule.length() != 1) {
    916            // Fixed gender for -powN- units
    917            return prefixRule;
    918        }
    919        // prefixRule[0] == u'0'; u'1' not currently in spec.
    920    }
    921    // Now we've boiled it down to the gender of one simple unit identifier:
    922    return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status),
    923                               status);
    924 }
    925 
    926 void maybeCalculateGender(const Locale &locale,
    927                          const MeasureUnit &unitRef,
    928                          UnicodeString *outArray,
    929                          UErrorCode &status) {
    930    if (outArray[GENDER_INDEX].isBogus()) {
    931        UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status);
    932        if (meterGender.isEmpty()) {
    933            // No gender for meter: assume ungendered language
    934            return;
    935        }
    936        // We have a gendered language, but are lacking gender for unitRef.
    937        outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status);
    938    }
    939 }
    940 
    941 } // namespace
    942 
    943 void LongNameHandler::forMeasureUnit(const Locale &loc,
    944                                     const MeasureUnit &unitRef,
    945                                     const UNumberUnitWidth &width,
    946                                     const char *unitDisplayCase,
    947                                     const PluralRules *rules,
    948                                     const MicroPropsGenerator *parent,
    949                                     LongNameHandler *fillIn,
    950                                     UErrorCode &status) {
    951    // From https://unicode.org/reports/tr35/tr35-general.html#compound-units -
    952    // Points 1 and 2 are mostly handled by MeasureUnit:
    953    //
    954    // 1. If the unitId is empty or invalid, fail
    955    // 2. Put the unitId into normalized order
    956    U_ASSERT(fillIn != nullptr);
    957 
    958    if (uprv_strcmp(unitRef.getType(), "") != 0) {
    959        // Handling built-in units:
    960        //
    961        // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant)
    962        //    - If result is not empty, return it
    963        UnicodeString simpleFormats[ARRAY_LENGTH];
    964        getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
    965        maybeCalculateGender(loc, unitRef, simpleFormats, status);
    966        if (U_FAILURE(status)) {
    967            return;
    968        }
    969        fillIn->rules = rules;
    970        fillIn->parent = parent;
    971        fillIn->simpleFormatsToModifiers(simpleFormats,
    972                                         {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
    973        if (!simpleFormats[GENDER_INDEX].isBogus()) {
    974            fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
    975        }
    976        return;
    977 
    978        // TODO(icu-units#145): figure out why this causes a failure in
    979        // format/MeasureFormatTest/TestIndividualPluralFallback and other
    980        // tests, when it should have been an alternative for the lines above:
    981 
    982        // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
    983        // fillIn->rules = rules;
    984        // fillIn->parent = parent;
    985        // return;
    986    } else {
    987        // Check if it is a MeasureUnit this constructor handles: this
    988        // constructor does not handle mixed units
    989        U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED);
    990        forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
    991        fillIn->rules = rules;
    992        fillIn->parent = parent;
    993        return;
    994    }
    995 }
    996 
    997 void LongNameHandler::forArbitraryUnit(const Locale &loc,
    998                                       const MeasureUnit &unitRef,
    999                                       const UNumberUnitWidth &width,
   1000                                       const char *unitDisplayCase,
   1001                                       LongNameHandler *fillIn,
   1002                                       UErrorCode &status) {
   1003    if (U_FAILURE(status)) {
   1004        return;
   1005    }
   1006    if (fillIn == nullptr) {
   1007        status = U_INTERNAL_PROGRAM_ERROR;
   1008        return;
   1009    }
   1010 
   1011    // Numbered list items are from the algorithms at
   1012    // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
   1013    //
   1014    // 4. Divide the unitId into numerator (the part before the "-per-") and
   1015    //    denominator (the part after the "-per-). If both are empty, fail
   1016    MeasureUnitImpl unit;
   1017    MeasureUnitImpl perUnit;
   1018 
   1019    if (unitRef.getConstantDenominator(status) != 0) {
   1020        perUnit.constantDenominator = unitRef.getConstantDenominator(status);
   1021    }
   1022 
   1023    {
   1024        MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
   1025        if (U_FAILURE(status)) {
   1026            return;
   1027        }
   1028        for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
   1029            SingleUnitImpl *subUnit = fullUnit.singleUnits[i];
   1030            if (subUnit->dimensionality > 0) {
   1031                unit.appendSingleUnit(*subUnit, status);
   1032            } else {
   1033                subUnit->dimensionality *= -1;
   1034                perUnit.appendSingleUnit(*subUnit, status);
   1035            }
   1036        }
   1037    }
   1038 
   1039    // TODO(icu-units#28): check placeholder logic, see if it needs to be
   1040    // present here instead of only in processPatternTimes:
   1041    //
   1042    // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty
   1043 
   1044    DerivedComponents derivedPerCases(loc, "case", "per");
   1045 
   1046    // 6. numeratorUnitString
   1047    UnicodeString numeratorUnitData[ARRAY_LENGTH];
   1048    processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase),
   1049                        numeratorUnitData, status);
   1050 
   1051    // 7. denominatorUnitString
   1052    UnicodeString denominatorUnitData[ARRAY_LENGTH];
   1053    processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase),
   1054                        denominatorUnitData, status);
   1055 
   1056    // TODO(icu-units#139):
   1057    // - implement DerivedComponents for "plural/times" and "plural/power":
   1058    //   French has different rules, we'll be producing the wrong results
   1059    //   currently. (Prove via tests!)
   1060    // - implement DerivedComponents for "plural/per", "plural/prefix",
   1061    //   "case/times", "case/power", and "case/prefix" - although they're
   1062    //   currently hardcoded. Languages with different rules are surely on the
   1063    //   way.
   1064    //
   1065    // Currently we only use "case/per", "plural/times", "case/times", and
   1066    // "case/power".
   1067    //
   1068    // This may have impact on multiSimpleFormatsToModifiers(...) below too?
   1069    // These rules are currently (ICU 69) all the same and hard-coded below.
   1070    UnicodeString perUnitPattern;
   1071    if (!denominatorUnitData[PER_INDEX].isBogus()) {
   1072        // If we have no denominator, we obtain the empty string:
   1073        perUnitPattern = denominatorUnitData[PER_INDEX];
   1074    } else {
   1075        // 8. Set perPattern to be getValue([per], locale, length)
   1076        UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status);
   1077        // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
   1078        SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status);
   1079        if (U_FAILURE(status)) {
   1080            return;
   1081        }
   1082        // Plural and placeholder handling for 7. denominatorUnitString:
   1083        // TODO(icu-units#139): hardcoded:
   1084        // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
   1085        UnicodeString denominatorFormat =
   1086            getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status);
   1087        // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale.
   1088        SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status);
   1089        if (U_FAILURE(status)) {
   1090            return;
   1091        }
   1092        UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments();
   1093        int32_t trimmedLen = denominatorPattern.length();
   1094        const char16_t *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen);
   1095        UnicodeString denominatorString(false, trimmed, trimmedLen);
   1096        // 9. If the denominatorString is empty, set result to
   1097        //    [numeratorString], otherwise set result to format(perPattern,
   1098        //    numeratorString, denominatorString)
   1099        //
   1100        // TODO(icu-units#28): Why does UnicodeString need to be explicit in the
   1101        // following line?
   1102        perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status);
   1103        if (U_FAILURE(status)) {
   1104            return;
   1105        }
   1106    }
   1107    if (perUnitPattern.length() == 0) {
   1108        fillIn->simpleFormatsToModifiers(numeratorUnitData,
   1109                                         {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
   1110    } else {
   1111        fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern,
   1112                                              {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
   1113    }
   1114 
   1115    // Gender
   1116    //
   1117    // TODO(icu-units#28): find out what gender to use in the absence of a first
   1118    // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253.
   1119    //
   1120    // gender/per deriveCompound rules don't say:
   1121    // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ←  gender(gram) -->
   1122    fillIn->gender = getGenderString(
   1123        getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status);
   1124 }
   1125 
   1126 void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit,
   1127                                          Locale loc,
   1128                                          const UNumberUnitWidth &width,
   1129                                          const char *caseVariant,
   1130                                          UnicodeString *outArray,
   1131                                          UErrorCode &status) {
   1132    if (U_FAILURE(status)) {
   1133        return;
   1134    }
   1135    if (productUnit.complexity == UMEASURE_UNIT_MIXED) {
   1136        // These are handled by MixedUnitLongNameHandler
   1137        status = U_UNSUPPORTED_ERROR;
   1138        return;
   1139    }
   1140 
   1141 #if U_DEBUG
   1142    for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
   1143        U_ASSERT(outArray[pluralIndex].length() == 0);
   1144        U_ASSERT(!outArray[pluralIndex].isBogus());
   1145    }
   1146 #endif
   1147 
   1148    if (productUnit.identifier.isEmpty()) {
   1149        // TODO(icu-units#28): consider when serialize should be called.
   1150        // identifier might also be empty for MeasureUnit().
   1151        productUnit.serialize(status);
   1152    }
   1153    if (U_FAILURE(status)) {
   1154        return;
   1155    }
   1156    if (productUnit.identifier.isEmpty()) {
   1157        // MeasureUnit(): no units: return empty strings.
   1158        return;
   1159    }
   1160 
   1161    MeasureUnit builtinUnit;
   1162    if (MeasureUnit::findBySubType(productUnit.identifier.data(), &builtinUnit)) {
   1163        // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it
   1164        // breaks them all down. Do we want to drop this?
   1165        // - findBySubType isn't super efficient, if we skip it and go to basic
   1166        //   singles, we don't have to construct MeasureUnit's anymore.
   1167        // - Check all the existing unit tests that fail without this: is it due
   1168        //   to incorrect fallback via getMeasureData?
   1169        // - Do those unit tests cover this code path representatively?
   1170        if (builtinUnit != MeasureUnit()) {
   1171            getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status);
   1172            maybeCalculateGender(loc, builtinUnit, outArray, status);
   1173        }
   1174        return;
   1175    }
   1176 
   1177    // 2. Set timesPattern to be getValue(times, locale, length)
   1178    UnicodeString timesPattern = getCompoundValue("times", loc, width, status);
   1179    SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status);
   1180    if (U_FAILURE(status)) {
   1181        return;
   1182    }
   1183 
   1184    PlaceholderPosition globalPlaceholder[ARRAY_LENGTH];
   1185    char16_t globalJoinerChar = 0;
   1186    // Numbered list items are from the algorithms at
   1187    // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
   1188    //
   1189    // pattern(...) point 5:
   1190    // - Set both globalPlaceholder and globalPlaceholderPosition to be empty
   1191    //
   1192    // 3. Set result to be empty
   1193    for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
   1194        // Initial state: empty string pattern, via all falling back to OTHER:
   1195        if (pluralIndex == StandardPlural::Form::OTHER) {
   1196            outArray[pluralIndex].remove();
   1197        } else {
   1198            outArray[pluralIndex].setToBogus();
   1199        }
   1200        globalPlaceholder[pluralIndex] = PH_EMPTY;
   1201    }
   1202 
   1203    // Empty string represents "compound" (propagate the plural form).
   1204    const char *pluralCategory = "";
   1205    DerivedComponents derivedTimesPlurals(loc, "plural", "times");
   1206    DerivedComponents derivedTimesCases(loc, "case", "times");
   1207    DerivedComponents derivedPowerCases(loc, "case", "power");
   1208 
   1209    if (productUnit.constantDenominator != 0) {
   1210        CharString constantString;
   1211        constantString.appendNumber(productUnit.constantDenominator, status);
   1212        outArray[CONSTANT_DENOMINATOR_INDEX] = UnicodeString::fromUTF8(constantString.toStringPiece());
   1213    }
   1214 
   1215    // 4. For each single_unit in product_unit
   1216    for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length();
   1217         singleUnitIndex++) {
   1218        SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex];
   1219        const char *singlePluralCategory;
   1220        const char *singleCaseVariant;
   1221        // TODO(icu-units#28): ensure we have unit tests that change/fail if we
   1222        // assign incorrect case variants here:
   1223        if (singleUnitIndex < productUnit.singleUnits.length() - 1) {
   1224            // 4.1. If hasMultiple
   1225            singlePluralCategory = derivedTimesPlurals.value0(pluralCategory);
   1226            singleCaseVariant = derivedTimesCases.value0(caseVariant);
   1227            pluralCategory = derivedTimesPlurals.value1(pluralCategory);
   1228            caseVariant = derivedTimesCases.value1(caseVariant);
   1229        } else {
   1230            singlePluralCategory = derivedTimesPlurals.value1(pluralCategory);
   1231            singleCaseVariant = derivedTimesCases.value1(caseVariant);
   1232        }
   1233 
   1234        // 4.2. Get the gender of that single_unit
   1235        MeasureUnit simpleUnit;
   1236        if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) {
   1237            // Ideally all simple units should be known, but they're not:
   1238            // 100-kilometer is internally treated as a simple unit, but it is
   1239            // not a built-in unit and does not have formatting data in CLDR 39.
   1240            //
   1241            // TODO(icu-units#28): test (desirable) invariants in unit tests.
   1242            status = U_UNSUPPORTED_ERROR;
   1243            return;
   1244        }
   1245        const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status);
   1246 
   1247        // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-'
   1248        U_ASSERT(singleUnit->dimensionality > 0);
   1249        int32_t dimensionality = singleUnit->dimensionality;
   1250        UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH];
   1251        if (dimensionality != 1) {
   1252            // 4.3.1. set dimensionalityPrefixPattern to be
   1253            //   getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender),
   1254            //   such as "{0} kwadratowym"
   1255            CharString dimensionalityKey("compound/power", status);
   1256            dimensionalityKey.appendNumber(dimensionality, status);
   1257            getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender,
   1258                                    singleCaseVariant, dimensionalityPrefixPatterns, status);
   1259            if (U_FAILURE(status)) {
   1260                // At the time of writing, only pow2 and pow3 are supported.
   1261                // Attempting to format other powers results in a
   1262                // U_RESOURCE_TYPE_MISMATCH. We convert the error if we
   1263                // understand it:
   1264                if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) {
   1265                    status = U_UNSUPPORTED_ERROR;
   1266                }
   1267                return;
   1268            }
   1269 
   1270            // TODO(icu-units#139):
   1271            // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory)
   1272 
   1273            // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant)
   1274            singleCaseVariant = derivedPowerCases.value0(singleCaseVariant);
   1275            // 4.3.4. remove the dimensionality_prefix from singleUnit
   1276            singleUnit->dimensionality = 1;
   1277        }
   1278 
   1279        // 4.4. if singleUnit starts with an si_prefix, such as 'centi'
   1280        UMeasurePrefix prefix = singleUnit->unitPrefix;
   1281        UnicodeString prefixPattern;
   1282        if (prefix != UMEASURE_PREFIX_ONE) {
   1283            // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale,
   1284            //        length), such as "centy{0}"
   1285            CharString prefixKey;
   1286            // prefixKey looks like "1024p3" or "10p-2":
   1287            prefixKey.appendNumber(umeas_getPrefixBase(prefix), status);
   1288            prefixKey.append('p', status);
   1289            prefixKey.appendNumber(umeas_getPrefixPower(prefix), status);
   1290            // Contains a pattern like "centy{0}".
   1291            prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status);
   1292 
   1293            // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory)
   1294            //
   1295            // TODO(icu-units#139): that refers to these rules:
   1296            // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/>
   1297            // though I'm not sure what other value they might end up having.
   1298            //
   1299            // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant)
   1300            //
   1301            // TODO(icu-units#139): that refers to:
   1302            // <deriveComponent feature="case" structure="prefix" value0="nominative"
   1303            // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply
   1304            // propagates.
   1305 
   1306            // 4.4.4. remove the si_prefix from singleUnit
   1307            singleUnit->unitPrefix = UMEASURE_PREFIX_ONE;
   1308        }
   1309 
   1310        // 4.5. Set corePattern to be the getValue(singleUnit, locale, length,
   1311        //      singlePluralCategory, singleCaseVariant), such as "{0} metrem"
   1312        UnicodeString singleUnitArray[ARRAY_LENGTH];
   1313        // At this point we are left with a Simple Unit:
   1314        U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==
   1315                 0);
   1316        getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray,
   1317                       status);
   1318        if (U_FAILURE(status)) {
   1319            // Shouldn't happen if we have data for all single units
   1320            return;
   1321        }
   1322 
   1323        // Calculate output gender
   1324        if (!singleUnitArray[GENDER_INDEX].isBogus()) {
   1325            U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty());
   1326            UnicodeString uVal;
   1327 
   1328            if (prefix != UMEASURE_PREFIX_ONE) {
   1329                singleUnitArray[GENDER_INDEX] =
   1330                    getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status);
   1331            }
   1332 
   1333            if (dimensionality != 1) {
   1334                singleUnitArray[GENDER_INDEX] =
   1335                    getDerivedGender(loc, "power", singleUnitArray, nullptr, status);
   1336            }
   1337 
   1338            UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status);
   1339            if (timesGenderRule.length() == 1) {
   1340                switch (timesGenderRule[0]) {
   1341                case u'0':
   1342                    if (singleUnitIndex == 0) {
   1343                        U_ASSERT(outArray[GENDER_INDEX].isBogus());
   1344                        outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
   1345                    }
   1346                    break;
   1347                case u'1':
   1348                    if (singleUnitIndex == productUnit.singleUnits.length() - 1) {
   1349                        U_ASSERT(outArray[GENDER_INDEX].isBogus());
   1350                        outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
   1351                    }
   1352                }
   1353            } else {
   1354                if (outArray[GENDER_INDEX].isBogus()) {
   1355                    outArray[GENDER_INDEX] = timesGenderRule;
   1356                }
   1357            }
   1358        }
   1359 
   1360        // Calculate resulting patterns for each plural form
   1361        for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
   1362            StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex);
   1363 
   1364            // singleUnitArray[pluralIndex] looks something like "{0} Meter"
   1365            if (outArray[pluralIndex].isBogus()) {
   1366                if (singleUnitArray[pluralIndex].isBogus()) {
   1367                    // Let the usual plural fallback mechanism take care of this
   1368                    // plural form
   1369                    continue;
   1370                } else {
   1371                    // Since our singleUnit can have a plural form that outArray
   1372                    // doesn't yet have (relying on fallback to OTHER), we start
   1373                    // by grabbing it with the normal plural fallback mechanism
   1374                    outArray[pluralIndex] = getWithPlural(outArray, plural, status);
   1375                    if (U_FAILURE(status)) {
   1376                        return;
   1377                    }
   1378                }
   1379            }
   1380 
   1381            if (uprv_strcmp(singlePluralCategory, "") != 0) {
   1382                plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status));
   1383            }
   1384 
   1385            // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern.
   1386            UnicodeString coreUnit;
   1387            PlaceholderPosition placeholderPosition;
   1388            char16_t joinerChar;
   1389            extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit,
   1390                               placeholderPosition, joinerChar);
   1391 
   1392            // 4.7 If the position is middle, then fail
   1393            if (placeholderPosition == PH_MIDDLE) {
   1394                status = U_UNSUPPORTED_ERROR;
   1395                return;
   1396            }
   1397 
   1398            // 4.8. If globalPlaceholder is empty
   1399            if (globalPlaceholder[pluralIndex] == PH_EMPTY) {
   1400                globalPlaceholder[pluralIndex] = placeholderPosition;
   1401                globalJoinerChar = joinerChar;
   1402            } else {
   1403                // Expect all units involved to have the same placeholder position
   1404                U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition);
   1405                // TODO(icu-units#28): Do we want to add a unit test that checks
   1406                // for consistent joiner chars? Probably not, given how
   1407                // inconsistent they are. File a CLDR ticket with examples?
   1408            }
   1409            // Now coreUnit would be just "Meter"
   1410 
   1411            // 4.9. If siPrefixPattern is not empty
   1412            if (prefix != UMEASURE_PREFIX_ONE) {
   1413                SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status);
   1414                if (U_FAILURE(status)) {
   1415                    return;
   1416                }
   1417 
   1418                // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern,
   1419                //        coreUnit)
   1420                UnicodeString tmp;
   1421                // combineLowercasing(locale, length, prefixPattern, coreUnit)
   1422                //
   1423                // TODO(icu-units#28): run this only if prefixPattern does not
   1424                // contain space characters - do languages "as", "bn", "hi",
   1425                // "kk", etc have concepts of upper and lower case?:
   1426                if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
   1427                    coreUnit.toLower(loc);
   1428                }
   1429                prefixCompiled.format(coreUnit, tmp, status);
   1430                if (U_FAILURE(status)) {
   1431                    return;
   1432                }
   1433                coreUnit = tmp;
   1434            }
   1435 
   1436            // 4.10. If dimensionalityPrefixPattern is not empty
   1437            if (dimensionality != 1) {
   1438                SimpleFormatter dimensionalityCompiled(
   1439                    getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status);
   1440                if (U_FAILURE(status)) {
   1441                    return;
   1442                }
   1443 
   1444                // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length,
   1445                //         dimensionalityPrefixPattern, coreUnit)
   1446                UnicodeString tmp;
   1447                // combineLowercasing(locale, length, prefixPattern, coreUnit)
   1448                //
   1449                // TODO(icu-units#28): run this only if prefixPattern does not
   1450                // contain space characters - do languages "as", "bn", "hi",
   1451                // "kk", etc have concepts of upper and lower case?:
   1452                if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
   1453                    coreUnit.toLower(loc);
   1454                }
   1455                dimensionalityCompiled.format(coreUnit, tmp, status);
   1456                if (U_FAILURE(status)) {
   1457                    return;
   1458                }
   1459                coreUnit = tmp;
   1460            }
   1461 
   1462            if (outArray[pluralIndex].length() == 0) {
   1463                // 4.11. If the result is empty, set result to be coreUnit
   1464                outArray[pluralIndex] = coreUnit;
   1465            } else {
   1466                // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit)
   1467                UnicodeString tmp;
   1468                timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status);
   1469                outArray[pluralIndex] = tmp;
   1470            }
   1471        }
   1472    }
   1473 
   1474    // 5. Handling constant denominator if it exists.
   1475    if (productUnit.constantDenominator != 0) {
   1476        int32_t pluralIndex = -1;
   1477        for (int32_t index = 0; index < StandardPlural::Form::COUNT; index++) {
   1478            if (!outArray[index].isBogus()) {
   1479                pluralIndex = index;
   1480                break;
   1481            }
   1482        }
   1483 
   1484        U_ASSERT(pluralIndex >= 0); // "No plural form found for constant denominator"
   1485 
   1486        // TODO(ICU-23039):
   1487        // Improve the handling of constant_denominator representation.
   1488        // For instance, a constant_denominator of 1000000 should be adaptable to
   1489        // formats like
   1490        // 1,000,000, 1e6, or 1 million.
   1491        // Furthermore, ensure consistent pluralization rules for units. For example,
   1492        // "meter per 100 seconds" should be evaluated for correct singular/plural
   1493        // usage: "second" or "seconds"?
   1494        // Similarly, "kilogram per 1000 meters" should be checked for "meter" or
   1495        // "meters"?
   1496        if (outArray[pluralIndex].length() == 0) {
   1497            outArray[pluralIndex] = outArray[CONSTANT_DENOMINATOR_INDEX];
   1498        } else {
   1499            UnicodeString tmp;
   1500            timesPatternFormatter.format(outArray[CONSTANT_DENOMINATOR_INDEX], outArray[pluralIndex],
   1501                                         tmp, status);
   1502            outArray[pluralIndex] = tmp;
   1503        }
   1504    }
   1505 
   1506    for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
   1507        if (globalPlaceholder[pluralIndex] == PH_BEGINNING) {
   1508            UnicodeString tmp;
   1509            tmp.append(u"{0}", 3);
   1510            if (globalJoinerChar != 0) {
   1511                tmp.append(globalJoinerChar);
   1512            }
   1513            tmp.append(outArray[pluralIndex]);
   1514            outArray[pluralIndex] = tmp;
   1515        } else if (globalPlaceholder[pluralIndex] == PH_END) {
   1516            if (globalJoinerChar != 0) {
   1517                outArray[pluralIndex].append(globalJoinerChar);
   1518            }
   1519            outArray[pluralIndex].append(u"{0}", 3);
   1520        }
   1521    }
   1522 }
   1523 
   1524 UnicodeString LongNameHandler::getUnitDisplayName(
   1525        const Locale& loc,
   1526        const MeasureUnit& unit,
   1527        UNumberUnitWidth width,
   1528        UErrorCode& status) {
   1529    if (U_FAILURE(status)) {
   1530        return ICU_Utility::makeBogusString();
   1531    }
   1532    UnicodeString simpleFormats[ARRAY_LENGTH];
   1533    getMeasureData(loc, unit, width, "", simpleFormats, status);
   1534    return simpleFormats[DNAM_INDEX];
   1535 }
   1536 
   1537 UnicodeString LongNameHandler::getUnitPattern(
   1538        const Locale& loc,
   1539        const MeasureUnit& unit,
   1540        UNumberUnitWidth width,
   1541        StandardPlural::Form pluralForm,
   1542        UErrorCode& status) {
   1543    if (U_FAILURE(status)) {
   1544        return ICU_Utility::makeBogusString();
   1545    }
   1546    UnicodeString simpleFormats[ARRAY_LENGTH];
   1547    getMeasureData(loc, unit, width, "", simpleFormats, status);
   1548    // The above already handles fallback from other widths to short
   1549    if (U_FAILURE(status)) {
   1550        return ICU_Utility::makeBogusString();
   1551    }
   1552    // Now handle fallback from other plural forms to OTHER
   1553    return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]:
   1554            simpleFormats[StandardPlural::Form::OTHER];
   1555 }
   1556 
   1557 LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency,
   1558                                                      const PluralRules *rules,
   1559                                                      const MicroPropsGenerator *parent,
   1560                                                      UErrorCode &status) {
   1561    LocalPointer<LongNameHandler> result(new LongNameHandler(rules, parent), status);
   1562    if (U_FAILURE(status)) {
   1563        return nullptr;
   1564    }
   1565    UnicodeString simpleFormats[ARRAY_LENGTH];
   1566    getCurrencyLongNameData(loc, currency, simpleFormats, status);
   1567    if (U_FAILURE(status)) { return nullptr; }
   1568    result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
   1569    // TODO(icu-units#28): currency gender?
   1570    return result.orphan();
   1571 }
   1572 
   1573 void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
   1574                                               UErrorCode &status) {
   1575    for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
   1576        StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
   1577        UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status);
   1578        if (U_FAILURE(status)) { return; }
   1579        SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
   1580        if (U_FAILURE(status)) { return; }
   1581        fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural});
   1582    }
   1583 }
   1584 
   1585 void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat,
   1586                                                    Field field, UErrorCode &status) {
   1587    SimpleFormatter trailCompiled(trailFormat, 1, 1, status);
   1588    if (U_FAILURE(status)) { return; }
   1589    for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
   1590        StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
   1591        UnicodeString leadFormat = getWithPlural(leadFormats, plural, status);
   1592        if (U_FAILURE(status)) { return; }
   1593        UnicodeString compoundFormat;
   1594        if (leadFormat.length() == 0) {
   1595            compoundFormat = trailFormat;
   1596        } else {
   1597            trailCompiled.format(leadFormat, compoundFormat, status);
   1598            if (U_FAILURE(status)) { return; }
   1599        }
   1600        SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status);
   1601        if (U_FAILURE(status)) { return; }
   1602        fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural});
   1603    }
   1604 }
   1605 
   1606 void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
   1607                                      UErrorCode &status) const {
   1608    if (parent != nullptr) {
   1609        parent->processQuantity(quantity, micros, status);
   1610    }
   1611    StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
   1612    micros.modOuter = &fModifiers[pluralForm];
   1613    micros.gender = gender;
   1614 }
   1615 
   1616 const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
   1617    return &fModifiers[plural];
   1618 }
   1619 
   1620 void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
   1621                                              const MeasureUnit &mixedUnit,
   1622                                              const UNumberUnitWidth &width,
   1623                                              const char *unitDisplayCase,
   1624                                              const PluralRules *rules,
   1625                                              const MicroPropsGenerator *parent,
   1626                                              MixedUnitLongNameHandler *fillIn,
   1627                                              UErrorCode &status) {
   1628    U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED);
   1629    U_ASSERT(fillIn != nullptr);
   1630    if (U_FAILURE(status)) {
   1631        return;
   1632    }
   1633 
   1634    MeasureUnitImpl temp;
   1635    const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
   1636    // Defensive, for production code:
   1637    if (impl.complexity != UMEASURE_UNIT_MIXED) {
   1638        // Should be using the normal LongNameHandler
   1639        status = U_UNSUPPORTED_ERROR;
   1640        return;
   1641    }
   1642 
   1643    fillIn->fMixedUnitCount = impl.singleUnits.length();
   1644    fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
   1645    for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
   1646        // Grab data for each of the components.
   1647        UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
   1648        // TODO(CLDR-14582): check from the CLDR-14582 ticket whether this
   1649        // propagation of unitDisplayCase is correct:
   1650        getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
   1651                       status);
   1652        // TODO(ICU-21494): if we add support for gender for mixed units, we may
   1653        // need maybeCalculateGender() here.
   1654    }
   1655 
   1656    // TODO(icu-units#120): Make sure ICU doesn't output zero-valued
   1657    // high-magnitude fields
   1658    // * for mixed units count N, produce N listFormatters, one for each subset
   1659    //   that might be formatted.
   1660    UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
   1661    if (width == UNUM_UNIT_WIDTH_NARROW) {
   1662        listWidth = ULISTFMT_WIDTH_NARROW;
   1663    } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
   1664        // This might be the same as SHORT in most languages:
   1665        listWidth = ULISTFMT_WIDTH_WIDE;
   1666    }
   1667    fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
   1668        ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
   1669    // TODO(ICU-21494): grab gender of each unit, calculate the gender
   1670    // associated with this list formatter, save it for later.
   1671    fillIn->rules = rules;
   1672    fillIn->parent = parent;
   1673 
   1674    // We need a localised NumberFormatter for the numbers of the bigger units
   1675    // (providing Arabic numerals, for example).
   1676    fillIn->fNumberFormatter = NumberFormatter::withLocale(loc);
   1677 }
   1678 
   1679 void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
   1680                                               UErrorCode &status) const {
   1681    U_ASSERT(fMixedUnitCount > 1);
   1682    if (parent != nullptr) {
   1683        parent->processQuantity(quantity, micros, status);
   1684    }
   1685    micros.modOuter = getMixedUnitModifier(quantity, micros, status);
   1686 }
   1687 
   1688 const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity,
   1689                                                               MicroProps &micros,
   1690                                                               UErrorCode &status) const {
   1691    if (micros.mixedMeasuresCount == 0) {
   1692        U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value
   1693        status = U_UNSUPPORTED_ERROR;
   1694        return &micros.helpers.emptyWeakModifier;
   1695    }
   1696 
   1697    // Algorithm:
   1698    //
   1699    // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should
   1700    // find "3 yard" and "1 foot" in micros.mixedMeasures.
   1701    //
   1702    // Obtain long-names with plural forms corresponding to measure values:
   1703    //   * {0} yards, {0} foot, {0} inches
   1704    //
   1705    // Format the integer values appropriately and modify with the format
   1706    // strings:
   1707    //   - 3 yards, 1 foot
   1708    //
   1709    // Use ListFormatter to combine, with one placeholder:
   1710    //   - 3 yards, 1 foot and {0} inches
   1711    //
   1712    // Return a SimpleModifier for this pattern, letting the rest of the
   1713    // pipeline take care of the remaining inches.
   1714 
   1715    LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status);
   1716    if (U_FAILURE(status)) {
   1717        return &micros.helpers.emptyWeakModifier;
   1718    }
   1719 
   1720    StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER;
   1721    for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) {
   1722        DecimalQuantity fdec;
   1723 
   1724        // If numbers are negative, only the first number needs to have its
   1725        // negative sign formatted.
   1726        int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i];
   1727 
   1728        if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity`
   1729            // If quantity is not the first value and quantity is negative
   1730            if (micros.indexOfQuantity > 0 && quantity.isNegative()) {
   1731                quantity.negate();
   1732            }
   1733 
   1734            StandardPlural::Form quantityPlural =
   1735                utils::getPluralSafe(micros.rounder, rules, quantity, status);
   1736            UnicodeString quantityFormatWithPlural =
   1737                getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status);
   1738            SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status);
   1739            quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status);
   1740        } else {
   1741            fdec.setToLong(number);
   1742            StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec);
   1743            UnicodeString simpleFormat =
   1744                getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status);
   1745            SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
   1746            UnicodeString num;
   1747            auto appendable = UnicodeStringAppendable(num);
   1748 
   1749            fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status);
   1750            compiledFormatter.format(num, outputMeasuresList[i], status);
   1751        }
   1752    }
   1753 
   1754    // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
   1755    // can set micros.gender to the gender associated with the list formatter in
   1756    // use below (once we have correct support for that). And then document this
   1757    // appropriately? "getMixedUnitModifier" doesn't sound like it would do
   1758    // something like this.
   1759 
   1760    // Combine list into a "premixed" pattern
   1761    UnicodeString premixedFormatPattern;
   1762    fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern,
   1763                           status);
   1764    SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status);
   1765    if (U_FAILURE(status)) {
   1766        return &micros.helpers.emptyWeakModifier;
   1767    }
   1768 
   1769    micros.helpers.mixedUnitModifier =
   1770        SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural});
   1771    return &micros.helpers.mixedUnitModifier;
   1772 }
   1773 
   1774 const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
   1775                                                      StandardPlural::Form /*plural*/) const {
   1776    // TODO(icu-units#28): investigate this method when investigating where
   1777    // ModifierStore::getModifier() gets used. To be sure it remains
   1778    // unreachable:
   1779    UPRV_UNREACHABLE_EXIT;
   1780    return nullptr;
   1781 }
   1782 
   1783 LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
   1784                                                          const MaybeStackVector<MeasureUnit> &units,
   1785                                                          const UNumberUnitWidth &width,
   1786                                                          const char *unitDisplayCase,
   1787                                                          const PluralRules *rules,
   1788                                                          const MicroPropsGenerator *parent,
   1789                                                          UErrorCode &status) {
   1790    LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
   1791    if (U_FAILURE(status)) {
   1792        return nullptr;
   1793    }
   1794    U_ASSERT(units.length() > 0);
   1795    if (result->fHandlers.resize(units.length()) == nullptr) {
   1796        status = U_MEMORY_ALLOCATION_ERROR;
   1797        return nullptr;
   1798    }
   1799    result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]);
   1800    for (int32_t i = 0, length = units.length(); i < length; i++) {
   1801        const MeasureUnit &unit = *units[i];
   1802        result->fMeasureUnits[i] = unit;
   1803        if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
   1804            MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
   1805            MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr,
   1806                                                     mlnh, status);
   1807            result->fHandlers[i] = mlnh;
   1808        } else {
   1809            LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
   1810            LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr, lnh, status);
   1811            result->fHandlers[i] = lnh;
   1812        }
   1813        if (U_FAILURE(status)) {
   1814            return nullptr;
   1815        }
   1816    }
   1817    return result.orphan();
   1818 }
   1819 
   1820 void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
   1821                                          UErrorCode &status) const {
   1822    // We call parent->processQuantity() from the Multiplexer, instead of
   1823    // letting LongNameHandler handle it: we don't know which LongNameHandler to
   1824    // call until we've called the parent!
   1825    fParent->processQuantity(quantity, micros, status);
   1826 
   1827    // Call the correct LongNameHandler based on outputUnit
   1828    for (int i = 0; i < fHandlers.getCapacity(); i++) {
   1829        if (fMeasureUnits[i] == micros.outputUnit) {
   1830            fHandlers[i]->processQuantity(quantity, micros, status);
   1831            return;
   1832        }
   1833    }
   1834    if (U_FAILURE(status)) {
   1835        return;
   1836    }
   1837    // We shouldn't receive any outputUnit for which we haven't already got a
   1838    // LongNameHandler:
   1839    status = U_INTERNAL_PROGRAM_ERROR;
   1840 }
   1841 
   1842 #endif /* #if !UCONFIG_NO_FORMATTING */