number_longnames.cpp (80047B)
1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 #include <cstdlib> 9 10 #include "unicode/simpleformatter.h" 11 #include "unicode/ures.h" 12 #include "unicode/plurrule.h" 13 #include "unicode/strenum.h" 14 #include "ureslocs.h" 15 #include "charstr.h" 16 #include "uresimp.h" 17 #include "measunit_impl.h" 18 #include "number_longnames.h" 19 #include "number_microprops.h" 20 #include <algorithm> 21 #include "cstring.h" 22 #include "util.h" 23 #include "sharedpluralrules.h" 24 25 using namespace icu; 26 using namespace icu::number; 27 using namespace icu::number::impl; 28 29 namespace { 30 31 /** 32 * Display Name (this format has no placeholder). 33 * 34 * Used as an index into the LongNameHandler::simpleFormats array. Units 35 * resources cover the normal set of PluralRules keys, as well as `dnam` and 36 * `per` forms. 37 */ 38 constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT; 39 /** 40 * "per" form (e.g. "{0} per day" is day's "per" form). 41 * 42 * Used as an index into the LongNameHandler::simpleFormats array. Units 43 * resources cover the normal set of PluralRules keys, as well as `dnam` and 44 * `per` forms. 45 */ 46 constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; 47 /** 48 * Gender of the word, in languages with grammatical gender. 49 */ 50 constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2; 51 /** 52 * Denominator constant of the unit. 53 */ 54 constexpr int32_t CONSTANT_DENOMINATOR_INDEX = StandardPlural::Form::COUNT + 3; 55 // Number of keys in the array populated by PluralTableSink. 56 constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 4; 57 58 // TODO(icu-units#28): load this list from resources, after creating a "&set" 59 // function for use in ldml2icu rules. 60 const int32_t GENDER_COUNT = 7; 61 const char *gGenders[GENDER_COUNT] = {"animate", "common", "feminine", "inanimate", 62 "masculine", "neuter", "personal"}; 63 64 // Converts a UnicodeString to a const char*, either pointing to a string in 65 // gGenders, or pointing to an empty string if an appropriate string was not 66 // found. 67 const char *getGenderString(UnicodeString uGender, UErrorCode status) { 68 if (uGender.length() == 0) { 69 return ""; 70 } 71 CharString gender; 72 gender.appendInvariantChars(uGender, status); 73 if (U_FAILURE(status)) { 74 return ""; 75 } 76 int32_t first = 0; 77 int32_t last = GENDER_COUNT; 78 while (first < last) { 79 int32_t mid = (first + last) / 2; 80 int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]); 81 if (cmp == 0) { 82 return gGenders[mid]; 83 } else if (cmp > 0) { 84 first = mid + 1; 85 } else if (cmp < 0) { 86 last = mid; 87 } 88 } 89 // We don't return an error in case our gGenders list is incomplete in 90 // production. 91 // 92 // TODO(icu-units#28): a unit test checking all locales' genders are covered 93 // by gGenders? Else load a complete list of genders found in 94 // grammaticalFeatures in an initOnce. 95 return ""; 96 } 97 98 // Returns the array index that corresponds to the given pluralKeyword. 99 int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { 100 // pluralKeyword can also be "dnam", "per", or "gender" 101 switch (*pluralKeyword) { 102 case 'd': 103 if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) { 104 return DNAM_INDEX; 105 } 106 break; 107 case 'g': 108 if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) { 109 return GENDER_INDEX; 110 } 111 break; 112 case 'p': 113 if (uprv_strcmp(pluralKeyword + 1, "er") == 0) { 114 return PER_INDEX; 115 } 116 break; 117 default: 118 break; 119 } 120 StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); 121 return plural; 122 } 123 124 // Selects a string out of the `strings` array which corresponds to the 125 // specified plural form, with fallback to the OTHER form. 126 // 127 // The `strings` array must have ARRAY_LENGTH items: one corresponding to each 128 // of the plural forms, plus a display name ("dnam") and a "per" form. 129 UnicodeString getWithPlural( 130 const UnicodeString* strings, 131 StandardPlural::Form plural, 132 UErrorCode& status) { 133 UnicodeString result = strings[plural]; 134 if (result.isBogus()) { 135 result = strings[StandardPlural::Form::OTHER]; 136 } 137 if (result.isBogus()) { 138 // There should always be data in the "other" plural variant. 139 status = U_INTERNAL_PROGRAM_ERROR; 140 } 141 return result; 142 } 143 144 enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END }; 145 146 /** 147 * Returns three outputs extracted from pattern. 148 * 149 * @param coreUnit is extracted as per Extract(...) in the spec: 150 * https://unicode.org/reports/tr35/tr35-general.html#compound-units 151 * @param PlaceholderPosition indicates where in the string the placeholder was 152 * found. 153 * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar 154 * contains the space character (if any) that separated the placeholder from 155 * the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one 156 * space character is considered. 157 */ 158 void extractCorePattern(const UnicodeString &pattern, 159 UnicodeString &coreUnit, 160 PlaceholderPosition &placeholderPosition, 161 char16_t &joinerChar) { 162 joinerChar = 0; 163 int32_t len = pattern.length(); 164 if (pattern.startsWith(u"{0}", 3)) { 165 placeholderPosition = PH_BEGINNING; 166 if (u_isJavaSpaceChar(pattern[3])) { 167 joinerChar = pattern[3]; 168 coreUnit.setTo(pattern, 4, len - 4); 169 } else { 170 coreUnit.setTo(pattern, 3, len - 3); 171 } 172 } else if (pattern.endsWith(u"{0}", 3)) { 173 placeholderPosition = PH_END; 174 if (u_isJavaSpaceChar(pattern[len - 4])) { 175 coreUnit.setTo(pattern, 0, len - 4); 176 joinerChar = pattern[len - 4]; 177 } else { 178 coreUnit.setTo(pattern, 0, len - 3); 179 } 180 } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) { 181 placeholderPosition = PH_NONE; 182 coreUnit = pattern; 183 } else { 184 placeholderPosition = PH_MIDDLE; 185 coreUnit = pattern; 186 } 187 } 188 189 ////////////////////////// 190 /// BEGIN DATA LOADING /// 191 ////////////////////////// 192 193 // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty 194 // string both in case of unknown gender and in case of unknown unit. 195 UnicodeString 196 getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) { 197 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); 198 if (U_FAILURE(status)) { return {}; } 199 200 // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... 201 // TODO(ICU-20400): Get duration-*-person data properly with aliases. 202 StringPiece subtypeForResource; 203 int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype())); 204 if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) { 205 subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7}; 206 } else { 207 subtypeForResource = builtinUnit.getSubtype(); 208 } 209 210 CharString key; 211 key.append("units/", status); 212 key.append(builtinUnit.getType(), status); 213 key.append("/", status); 214 key.append(subtypeForResource, status); 215 key.append("/gender", status); 216 217 UErrorCode localStatus = status; 218 int32_t resultLen = 0; 219 const char16_t *result = 220 ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus); 221 if (U_SUCCESS(localStatus)) { 222 status = localStatus; 223 return UnicodeString(true, result, resultLen); 224 } else { 225 // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to 226 // check whether the parent "$unitRes" exists? Then we could return 227 // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not 228 // being a builtin). 229 return {}; 230 } 231 } 232 233 // Loads data from a resource tree with paths matching 234 // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases 235 // and genders. 236 // 237 // An InflectedPluralSink is configured to load data for a specific gender and 238 // case. It loads all plural forms, because selection between plural forms is 239 // dependent upon the value being formatted. 240 // 241 // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at 242 // units/compound/power2: German has case, French has differences for gender, 243 // but no case. 244 // 245 // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the 246 // tree structures are different. After homogenizing the structures, we may be 247 // able to unify the two classes. 248 // 249 // TODO: Spec violation: expects presence of "count" - does not fallback to an 250 // absent "count"! If this fallback were added, getCompoundValue could be 251 // superseded? 252 class InflectedPluralSink : public ResourceSink { 253 public: 254 // Accepts `char*` rather than StringPiece because 255 // ResourceTable::findValue(...) requires a null-terminated `char*`. 256 // 257 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds 258 // checking is performed. 259 explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray) 260 : gender(gender), caseVariant(caseVariant), outArray(outArray) { 261 // Initialize the array to bogus strings. 262 for (int32_t i = 0; i < ARRAY_LENGTH; i++) { 263 outArray[i].setToBogus(); 264 } 265 } 266 267 // See ResourceSink::put(). 268 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { 269 int32_t pluralIndex = getIndex(key, status); 270 if (U_FAILURE(status)) { return; } 271 if (!outArray[pluralIndex].isBogus()) { 272 // We already have a pattern 273 return; 274 } 275 ResourceTable genderTable = value.getTable(status); 276 ResourceTable caseTable; // This instance has to outlive `value` 277 if (loadForPluralForm(genderTable, caseTable, value, status)) { 278 outArray[pluralIndex] = value.getUnicodeString(status); 279 } 280 } 281 282 private: 283 // Tries to load data for the configured gender from `genderTable`. Returns 284 // true if found, returning the data in `value`. The returned data will be 285 // for the configured gender if found, falling back to "neuter" and 286 // no-gender if not. The caseTable parameter holds the intermediate 287 // ResourceTable for the sake of lifetime management. 288 bool loadForPluralForm(const ResourceTable &genderTable, 289 ResourceTable &caseTable, 290 ResourceValue &value, 291 UErrorCode &status) { 292 if (uprv_strcmp(gender, "") != 0) { 293 if (loadForGender(genderTable, gender, caseTable, value, status)) { 294 return true; 295 } 296 if (uprv_strcmp(gender, "neuter") != 0 && 297 loadForGender(genderTable, "neuter", caseTable, value, status)) { 298 return true; 299 } 300 } 301 if (loadForGender(genderTable, "_", caseTable, value, status)) { 302 return true; 303 } 304 return false; 305 } 306 307 // Tries to load data for the given gender from `genderTable`. Returns true 308 // if found, returning the data in `value`. The returned data will be for 309 // the configured case if found, falling back to "nominative" and no-case if 310 // not. 311 bool loadForGender(const ResourceTable &genderTable, 312 const char *genderVal, 313 ResourceTable &caseTable, 314 ResourceValue &value, 315 UErrorCode &status) { 316 if (!genderTable.findValue(genderVal, value)) { 317 return false; 318 } 319 caseTable = value.getTable(status); 320 if (uprv_strcmp(caseVariant, "") != 0) { 321 if (loadForCase(caseTable, caseVariant, value)) { 322 return true; 323 } 324 if (uprv_strcmp(caseVariant, "nominative") != 0 && 325 loadForCase(caseTable, "nominative", value)) { 326 return true; 327 } 328 } 329 if (loadForCase(caseTable, "_", value)) { 330 return true; 331 } 332 return false; 333 } 334 335 // Tries to load data for the given case from `caseTable`. Returns true if 336 // found, returning the data in `value`. 337 bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) { 338 if (!caseTable.findValue(caseValue, value)) { 339 return false; 340 } 341 return true; 342 } 343 344 const char *gender; 345 const char *caseVariant; 346 UnicodeString *outArray; 347 }; 348 349 // Fetches localised formatting patterns for the given subKey. See documentation 350 // for InflectedPluralSink for details. 351 // 352 // Data is loaded for the appropriate unit width, with missing data filled in 353 // from unitsShort. 354 void getInflectedMeasureData(StringPiece subKey, 355 const Locale &locale, 356 const UNumberUnitWidth &width, 357 const char *gender, 358 const char *caseVariant, 359 UnicodeString *outArray, 360 UErrorCode &status) { 361 InflectedPluralSink sink(gender, caseVariant, outArray); 362 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); 363 if (U_FAILURE(status)) { return; } 364 365 CharString key; 366 key.append("units", status); 367 if (width == UNUM_UNIT_WIDTH_NARROW) { 368 key.append("Narrow", status); 369 } else if (width == UNUM_UNIT_WIDTH_SHORT) { 370 key.append("Short", status); 371 } 372 key.append("/", status); 373 key.append(subKey, status); 374 375 UErrorCode localStatus = status; 376 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); 377 if (width == UNUM_UNIT_WIDTH_SHORT) { 378 status = localStatus; 379 return; 380 } 381 } 382 383 class PluralTableSink : public ResourceSink { 384 public: 385 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds 386 // checking is performed. 387 explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { 388 // Initialize the array to bogus strings. 389 for (int32_t i = 0; i < ARRAY_LENGTH; i++) { 390 outArray[i].setToBogus(); 391 } 392 } 393 394 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { 395 if (uprv_strcmp(key, "case") == 0) { 396 return; 397 } 398 int32_t index = getIndex(key, status); 399 if (U_FAILURE(status)) { return; } 400 if (!outArray[index].isBogus()) { 401 return; 402 } 403 outArray[index] = value.getUnicodeString(status); 404 if (U_FAILURE(status)) { return; } 405 } 406 407 private: 408 UnicodeString *outArray; 409 }; 410 411 /** 412 * Populates outArray with `locale`-specific values for `unit` through use of 413 * PluralTableSink. Only the set of basic units are supported! 414 * 415 * Reading from resources *unitsNarrow* and *unitsShort* (for width 416 * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width 417 * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units". 418 * 419 * @param unit must be a built-in unit, i.e. must have a type and subtype, 420 * listed in gTypes and gSubTypes in measunit.cpp. 421 * @param unitDisplayCase the empty string and "nominative" are treated the 422 * same. For other cases, strings for the requested case are used if found. 423 * (For any missing case-specific data, we fall back to nominative.) 424 * @param outArray must be of fixed length ARRAY_LENGTH. 425 */ 426 void getMeasureData(const Locale &locale, 427 const MeasureUnit &unit, 428 const UNumberUnitWidth &width, 429 const char *unitDisplayCase, 430 UnicodeString *outArray, 431 UErrorCode &status) { 432 PluralTableSink sink(outArray); 433 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); 434 if (U_FAILURE(status)) { return; } 435 436 CharString subKey; 437 subKey.append("/", status); 438 subKey.append(unit.getType(), status); 439 subKey.append("/", status); 440 441 // TODO(ICU-23226): Refactor LongNameHandler to use gUnitAliases and gUnitReplacements measunit_extra.cpp instead of local reasource bundle. 442 // Check if unitSubType is an alias or not. 443 LocalUResourceBundlePointer aliasBundle(ures_open(U_ICUDATA_ALIAS, "metadata", &status)); 444 445 UErrorCode aliasStatus = status; 446 StackUResourceBundle aliasFillIn; 447 CharString aliasKey; 448 aliasKey.append("alias/unit/", aliasStatus); 449 aliasKey.append(unit.getSubtype(), aliasStatus); 450 aliasKey.append("/replacement", aliasStatus); 451 ures_getByKeyWithFallback(aliasBundle.getAlias(), aliasKey.data(), aliasFillIn.getAlias(), 452 &aliasStatus); 453 CharString unitSubType; 454 if (!U_FAILURE(aliasStatus)) { 455 // This means the subType is an alias. Then, replace unitSubType with the replacement. 456 auto replacement = ures_getUnicodeString(aliasFillIn.getAlias(), &status); 457 unitSubType.appendInvariantChars(replacement, status); 458 } else { 459 unitSubType.append(unit.getSubtype(), status); 460 } 461 462 // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... 463 // TODO(ICU-20400): Get duration-*-person data properly with aliases. 464 int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unitSubType.data())); 465 if (subtypeLen > 7 && uprv_strcmp(unitSubType.data() + subtypeLen - 7, "-person") == 0) { 466 subKey.append({unitSubType.data(), subtypeLen - 7}, status); 467 } else { 468 subKey.append({unitSubType.data(), subtypeLen}, status); 469 } 470 471 if (width != UNUM_UNIT_WIDTH_FULL_NAME) { 472 UErrorCode localStatus = status; 473 CharString genderKey; 474 genderKey.append("units", localStatus); 475 genderKey.append(subKey, localStatus); 476 genderKey.append("/gender", localStatus); 477 StackUResourceBundle fillIn; 478 ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(), 479 &localStatus); 480 outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus); 481 } 482 483 CharString key; 484 key.append("units", status); 485 if (width == UNUM_UNIT_WIDTH_NARROW) { 486 key.append("Narrow", status); 487 } else if (width == UNUM_UNIT_WIDTH_SHORT) { 488 key.append("Short", status); 489 } 490 key.append(subKey, status); 491 492 // Grab desired case first, if available. Then grab no-case data to fill in 493 // the gaps. 494 if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) { 495 CharString caseKey; 496 caseKey.append(key, status); 497 caseKey.append("/case/", status); 498 caseKey.append(unitDisplayCase, status); 499 500 UErrorCode localStatus = U_ZERO_ERROR; 501 // TODO(icu-units#138): our fallback logic is not spec-compliant: 502 // lateral fallback should happen before locale fallback. Switch to 503 // getInflectedMeasureData after homogenizing data format? Find a unit 504 // test case that demonstrates the incorrect fallback logic (via 505 // regional variant of an inflected language?) 506 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus); 507 } 508 509 // TODO(icu-units#138): our fallback logic is not spec-compliant: we 510 // check the given case, then go straight to the no-case data. The spec 511 // states we should first look for case="nominative". As part of #138, 512 // either get the spec changed, or add unit tests that warn us if 513 // case="nominative" data differs from no-case data? 514 UErrorCode localStatus = U_ZERO_ERROR; 515 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); 516 if (width == UNUM_UNIT_WIDTH_SHORT) { 517 if (U_FAILURE(localStatus)) { 518 status = localStatus; 519 } 520 return; 521 } 522 } 523 524 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. 525 void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray, 526 UErrorCode &status) { 527 // In ICU4J, this method gets a CurrencyData from CurrencyData.provider. 528 // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C? 529 PluralTableSink sink(outArray); 530 // Here all outArray entries are bogus. 531 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status)); 532 if (U_FAILURE(status)) { return; } 533 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status); 534 if (U_FAILURE(status)) { return; } 535 // Here the outArray[] entries are filled in with any CurrencyUnitPatterns data for locale, 536 // or if there is no CurrencyUnitPatterns data for locale since the patterns all inherited 537 // from the "other" pattern in root (which is true for many locales in CLDR 46), then only 538 // the "other" entry has a currency pattern. So now what we do is: For all valid plural keywords 539 // for the locale, if the corresponding outArray[] entry is bogus, fill it in from the "other" 540 // entry. In the longer run, clients of this should instead consider using CurrencyPluralInfo 541 // (see i18n/unicode/currpinf.h). 542 UErrorCode localStatus = U_ZERO_ERROR; 543 const SharedPluralRules *pr = PluralRules::createSharedInstance( 544 locale, UPLURAL_TYPE_CARDINAL, localStatus); 545 if (U_SUCCESS(localStatus)) { 546 LocalPointer<StringEnumeration> keywords((*pr)->getKeywords(localStatus), localStatus); 547 if (U_SUCCESS(localStatus)) { 548 const char* keyword; 549 while (((keyword = keywords->next(nullptr, localStatus)) != nullptr) && U_SUCCESS(localStatus)) { 550 int32_t index = StandardPlural::indexOrOtherIndexFromString(keyword); 551 if (index != StandardPlural::Form::OTHER && outArray[index].isBogus()) { 552 outArray[index].setTo(outArray[StandardPlural::Form::OTHER]); 553 } 554 } 555 } 556 pr->removeRef(); 557 } 558 559 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { 560 UnicodeString &pattern = outArray[i]; 561 if (pattern.isBogus()) { 562 continue; 563 } 564 int32_t longNameLen = 0; 565 const char16_t *longName = ucurr_getPluralName( 566 currency.getISOCurrency(), 567 locale.getName(), 568 nullptr /* isChoiceFormat */, 569 StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)), 570 &longNameLen, 571 &status); 572 // Example pattern from data: "{0} {1}" 573 // Example output after find-and-replace: "{0} US dollars" 574 pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen)); 575 } 576 } 577 578 UnicodeString getCompoundValue(StringPiece compoundKey, 579 const Locale &locale, 580 const UNumberUnitWidth &width, 581 UErrorCode &status) { 582 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); 583 if (U_FAILURE(status)) { return {}; } 584 CharString key; 585 key.append("units", status); 586 if (width == UNUM_UNIT_WIDTH_NARROW) { 587 key.append("Narrow", status); 588 } else if (width == UNUM_UNIT_WIDTH_SHORT) { 589 key.append("Short", status); 590 } 591 key.append("/compound/", status); 592 key.append(compoundKey, status); 593 594 UErrorCode localStatus = status; 595 int32_t len = 0; 596 const char16_t *ptr = 597 ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus); 598 if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) { 599 // Fall back to short, which contains more compound data 600 key.clear(); 601 key.append("unitsShort/compound/", status); 602 key.append(compoundKey, status); 603 ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); 604 } else { 605 status = localStatus; 606 } 607 if (U_FAILURE(status)) { 608 return {}; 609 } 610 return UnicodeString(ptr, len); 611 } 612 613 /** 614 * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml. 615 * 616 * Consider a deriveComponent rule that looks like this: 617 * 618 * <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/> 619 * 620 * Instantiating an instance as follows: 621 * 622 * DerivedComponents d(loc, "case", "per"); 623 * 624 * Applying the rule in the XML element above, `d.value0("foo")` will be "foo", 625 * and `d.value1("foo")` will be "nominative". 626 * 627 * The values returned by value0(...) and value1(...) are valid only while the 628 * instance exists. In case of any kind of failure, value0(...) and value1(...) 629 * will return "". 630 */ 631 class DerivedComponents { 632 public: 633 /** 634 * Constructor. 635 * 636 * The feature and structure parameters must be null-terminated. The string 637 * referenced by compoundValue must exist for longer than the 638 * DerivedComponents instance. 639 */ 640 DerivedComponents(const Locale &locale, const char *feature, const char *structure) { 641 StackUResourceBundle derivationsBundle, stackBundle; 642 ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status); 643 ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), 644 &status); 645 ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), 646 &status); 647 if (U_FAILURE(status)) { 648 return; 649 } 650 UErrorCode localStatus = U_ZERO_ERROR; 651 // TODO(icu-units#28): use standard normal locale resolution algorithms 652 // rather than just grabbing language: 653 ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), 654 &localStatus); 655 // TODO(icu-units#28): 656 // - code currently assumes if the locale exists, the rules are there - 657 // instead of falling back to root when the requested rule is missing. 658 // - investigate ures.h functions, see if one that uses res_findResource() 659 // might be better (or use res_findResource directly), or maybe help 660 // improve ures documentation to guide function selection? 661 if (localStatus == U_MISSING_RESOURCE_ERROR) { 662 ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); 663 } else { 664 status = localStatus; 665 } 666 ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status); 667 ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); 668 ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status); 669 UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status); 670 UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status); 671 if (U_SUCCESS(status)) { 672 if (val0.compare(UnicodeString(u"compound")) == 0) { 673 compound0_ = true; 674 } else { 675 compound0_ = false; 676 value0_.appendInvariantChars(val0, status); 677 } 678 if (val1.compare(UnicodeString(u"compound")) == 0) { 679 compound1_ = true; 680 } else { 681 compound1_ = false; 682 value1_.appendInvariantChars(val1, status); 683 } 684 } 685 } 686 687 // Returns a StringPiece that is only valid as long as the instance exists. 688 StringPiece value0(const StringPiece compoundValue) const { 689 return compound0_ ? compoundValue : value0_.toStringPiece(); 690 } 691 692 // Returns a StringPiece that is only valid as long as the instance exists. 693 StringPiece value1(const StringPiece compoundValue) const { 694 return compound1_ ? compoundValue : value1_.toStringPiece(); 695 } 696 697 // Returns a char* that is only valid as long as the instance exists. 698 const char *value0(const char *compoundValue) const { 699 return compound0_ ? compoundValue : value0_.data(); 700 } 701 702 // Returns a char* that is only valid as long as the instance exists. 703 const char *value1(const char *compoundValue) const { 704 return compound1_ ? compoundValue : value1_.data(); 705 } 706 707 private: 708 UErrorCode status = U_ZERO_ERROR; 709 710 // Holds strings referred to by value0 and value1; 711 bool compound0_ = false, compound1_ = false; 712 CharString value0_, value1_; 713 }; 714 715 // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding 716 // testsuite support for testing with synthetic data? 717 /** 718 * Loads and returns the value in rules that look like these: 719 * 720 * <deriveCompound feature="gender" structure="per" value="0"/> 721 * <deriveCompound feature="gender" structure="times" value="1"/> 722 * 723 * Currently a fake example, but spec compliant: 724 * <deriveCompound feature="gender" structure="power" value="feminine"/> 725 * 726 * NOTE: If U_FAILURE(status), returns an empty string. 727 */ 728 UnicodeString 729 getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) { 730 StackUResourceBundle derivationsBundle, stackBundle; 731 ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status); 732 ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), 733 &status); 734 ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status); 735 // TODO: use standard normal locale resolution algorithms rather than just grabbing language: 736 ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status); 737 // TODO: 738 // - code currently assumes if the locale exists, the rules are there - 739 // instead of falling back to root when the requested rule is missing. 740 // - investigate ures.h functions, see if one that uses res_findResource() 741 // might be better (or use res_findResource directly), or maybe help 742 // improve ures documentation to guide function selection? 743 if (status == U_MISSING_RESOURCE_ERROR) { 744 status = U_ZERO_ERROR; 745 ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); 746 } 747 ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status); 748 ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); 749 UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status); 750 if (U_FAILURE(status)) { 751 return {}; 752 } 753 U_ASSERT(!uVal.isBogus()); 754 return uVal; 755 } 756 757 // Returns the gender string for structures following these rules: 758 // 759 // <deriveCompound feature="gender" structure="per" value="0"/> 760 // <deriveCompound feature="gender" structure="times" value="1"/> 761 // 762 // Fake example: 763 // <deriveCompound feature="gender" structure="power" value="feminine"/> 764 // 765 // data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that 766 // correspond to value="0" and value="1". 767 // 768 // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g. 769 // "prefix" doesn't). 770 UnicodeString getDerivedGender(Locale locale, 771 const char *structure, 772 UnicodeString *data0, 773 UnicodeString *data1, 774 UErrorCode &status) { 775 UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status); 776 if (val.length() == 1) { 777 switch (val[0]) { 778 case u'0': 779 return data0[GENDER_INDEX]; 780 case u'1': 781 if (data1 == nullptr) { 782 return {}; 783 } 784 return data1[GENDER_INDEX]; 785 } 786 } 787 return val; 788 } 789 790 //////////////////////// 791 /// END DATA LOADING /// 792 //////////////////////// 793 794 // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace 795 const char16_t *trimSpaceChars(const char16_t *s, int32_t &length) { 796 if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) { 797 return s; 798 } 799 int32_t start = 0; 800 int32_t limit = length; 801 while (start < limit && u_isJavaSpaceChar(s[start])) { 802 ++start; 803 } 804 if (start < limit) { 805 // There is non-white space at start; we will not move limit below that, 806 // so we need not test start<limit in the loop. 807 while (u_isJavaSpaceChar(s[limit - 1])) { 808 --limit; 809 } 810 } 811 length = limit - start; 812 return s + start; 813 } 814 815 /** 816 * Calculates the gender of an arbitrary unit: this is the *second* 817 * implementation of an algorithm to do this: 818 * 819 * Gender is also calculated in "processPatternTimes": that code path is "bottom 820 * up", loading the gender for every component of a compound unit (at the same 821 * time as loading the Long Names formatting patterns), even if the gender is 822 * unneeded, then combining the single units' genders into the compound unit's 823 * gender, according to the rules. This algorithm does a lazier "top-down" 824 * evaluation, starting with the compound unit, calculating which single unit's 825 * gender is needed by breaking it down according to the rules, and then loading 826 * only the gender of the one single unit who's gender is needed. 827 * 828 * For future refactorings: 829 * 1. we could drop processPatternTimes' gender calculation and just call this 830 * function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very 831 * same table as the formatting patterns, so loading it then may be 832 * efficient. For other unit widths however, it needs to be explicitly looked 833 * up anyway. 834 * 2. alternatively, if CLDR is providing all the genders we need such that we 835 * don't need to calculate them in ICU anymore, we could drop this function 836 * and keep only processPatternTimes' calculation. (And optimise it a bit?) 837 * 838 * @param locale The desired locale. 839 * @param unit The measure unit to calculate the gender for. 840 * @return The gender string for the unit, or an empty string if unknown or 841 * ungendered. 842 */ 843 UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) { 844 MeasureUnitImpl impl; 845 const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status); 846 int32_t singleUnitIndex = 0; 847 if (mui.complexity == UMEASURE_UNIT_COMPOUND) { 848 int32_t startSlice = 0; 849 // inclusive 850 int32_t endSlice = mui.singleUnits.length()-1; 851 U_ASSERT(endSlice > 0); // Else it would not be COMPOUND 852 if (mui.singleUnits[endSlice]->dimensionality < 0) { 853 // We have a -per- construct 854 UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status); 855 if (perRule.length() != 1) { 856 // Fixed gender for -per- units 857 return perRule; 858 } 859 if (perRule[0] == u'1') { 860 // Find the start of the denominator. We already know there is one. 861 while (mui.singleUnits[startSlice]->dimensionality >= 0) { 862 startSlice++; 863 } 864 } else { 865 // Find the end of the numerator 866 while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) { 867 endSlice--; 868 } 869 if (endSlice < 0) { 870 // We have only a denominator, e.g. "per-second". 871 // TODO(icu-units#28): find out what gender to use in the 872 // absence of a first value - mentioned in CLDR-14253. 873 return {}; 874 } 875 } 876 } 877 if (endSlice > startSlice) { 878 // We have a -times- construct 879 UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status); 880 if (timesRule.length() != 1) { 881 // Fixed gender for -times- units 882 return timesRule; 883 } 884 if (timesRule[0] == u'0') { 885 endSlice = startSlice; 886 } else { 887 // We assume timesRule[0] == u'1' 888 startSlice = endSlice; 889 } 890 } 891 U_ASSERT(startSlice == endSlice); 892 singleUnitIndex = startSlice; 893 } else if (mui.complexity == UMEASURE_UNIT_MIXED) { 894 status = U_INTERNAL_PROGRAM_ERROR; 895 return {}; 896 } else { 897 U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE); 898 U_ASSERT(mui.singleUnits.length() == 1); 899 } 900 901 // Now we know which singleUnit's gender we want 902 const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex]; 903 // Check for any power-prefix gender override: 904 if (std::abs(singleUnit->dimensionality) != 1) { 905 UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status); 906 if (powerRule.length() != 1) { 907 // Fixed gender for -powN- units 908 return powerRule; 909 } 910 // powerRule[0] == u'0'; u'1' not currently in spec. 911 } 912 // Check for any SI and binary prefix gender override: 913 if (std::abs(singleUnit->dimensionality) != 1) { 914 UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status); 915 if (prefixRule.length() != 1) { 916 // Fixed gender for -powN- units 917 return prefixRule; 918 } 919 // prefixRule[0] == u'0'; u'1' not currently in spec. 920 } 921 // Now we've boiled it down to the gender of one simple unit identifier: 922 return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status), 923 status); 924 } 925 926 void maybeCalculateGender(const Locale &locale, 927 const MeasureUnit &unitRef, 928 UnicodeString *outArray, 929 UErrorCode &status) { 930 if (outArray[GENDER_INDEX].isBogus()) { 931 UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status); 932 if (meterGender.isEmpty()) { 933 // No gender for meter: assume ungendered language 934 return; 935 } 936 // We have a gendered language, but are lacking gender for unitRef. 937 outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status); 938 } 939 } 940 941 } // namespace 942 943 void LongNameHandler::forMeasureUnit(const Locale &loc, 944 const MeasureUnit &unitRef, 945 const UNumberUnitWidth &width, 946 const char *unitDisplayCase, 947 const PluralRules *rules, 948 const MicroPropsGenerator *parent, 949 LongNameHandler *fillIn, 950 UErrorCode &status) { 951 // From https://unicode.org/reports/tr35/tr35-general.html#compound-units - 952 // Points 1 and 2 are mostly handled by MeasureUnit: 953 // 954 // 1. If the unitId is empty or invalid, fail 955 // 2. Put the unitId into normalized order 956 U_ASSERT(fillIn != nullptr); 957 958 if (uprv_strcmp(unitRef.getType(), "") != 0) { 959 // Handling built-in units: 960 // 961 // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant) 962 // - If result is not empty, return it 963 UnicodeString simpleFormats[ARRAY_LENGTH]; 964 getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status); 965 maybeCalculateGender(loc, unitRef, simpleFormats, status); 966 if (U_FAILURE(status)) { 967 return; 968 } 969 fillIn->rules = rules; 970 fillIn->parent = parent; 971 fillIn->simpleFormatsToModifiers(simpleFormats, 972 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); 973 if (!simpleFormats[GENDER_INDEX].isBogus()) { 974 fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status); 975 } 976 return; 977 978 // TODO(icu-units#145): figure out why this causes a failure in 979 // format/MeasureFormatTest/TestIndividualPluralFallback and other 980 // tests, when it should have been an alternative for the lines above: 981 982 // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); 983 // fillIn->rules = rules; 984 // fillIn->parent = parent; 985 // return; 986 } else { 987 // Check if it is a MeasureUnit this constructor handles: this 988 // constructor does not handle mixed units 989 U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED); 990 forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); 991 fillIn->rules = rules; 992 fillIn->parent = parent; 993 return; 994 } 995 } 996 997 void LongNameHandler::forArbitraryUnit(const Locale &loc, 998 const MeasureUnit &unitRef, 999 const UNumberUnitWidth &width, 1000 const char *unitDisplayCase, 1001 LongNameHandler *fillIn, 1002 UErrorCode &status) { 1003 if (U_FAILURE(status)) { 1004 return; 1005 } 1006 if (fillIn == nullptr) { 1007 status = U_INTERNAL_PROGRAM_ERROR; 1008 return; 1009 } 1010 1011 // Numbered list items are from the algorithms at 1012 // https://unicode.org/reports/tr35/tr35-general.html#compound-units: 1013 // 1014 // 4. Divide the unitId into numerator (the part before the "-per-") and 1015 // denominator (the part after the "-per-). If both are empty, fail 1016 MeasureUnitImpl unit; 1017 MeasureUnitImpl perUnit; 1018 1019 if (unitRef.getConstantDenominator(status) != 0) { 1020 perUnit.constantDenominator = unitRef.getConstantDenominator(status); 1021 } 1022 1023 { 1024 MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status); 1025 if (U_FAILURE(status)) { 1026 return; 1027 } 1028 for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) { 1029 SingleUnitImpl *subUnit = fullUnit.singleUnits[i]; 1030 if (subUnit->dimensionality > 0) { 1031 unit.appendSingleUnit(*subUnit, status); 1032 } else { 1033 subUnit->dimensionality *= -1; 1034 perUnit.appendSingleUnit(*subUnit, status); 1035 } 1036 } 1037 } 1038 1039 // TODO(icu-units#28): check placeholder logic, see if it needs to be 1040 // present here instead of only in processPatternTimes: 1041 // 1042 // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty 1043 1044 DerivedComponents derivedPerCases(loc, "case", "per"); 1045 1046 // 6. numeratorUnitString 1047 UnicodeString numeratorUnitData[ARRAY_LENGTH]; 1048 processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase), 1049 numeratorUnitData, status); 1050 1051 // 7. denominatorUnitString 1052 UnicodeString denominatorUnitData[ARRAY_LENGTH]; 1053 processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase), 1054 denominatorUnitData, status); 1055 1056 // TODO(icu-units#139): 1057 // - implement DerivedComponents for "plural/times" and "plural/power": 1058 // French has different rules, we'll be producing the wrong results 1059 // currently. (Prove via tests!) 1060 // - implement DerivedComponents for "plural/per", "plural/prefix", 1061 // "case/times", "case/power", and "case/prefix" - although they're 1062 // currently hardcoded. Languages with different rules are surely on the 1063 // way. 1064 // 1065 // Currently we only use "case/per", "plural/times", "case/times", and 1066 // "case/power". 1067 // 1068 // This may have impact on multiSimpleFormatsToModifiers(...) below too? 1069 // These rules are currently (ICU 69) all the same and hard-coded below. 1070 UnicodeString perUnitPattern; 1071 if (!denominatorUnitData[PER_INDEX].isBogus()) { 1072 // If we have no denominator, we obtain the empty string: 1073 perUnitPattern = denominatorUnitData[PER_INDEX]; 1074 } else { 1075 // 8. Set perPattern to be getValue([per], locale, length) 1076 UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status); 1077 // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit. 1078 SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status); 1079 if (U_FAILURE(status)) { 1080 return; 1081 } 1082 // Plural and placeholder handling for 7. denominatorUnitString: 1083 // TODO(icu-units#139): hardcoded: 1084 // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/> 1085 UnicodeString denominatorFormat = 1086 getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status); 1087 // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale. 1088 SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status); 1089 if (U_FAILURE(status)) { 1090 return; 1091 } 1092 UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments(); 1093 int32_t trimmedLen = denominatorPattern.length(); 1094 const char16_t *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen); 1095 UnicodeString denominatorString(false, trimmed, trimmedLen); 1096 // 9. If the denominatorString is empty, set result to 1097 // [numeratorString], otherwise set result to format(perPattern, 1098 // numeratorString, denominatorString) 1099 // 1100 // TODO(icu-units#28): Why does UnicodeString need to be explicit in the 1101 // following line? 1102 perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status); 1103 if (U_FAILURE(status)) { 1104 return; 1105 } 1106 } 1107 if (perUnitPattern.length() == 0) { 1108 fillIn->simpleFormatsToModifiers(numeratorUnitData, 1109 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); 1110 } else { 1111 fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern, 1112 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); 1113 } 1114 1115 // Gender 1116 // 1117 // TODO(icu-units#28): find out what gender to use in the absence of a first 1118 // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253. 1119 // 1120 // gender/per deriveCompound rules don't say: 1121 // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ← gender(gram) --> 1122 fillIn->gender = getGenderString( 1123 getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status); 1124 } 1125 1126 void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit, 1127 Locale loc, 1128 const UNumberUnitWidth &width, 1129 const char *caseVariant, 1130 UnicodeString *outArray, 1131 UErrorCode &status) { 1132 if (U_FAILURE(status)) { 1133 return; 1134 } 1135 if (productUnit.complexity == UMEASURE_UNIT_MIXED) { 1136 // These are handled by MixedUnitLongNameHandler 1137 status = U_UNSUPPORTED_ERROR; 1138 return; 1139 } 1140 1141 #if U_DEBUG 1142 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { 1143 U_ASSERT(outArray[pluralIndex].length() == 0); 1144 U_ASSERT(!outArray[pluralIndex].isBogus()); 1145 } 1146 #endif 1147 1148 if (productUnit.identifier.isEmpty()) { 1149 // TODO(icu-units#28): consider when serialize should be called. 1150 // identifier might also be empty for MeasureUnit(). 1151 productUnit.serialize(status); 1152 } 1153 if (U_FAILURE(status)) { 1154 return; 1155 } 1156 if (productUnit.identifier.isEmpty()) { 1157 // MeasureUnit(): no units: return empty strings. 1158 return; 1159 } 1160 1161 MeasureUnit builtinUnit; 1162 if (MeasureUnit::findBySubType(productUnit.identifier.data(), &builtinUnit)) { 1163 // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it 1164 // breaks them all down. Do we want to drop this? 1165 // - findBySubType isn't super efficient, if we skip it and go to basic 1166 // singles, we don't have to construct MeasureUnit's anymore. 1167 // - Check all the existing unit tests that fail without this: is it due 1168 // to incorrect fallback via getMeasureData? 1169 // - Do those unit tests cover this code path representatively? 1170 if (builtinUnit != MeasureUnit()) { 1171 getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status); 1172 maybeCalculateGender(loc, builtinUnit, outArray, status); 1173 } 1174 return; 1175 } 1176 1177 // 2. Set timesPattern to be getValue(times, locale, length) 1178 UnicodeString timesPattern = getCompoundValue("times", loc, width, status); 1179 SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status); 1180 if (U_FAILURE(status)) { 1181 return; 1182 } 1183 1184 PlaceholderPosition globalPlaceholder[ARRAY_LENGTH]; 1185 char16_t globalJoinerChar = 0; 1186 // Numbered list items are from the algorithms at 1187 // https://unicode.org/reports/tr35/tr35-general.html#compound-units: 1188 // 1189 // pattern(...) point 5: 1190 // - Set both globalPlaceholder and globalPlaceholderPosition to be empty 1191 // 1192 // 3. Set result to be empty 1193 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { 1194 // Initial state: empty string pattern, via all falling back to OTHER: 1195 if (pluralIndex == StandardPlural::Form::OTHER) { 1196 outArray[pluralIndex].remove(); 1197 } else { 1198 outArray[pluralIndex].setToBogus(); 1199 } 1200 globalPlaceholder[pluralIndex] = PH_EMPTY; 1201 } 1202 1203 // Empty string represents "compound" (propagate the plural form). 1204 const char *pluralCategory = ""; 1205 DerivedComponents derivedTimesPlurals(loc, "plural", "times"); 1206 DerivedComponents derivedTimesCases(loc, "case", "times"); 1207 DerivedComponents derivedPowerCases(loc, "case", "power"); 1208 1209 if (productUnit.constantDenominator != 0) { 1210 CharString constantString; 1211 constantString.appendNumber(productUnit.constantDenominator, status); 1212 outArray[CONSTANT_DENOMINATOR_INDEX] = UnicodeString::fromUTF8(constantString.toStringPiece()); 1213 } 1214 1215 // 4. For each single_unit in product_unit 1216 for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length(); 1217 singleUnitIndex++) { 1218 SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex]; 1219 const char *singlePluralCategory; 1220 const char *singleCaseVariant; 1221 // TODO(icu-units#28): ensure we have unit tests that change/fail if we 1222 // assign incorrect case variants here: 1223 if (singleUnitIndex < productUnit.singleUnits.length() - 1) { 1224 // 4.1. If hasMultiple 1225 singlePluralCategory = derivedTimesPlurals.value0(pluralCategory); 1226 singleCaseVariant = derivedTimesCases.value0(caseVariant); 1227 pluralCategory = derivedTimesPlurals.value1(pluralCategory); 1228 caseVariant = derivedTimesCases.value1(caseVariant); 1229 } else { 1230 singlePluralCategory = derivedTimesPlurals.value1(pluralCategory); 1231 singleCaseVariant = derivedTimesCases.value1(caseVariant); 1232 } 1233 1234 // 4.2. Get the gender of that single_unit 1235 MeasureUnit simpleUnit; 1236 if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) { 1237 // Ideally all simple units should be known, but they're not: 1238 // 100-kilometer is internally treated as a simple unit, but it is 1239 // not a built-in unit and does not have formatting data in CLDR 39. 1240 // 1241 // TODO(icu-units#28): test (desirable) invariants in unit tests. 1242 status = U_UNSUPPORTED_ERROR; 1243 return; 1244 } 1245 const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status); 1246 1247 // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-' 1248 U_ASSERT(singleUnit->dimensionality > 0); 1249 int32_t dimensionality = singleUnit->dimensionality; 1250 UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH]; 1251 if (dimensionality != 1) { 1252 // 4.3.1. set dimensionalityPrefixPattern to be 1253 // getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender), 1254 // such as "{0} kwadratowym" 1255 CharString dimensionalityKey("compound/power", status); 1256 dimensionalityKey.appendNumber(dimensionality, status); 1257 getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender, 1258 singleCaseVariant, dimensionalityPrefixPatterns, status); 1259 if (U_FAILURE(status)) { 1260 // At the time of writing, only pow2 and pow3 are supported. 1261 // Attempting to format other powers results in a 1262 // U_RESOURCE_TYPE_MISMATCH. We convert the error if we 1263 // understand it: 1264 if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) { 1265 status = U_UNSUPPORTED_ERROR; 1266 } 1267 return; 1268 } 1269 1270 // TODO(icu-units#139): 1271 // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory) 1272 1273 // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant) 1274 singleCaseVariant = derivedPowerCases.value0(singleCaseVariant); 1275 // 4.3.4. remove the dimensionality_prefix from singleUnit 1276 singleUnit->dimensionality = 1; 1277 } 1278 1279 // 4.4. if singleUnit starts with an si_prefix, such as 'centi' 1280 UMeasurePrefix prefix = singleUnit->unitPrefix; 1281 UnicodeString prefixPattern; 1282 if (prefix != UMEASURE_PREFIX_ONE) { 1283 // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale, 1284 // length), such as "centy{0}" 1285 CharString prefixKey; 1286 // prefixKey looks like "1024p3" or "10p-2": 1287 prefixKey.appendNumber(umeas_getPrefixBase(prefix), status); 1288 prefixKey.append('p', status); 1289 prefixKey.appendNumber(umeas_getPrefixPower(prefix), status); 1290 // Contains a pattern like "centy{0}". 1291 prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status); 1292 1293 // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory) 1294 // 1295 // TODO(icu-units#139): that refers to these rules: 1296 // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/> 1297 // though I'm not sure what other value they might end up having. 1298 // 1299 // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant) 1300 // 1301 // TODO(icu-units#139): that refers to: 1302 // <deriveComponent feature="case" structure="prefix" value0="nominative" 1303 // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply 1304 // propagates. 1305 1306 // 4.4.4. remove the si_prefix from singleUnit 1307 singleUnit->unitPrefix = UMEASURE_PREFIX_ONE; 1308 } 1309 1310 // 4.5. Set corePattern to be the getValue(singleUnit, locale, length, 1311 // singlePluralCategory, singleCaseVariant), such as "{0} metrem" 1312 UnicodeString singleUnitArray[ARRAY_LENGTH]; 1313 // At this point we are left with a Simple Unit: 1314 U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) == 1315 0); 1316 getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray, 1317 status); 1318 if (U_FAILURE(status)) { 1319 // Shouldn't happen if we have data for all single units 1320 return; 1321 } 1322 1323 // Calculate output gender 1324 if (!singleUnitArray[GENDER_INDEX].isBogus()) { 1325 U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty()); 1326 UnicodeString uVal; 1327 1328 if (prefix != UMEASURE_PREFIX_ONE) { 1329 singleUnitArray[GENDER_INDEX] = 1330 getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status); 1331 } 1332 1333 if (dimensionality != 1) { 1334 singleUnitArray[GENDER_INDEX] = 1335 getDerivedGender(loc, "power", singleUnitArray, nullptr, status); 1336 } 1337 1338 UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status); 1339 if (timesGenderRule.length() == 1) { 1340 switch (timesGenderRule[0]) { 1341 case u'0': 1342 if (singleUnitIndex == 0) { 1343 U_ASSERT(outArray[GENDER_INDEX].isBogus()); 1344 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; 1345 } 1346 break; 1347 case u'1': 1348 if (singleUnitIndex == productUnit.singleUnits.length() - 1) { 1349 U_ASSERT(outArray[GENDER_INDEX].isBogus()); 1350 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; 1351 } 1352 } 1353 } else { 1354 if (outArray[GENDER_INDEX].isBogus()) { 1355 outArray[GENDER_INDEX] = timesGenderRule; 1356 } 1357 } 1358 } 1359 1360 // Calculate resulting patterns for each plural form 1361 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { 1362 StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex); 1363 1364 // singleUnitArray[pluralIndex] looks something like "{0} Meter" 1365 if (outArray[pluralIndex].isBogus()) { 1366 if (singleUnitArray[pluralIndex].isBogus()) { 1367 // Let the usual plural fallback mechanism take care of this 1368 // plural form 1369 continue; 1370 } else { 1371 // Since our singleUnit can have a plural form that outArray 1372 // doesn't yet have (relying on fallback to OTHER), we start 1373 // by grabbing it with the normal plural fallback mechanism 1374 outArray[pluralIndex] = getWithPlural(outArray, plural, status); 1375 if (U_FAILURE(status)) { 1376 return; 1377 } 1378 } 1379 } 1380 1381 if (uprv_strcmp(singlePluralCategory, "") != 0) { 1382 plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status)); 1383 } 1384 1385 // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern. 1386 UnicodeString coreUnit; 1387 PlaceholderPosition placeholderPosition; 1388 char16_t joinerChar; 1389 extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit, 1390 placeholderPosition, joinerChar); 1391 1392 // 4.7 If the position is middle, then fail 1393 if (placeholderPosition == PH_MIDDLE) { 1394 status = U_UNSUPPORTED_ERROR; 1395 return; 1396 } 1397 1398 // 4.8. If globalPlaceholder is empty 1399 if (globalPlaceholder[pluralIndex] == PH_EMPTY) { 1400 globalPlaceholder[pluralIndex] = placeholderPosition; 1401 globalJoinerChar = joinerChar; 1402 } else { 1403 // Expect all units involved to have the same placeholder position 1404 U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition); 1405 // TODO(icu-units#28): Do we want to add a unit test that checks 1406 // for consistent joiner chars? Probably not, given how 1407 // inconsistent they are. File a CLDR ticket with examples? 1408 } 1409 // Now coreUnit would be just "Meter" 1410 1411 // 4.9. If siPrefixPattern is not empty 1412 if (prefix != UMEASURE_PREFIX_ONE) { 1413 SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status); 1414 if (U_FAILURE(status)) { 1415 return; 1416 } 1417 1418 // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern, 1419 // coreUnit) 1420 UnicodeString tmp; 1421 // combineLowercasing(locale, length, prefixPattern, coreUnit) 1422 // 1423 // TODO(icu-units#28): run this only if prefixPattern does not 1424 // contain space characters - do languages "as", "bn", "hi", 1425 // "kk", etc have concepts of upper and lower case?: 1426 if (width == UNUM_UNIT_WIDTH_FULL_NAME) { 1427 coreUnit.toLower(loc); 1428 } 1429 prefixCompiled.format(coreUnit, tmp, status); 1430 if (U_FAILURE(status)) { 1431 return; 1432 } 1433 coreUnit = tmp; 1434 } 1435 1436 // 4.10. If dimensionalityPrefixPattern is not empty 1437 if (dimensionality != 1) { 1438 SimpleFormatter dimensionalityCompiled( 1439 getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status); 1440 if (U_FAILURE(status)) { 1441 return; 1442 } 1443 1444 // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length, 1445 // dimensionalityPrefixPattern, coreUnit) 1446 UnicodeString tmp; 1447 // combineLowercasing(locale, length, prefixPattern, coreUnit) 1448 // 1449 // TODO(icu-units#28): run this only if prefixPattern does not 1450 // contain space characters - do languages "as", "bn", "hi", 1451 // "kk", etc have concepts of upper and lower case?: 1452 if (width == UNUM_UNIT_WIDTH_FULL_NAME) { 1453 coreUnit.toLower(loc); 1454 } 1455 dimensionalityCompiled.format(coreUnit, tmp, status); 1456 if (U_FAILURE(status)) { 1457 return; 1458 } 1459 coreUnit = tmp; 1460 } 1461 1462 if (outArray[pluralIndex].length() == 0) { 1463 // 4.11. If the result is empty, set result to be coreUnit 1464 outArray[pluralIndex] = coreUnit; 1465 } else { 1466 // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit) 1467 UnicodeString tmp; 1468 timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status); 1469 outArray[pluralIndex] = tmp; 1470 } 1471 } 1472 } 1473 1474 // 5. Handling constant denominator if it exists. 1475 if (productUnit.constantDenominator != 0) { 1476 int32_t pluralIndex = -1; 1477 for (int32_t index = 0; index < StandardPlural::Form::COUNT; index++) { 1478 if (!outArray[index].isBogus()) { 1479 pluralIndex = index; 1480 break; 1481 } 1482 } 1483 1484 U_ASSERT(pluralIndex >= 0); // "No plural form found for constant denominator" 1485 1486 // TODO(ICU-23039): 1487 // Improve the handling of constant_denominator representation. 1488 // For instance, a constant_denominator of 1000000 should be adaptable to 1489 // formats like 1490 // 1,000,000, 1e6, or 1 million. 1491 // Furthermore, ensure consistent pluralization rules for units. For example, 1492 // "meter per 100 seconds" should be evaluated for correct singular/plural 1493 // usage: "second" or "seconds"? 1494 // Similarly, "kilogram per 1000 meters" should be checked for "meter" or 1495 // "meters"? 1496 if (outArray[pluralIndex].length() == 0) { 1497 outArray[pluralIndex] = outArray[CONSTANT_DENOMINATOR_INDEX]; 1498 } else { 1499 UnicodeString tmp; 1500 timesPatternFormatter.format(outArray[CONSTANT_DENOMINATOR_INDEX], outArray[pluralIndex], 1501 tmp, status); 1502 outArray[pluralIndex] = tmp; 1503 } 1504 } 1505 1506 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { 1507 if (globalPlaceholder[pluralIndex] == PH_BEGINNING) { 1508 UnicodeString tmp; 1509 tmp.append(u"{0}", 3); 1510 if (globalJoinerChar != 0) { 1511 tmp.append(globalJoinerChar); 1512 } 1513 tmp.append(outArray[pluralIndex]); 1514 outArray[pluralIndex] = tmp; 1515 } else if (globalPlaceholder[pluralIndex] == PH_END) { 1516 if (globalJoinerChar != 0) { 1517 outArray[pluralIndex].append(globalJoinerChar); 1518 } 1519 outArray[pluralIndex].append(u"{0}", 3); 1520 } 1521 } 1522 } 1523 1524 UnicodeString LongNameHandler::getUnitDisplayName( 1525 const Locale& loc, 1526 const MeasureUnit& unit, 1527 UNumberUnitWidth width, 1528 UErrorCode& status) { 1529 if (U_FAILURE(status)) { 1530 return ICU_Utility::makeBogusString(); 1531 } 1532 UnicodeString simpleFormats[ARRAY_LENGTH]; 1533 getMeasureData(loc, unit, width, "", simpleFormats, status); 1534 return simpleFormats[DNAM_INDEX]; 1535 } 1536 1537 UnicodeString LongNameHandler::getUnitPattern( 1538 const Locale& loc, 1539 const MeasureUnit& unit, 1540 UNumberUnitWidth width, 1541 StandardPlural::Form pluralForm, 1542 UErrorCode& status) { 1543 if (U_FAILURE(status)) { 1544 return ICU_Utility::makeBogusString(); 1545 } 1546 UnicodeString simpleFormats[ARRAY_LENGTH]; 1547 getMeasureData(loc, unit, width, "", simpleFormats, status); 1548 // The above already handles fallback from other widths to short 1549 if (U_FAILURE(status)) { 1550 return ICU_Utility::makeBogusString(); 1551 } 1552 // Now handle fallback from other plural forms to OTHER 1553 return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]: 1554 simpleFormats[StandardPlural::Form::OTHER]; 1555 } 1556 1557 LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, 1558 const PluralRules *rules, 1559 const MicroPropsGenerator *parent, 1560 UErrorCode &status) { 1561 LocalPointer<LongNameHandler> result(new LongNameHandler(rules, parent), status); 1562 if (U_FAILURE(status)) { 1563 return nullptr; 1564 } 1565 UnicodeString simpleFormats[ARRAY_LENGTH]; 1566 getCurrencyLongNameData(loc, currency, simpleFormats, status); 1567 if (U_FAILURE(status)) { return nullptr; } 1568 result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); 1569 // TODO(icu-units#28): currency gender? 1570 return result.orphan(); 1571 } 1572 1573 void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, 1574 UErrorCode &status) { 1575 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { 1576 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i); 1577 UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status); 1578 if (U_FAILURE(status)) { return; } 1579 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); 1580 if (U_FAILURE(status)) { return; } 1581 fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural}); 1582 } 1583 } 1584 1585 void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, 1586 Field field, UErrorCode &status) { 1587 SimpleFormatter trailCompiled(trailFormat, 1, 1, status); 1588 if (U_FAILURE(status)) { return; } 1589 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { 1590 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i); 1591 UnicodeString leadFormat = getWithPlural(leadFormats, plural, status); 1592 if (U_FAILURE(status)) { return; } 1593 UnicodeString compoundFormat; 1594 if (leadFormat.length() == 0) { 1595 compoundFormat = trailFormat; 1596 } else { 1597 trailCompiled.format(leadFormat, compoundFormat, status); 1598 if (U_FAILURE(status)) { return; } 1599 } 1600 SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); 1601 if (U_FAILURE(status)) { return; } 1602 fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural}); 1603 } 1604 } 1605 1606 void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, 1607 UErrorCode &status) const { 1608 if (parent != nullptr) { 1609 parent->processQuantity(quantity, micros, status); 1610 } 1611 StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status); 1612 micros.modOuter = &fModifiers[pluralForm]; 1613 micros.gender = gender; 1614 } 1615 1616 const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const { 1617 return &fModifiers[plural]; 1618 } 1619 1620 void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, 1621 const MeasureUnit &mixedUnit, 1622 const UNumberUnitWidth &width, 1623 const char *unitDisplayCase, 1624 const PluralRules *rules, 1625 const MicroPropsGenerator *parent, 1626 MixedUnitLongNameHandler *fillIn, 1627 UErrorCode &status) { 1628 U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED); 1629 U_ASSERT(fillIn != nullptr); 1630 if (U_FAILURE(status)) { 1631 return; 1632 } 1633 1634 MeasureUnitImpl temp; 1635 const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status); 1636 // Defensive, for production code: 1637 if (impl.complexity != UMEASURE_UNIT_MIXED) { 1638 // Should be using the normal LongNameHandler 1639 status = U_UNSUPPORTED_ERROR; 1640 return; 1641 } 1642 1643 fillIn->fMixedUnitCount = impl.singleUnits.length(); 1644 fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]); 1645 for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) { 1646 // Grab data for each of the components. 1647 UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH]; 1648 // TODO(CLDR-14582): check from the CLDR-14582 ticket whether this 1649 // propagation of unitDisplayCase is correct: 1650 getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData, 1651 status); 1652 // TODO(ICU-21494): if we add support for gender for mixed units, we may 1653 // need maybeCalculateGender() here. 1654 } 1655 1656 // TODO(icu-units#120): Make sure ICU doesn't output zero-valued 1657 // high-magnitude fields 1658 // * for mixed units count N, produce N listFormatters, one for each subset 1659 // that might be formatted. 1660 UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT; 1661 if (width == UNUM_UNIT_WIDTH_NARROW) { 1662 listWidth = ULISTFMT_WIDTH_NARROW; 1663 } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) { 1664 // This might be the same as SHORT in most languages: 1665 listWidth = ULISTFMT_WIDTH_WIDE; 1666 } 1667 fillIn->fListFormatter.adoptInsteadAndCheckErrorCode( 1668 ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status); 1669 // TODO(ICU-21494): grab gender of each unit, calculate the gender 1670 // associated with this list formatter, save it for later. 1671 fillIn->rules = rules; 1672 fillIn->parent = parent; 1673 1674 // We need a localised NumberFormatter for the numbers of the bigger units 1675 // (providing Arabic numerals, for example). 1676 fillIn->fNumberFormatter = NumberFormatter::withLocale(loc); 1677 } 1678 1679 void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, 1680 UErrorCode &status) const { 1681 U_ASSERT(fMixedUnitCount > 1); 1682 if (parent != nullptr) { 1683 parent->processQuantity(quantity, micros, status); 1684 } 1685 micros.modOuter = getMixedUnitModifier(quantity, micros, status); 1686 } 1687 1688 const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity, 1689 MicroProps µs, 1690 UErrorCode &status) const { 1691 if (micros.mixedMeasuresCount == 0) { 1692 U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value 1693 status = U_UNSUPPORTED_ERROR; 1694 return µs.helpers.emptyWeakModifier; 1695 } 1696 1697 // Algorithm: 1698 // 1699 // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should 1700 // find "3 yard" and "1 foot" in micros.mixedMeasures. 1701 // 1702 // Obtain long-names with plural forms corresponding to measure values: 1703 // * {0} yards, {0} foot, {0} inches 1704 // 1705 // Format the integer values appropriately and modify with the format 1706 // strings: 1707 // - 3 yards, 1 foot 1708 // 1709 // Use ListFormatter to combine, with one placeholder: 1710 // - 3 yards, 1 foot and {0} inches 1711 // 1712 // Return a SimpleModifier for this pattern, letting the rest of the 1713 // pipeline take care of the remaining inches. 1714 1715 LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status); 1716 if (U_FAILURE(status)) { 1717 return µs.helpers.emptyWeakModifier; 1718 } 1719 1720 StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER; 1721 for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) { 1722 DecimalQuantity fdec; 1723 1724 // If numbers are negative, only the first number needs to have its 1725 // negative sign formatted. 1726 int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i]; 1727 1728 if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity` 1729 // If quantity is not the first value and quantity is negative 1730 if (micros.indexOfQuantity > 0 && quantity.isNegative()) { 1731 quantity.negate(); 1732 } 1733 1734 StandardPlural::Form quantityPlural = 1735 utils::getPluralSafe(micros.rounder, rules, quantity, status); 1736 UnicodeString quantityFormatWithPlural = 1737 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status); 1738 SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status); 1739 quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status); 1740 } else { 1741 fdec.setToLong(number); 1742 StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec); 1743 UnicodeString simpleFormat = 1744 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status); 1745 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); 1746 UnicodeString num; 1747 auto appendable = UnicodeStringAppendable(num); 1748 1749 fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status); 1750 compiledFormatter.format(num, outputMeasuresList[i], status); 1751 } 1752 } 1753 1754 // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we 1755 // can set micros.gender to the gender associated with the list formatter in 1756 // use below (once we have correct support for that). And then document this 1757 // appropriately? "getMixedUnitModifier" doesn't sound like it would do 1758 // something like this. 1759 1760 // Combine list into a "premixed" pattern 1761 UnicodeString premixedFormatPattern; 1762 fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern, 1763 status); 1764 SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status); 1765 if (U_FAILURE(status)) { 1766 return µs.helpers.emptyWeakModifier; 1767 } 1768 1769 micros.helpers.mixedUnitModifier = 1770 SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural}); 1771 return µs.helpers.mixedUnitModifier; 1772 } 1773 1774 const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/, 1775 StandardPlural::Form /*plural*/) const { 1776 // TODO(icu-units#28): investigate this method when investigating where 1777 // ModifierStore::getModifier() gets used. To be sure it remains 1778 // unreachable: 1779 UPRV_UNREACHABLE_EXIT; 1780 return nullptr; 1781 } 1782 1783 LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc, 1784 const MaybeStackVector<MeasureUnit> &units, 1785 const UNumberUnitWidth &width, 1786 const char *unitDisplayCase, 1787 const PluralRules *rules, 1788 const MicroPropsGenerator *parent, 1789 UErrorCode &status) { 1790 LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status); 1791 if (U_FAILURE(status)) { 1792 return nullptr; 1793 } 1794 U_ASSERT(units.length() > 0); 1795 if (result->fHandlers.resize(units.length()) == nullptr) { 1796 status = U_MEMORY_ALLOCATION_ERROR; 1797 return nullptr; 1798 } 1799 result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]); 1800 for (int32_t i = 0, length = units.length(); i < length; i++) { 1801 const MeasureUnit &unit = *units[i]; 1802 result->fMeasureUnits[i] = unit; 1803 if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) { 1804 MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status); 1805 MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr, 1806 mlnh, status); 1807 result->fHandlers[i] = mlnh; 1808 } else { 1809 LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status); 1810 LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr, lnh, status); 1811 result->fHandlers[i] = lnh; 1812 } 1813 if (U_FAILURE(status)) { 1814 return nullptr; 1815 } 1816 } 1817 return result.orphan(); 1818 } 1819 1820 void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps µs, 1821 UErrorCode &status) const { 1822 // We call parent->processQuantity() from the Multiplexer, instead of 1823 // letting LongNameHandler handle it: we don't know which LongNameHandler to 1824 // call until we've called the parent! 1825 fParent->processQuantity(quantity, micros, status); 1826 1827 // Call the correct LongNameHandler based on outputUnit 1828 for (int i = 0; i < fHandlers.getCapacity(); i++) { 1829 if (fMeasureUnits[i] == micros.outputUnit) { 1830 fHandlers[i]->processQuantity(quantity, micros, status); 1831 return; 1832 } 1833 } 1834 if (U_FAILURE(status)) { 1835 return; 1836 } 1837 // We shouldn't receive any outputUnit for which we haven't already got a 1838 // LongNameHandler: 1839 status = U_INTERNAL_PROGRAM_ERROR; 1840 } 1841 1842 #endif /* #if !UCONFIG_NO_FORMATTING */