units_data.cpp (23021B)
1 // © 2020 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 #include "bytesinkutil.h" 9 #include "charstr.h" 10 #include "cstring.h" 11 #include "measunit_impl.h" 12 #include "number_decimalquantity.h" 13 #include "resource.h" 14 #include "uassert.h" 15 #include "ulocimp.h" 16 #include "unicode/locid.h" 17 #include "unicode/unistr.h" 18 #include "unicode/ures.h" 19 #include "units_data.h" 20 #include "uresimp.h" 21 #include "util.h" 22 #include <utility> 23 24 U_NAMESPACE_BEGIN 25 namespace units { 26 27 namespace { 28 29 using icu::number::impl::DecimalQuantity; 30 31 void trimSpaces(CharString& factor, UErrorCode& status){ 32 CharString trimmed; 33 for (int i = 0 ; i < factor.length(); i++) { 34 if (factor[i] == ' ') continue; 35 36 trimmed.append(factor[i], status); 37 } 38 39 factor = std::move(trimmed); 40 } 41 42 /** 43 * A ResourceSink that collects conversion rate information. 44 * 45 * This class is for use by ures_getAllItemsWithFallback. 46 */ 47 class ConversionRateDataSink : public ResourceSink { 48 public: 49 /** 50 * Constructor. 51 * @param out The vector to which ConversionRateInfo instances are to be 52 * added. This vector must outlive the use of the ResourceSink. 53 */ 54 explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {} 55 56 /** 57 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit 58 * conversion rates that are found in `value` to the output vector. 59 * 60 * @param source This string must be "convertUnits": the resource that this 61 * class supports reading. 62 * @param value The "convertUnits" resource, containing unit conversion rate 63 * information. 64 * @param noFallback Ignored. 65 * @param status The standard ICU error code output parameter. 66 */ 67 void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { 68 if (U_FAILURE(status)) { return; } 69 if (uprv_strcmp(source, "convertUnits") != 0) { 70 // This is very strict, however it is the cheapest way to be sure 71 // that with `value`, we're looking at the convertUnits table. 72 status = U_ILLEGAL_ARGUMENT_ERROR; 73 return; 74 } 75 ResourceTable conversionRateTable = value.getTable(status); 76 const char *srcUnit; 77 // We're reusing `value`, which seems to be a common pattern: 78 for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) { 79 ResourceTable unitTable = value.getTable(status); 80 const char *key; 81 UnicodeString baseUnit = ICU_Utility::makeBogusString(); 82 UnicodeString factor = ICU_Utility::makeBogusString(); 83 UnicodeString offset = ICU_Utility::makeBogusString(); 84 UnicodeString special = ICU_Utility::makeBogusString(); 85 UnicodeString systems = ICU_Utility::makeBogusString(); 86 for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) { 87 if (uprv_strcmp(key, "target") == 0) { 88 baseUnit = value.getUnicodeString(status); 89 } else if (uprv_strcmp(key, "factor") == 0) { 90 factor = value.getUnicodeString(status); 91 } else if (uprv_strcmp(key, "offset") == 0) { 92 offset = value.getUnicodeString(status); 93 } else if (uprv_strcmp(key, "special") == 0) { 94 special = value.getUnicodeString(status); // the name of a special mapping used instead of factor + optional offset. 95 } else if (uprv_strcmp(key, "systems") == 0) { 96 systems = value.getUnicodeString(status); 97 } 98 } 99 if (U_FAILURE(status)) { return; } 100 if (baseUnit.isBogus() || (factor.isBogus() && special.isBogus())) { 101 // We could not find a usable conversion rate: bad resource. 102 status = U_MISSING_RESOURCE_ERROR; 103 return; 104 } 105 106 // We don't have this ConversionRateInfo yet: add it. 107 ConversionRateInfo *cr = outVector->emplaceBack(); 108 if (!cr) { 109 status = U_MEMORY_ALLOCATION_ERROR; 110 return; 111 } else { 112 cr->sourceUnit = srcUnit; 113 if (cr->sourceUnit.isEmpty() != (*srcUnit == '\0')) { 114 status = U_MEMORY_ALLOCATION_ERROR; 115 } 116 copyInvariantChars(baseUnit, cr->baseUnit, status); 117 if (U_SUCCESS(status) && !factor.isBogus()) { 118 CharString tmp; 119 tmp.appendInvariantChars(factor, status); 120 trimSpaces(tmp, status); 121 if (U_SUCCESS(status)) { 122 cr->factor = tmp.toStringPiece(); 123 if (cr->factor.isEmpty() != tmp.isEmpty()) { 124 status = U_MEMORY_ALLOCATION_ERROR; 125 } 126 } 127 } 128 if (!offset.isBogus()) { copyInvariantChars(offset, cr->offset, status); } 129 if (!special.isBogus()) { copyInvariantChars(special, cr->specialMappingName, status); } 130 copyInvariantChars(systems, cr->systems, status); 131 } 132 } 133 } 134 135 private: 136 MaybeStackVector<ConversionRateInfo> *outVector; 137 }; 138 139 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) { 140 return a.compareTo(b) < 0; 141 } 142 143 /** 144 * A ResourceSink that collects unit preferences information. 145 * 146 * This class is for use by ures_getAllItemsWithFallback. 147 */ 148 class UnitPreferencesSink : public ResourceSink { 149 public: 150 /** 151 * Constructor. 152 * @param outPrefs The vector to which UnitPreference instances are to be 153 * added. This vector must outlive the use of the ResourceSink. 154 * @param outMetadata The vector to which UnitPreferenceMetadata instances 155 * are to be added. This vector must outlive the use of the ResourceSink. 156 */ 157 explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs, 158 MaybeStackVector<UnitPreferenceMetadata> *outMetadata) 159 : preferences(outPrefs), metadata(outMetadata) {} 160 161 /** 162 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit 163 * preferences info that are found in `value` to the output vector. 164 * 165 * @param source This string must be "unitPreferenceData": the resource that 166 * this class supports reading. 167 * @param value The "unitPreferenceData" resource, containing unit 168 * preferences data. 169 * @param noFallback Ignored. 170 * @param status The standard ICU error code output parameter. Note: if an 171 * error is returned, outPrefs and outMetadata may be inconsistent. 172 */ 173 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { 174 if (U_FAILURE(status)) { return; } 175 if (uprv_strcmp(key, "unitPreferenceData") != 0) { 176 // This is very strict, however it is the cheapest way to be sure 177 // that with `value`, we're looking at the convertUnits table. 178 status = U_ILLEGAL_ARGUMENT_ERROR; 179 return; 180 } 181 // The unitPreferenceData structure (see data/misc/units.txt) contains a 182 // hierarchy of category/usage/region, within which are a set of 183 // preferences. Hence three for-loops and another loop for the 184 // preferences themselves: 185 ResourceTable unitPreferenceDataTable = value.getTable(status); 186 const char *category; 187 for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) { 188 ResourceTable categoryTable = value.getTable(status); 189 const char *usage; 190 for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) { 191 ResourceTable regionTable = value.getTable(status); 192 const char *region; 193 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) { 194 // `value` now contains the set of preferences for 195 // category/usage/region. 196 ResourceArray unitPrefs = value.getArray(status); 197 if (U_FAILURE(status)) { return; } 198 int32_t prefLen = unitPrefs.getSize(); 199 200 // Update metadata for this set of preferences. 201 UnitPreferenceMetadata *meta = metadata->emplaceBack( 202 category, usage, region, preferences->length(), prefLen, status); 203 if (!meta) { 204 status = U_MEMORY_ALLOCATION_ERROR; 205 return; 206 } 207 if (U_FAILURE(status)) { return; } 208 if (metadata->length() > 1) { 209 // Verify that unit preferences are sorted and 210 // without duplicates. 211 if (!(*(*metadata)[metadata->length() - 2] < 212 *(*metadata)[metadata->length() - 1])) { 213 status = U_INVALID_FORMAT_ERROR; 214 return; 215 } 216 } 217 218 // Collect the individual preferences. 219 for (int32_t i = 0; unitPrefs.getValue(i, value); i++) { 220 UnitPreference *up = preferences->emplaceBack(); 221 if (!up) { 222 status = U_MEMORY_ALLOCATION_ERROR; 223 return; 224 } 225 ResourceTable unitPref = value.getTable(status); 226 if (U_FAILURE(status)) { return; } 227 for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) { 228 if (uprv_strcmp(key, "unit") == 0) { 229 copyInvariantChars(value.getUnicodeString(status), up->unit, status); 230 } else if (uprv_strcmp(key, "geq") == 0) { 231 int32_t length; 232 const char16_t *g = value.getString(length, status); 233 CharString geq; 234 geq.appendInvariantChars(g, length, status); 235 DecimalQuantity dq; 236 dq.setToDecNumber(geq.data(), status); 237 up->geq = dq.toDouble(); 238 } else if (uprv_strcmp(key, "skeleton") == 0) { 239 up->skeleton = value.getUnicodeString(status); 240 } 241 } 242 } 243 } 244 } 245 } 246 } 247 248 private: 249 MaybeStackVector<UnitPreference> *preferences; 250 MaybeStackVector<UnitPreferenceMetadata> *metadata; 251 }; 252 253 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata, 254 const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage, 255 bool *foundRegion, UErrorCode &status) { 256 if (U_FAILURE(status)) { return -1; } 257 int32_t start = 0; 258 int32_t end = metadata->length(); 259 *foundCategory = false; 260 *foundUsage = false; 261 *foundRegion = false; 262 while (start < end) { 263 int32_t mid = (start + end) / 2; 264 int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion); 265 if (cmp < 0) { 266 start = mid + 1; 267 } else if (cmp > 0) { 268 end = mid; 269 } else { 270 return mid; 271 } 272 } 273 return -1; 274 } 275 276 /** 277 * Finds the UnitPreferenceMetadata instance that matches the given category, 278 * usage and region: if missing, region falls back to "001", and usage 279 * repeatedly drops tailing components, eventually trying "default" 280 * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default"). 281 * 282 * @param metadata The full list of UnitPreferenceMetadata instances. 283 * @param category The category to search for. See getUnitCategory(). 284 * @param usage The usage for which formatting preferences is needed. If the 285 * given usage is not known, automatic fallback occurs, see function description 286 * above. 287 * @param region The region for which preferences are needed. If there are no 288 * region-specific preferences, this function automatically falls back to the 289 * "001" region (global). 290 * @param status The standard ICU error code output parameter. 291 * * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR. 292 * * If fallback to "default" or "001" didn't resolve, status will be 293 * U_MISSING_RESOURCE. 294 * @return The index into the metadata vector which represents the appropriate 295 * preferences. If appropriate preferences are not found, -1 is returned. 296 */ 297 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata, 298 StringPiece category, StringPiece usage, StringPiece region, 299 UErrorCode &status) { 300 if (U_FAILURE(status)) { return -1; } 301 bool foundCategory, foundUsage, foundRegion; 302 UnitPreferenceMetadata desired(category, usage, region, -1, -1, status); 303 int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); 304 if (U_FAILURE(status)) { return -1; } 305 if (idx >= 0) { return idx; } 306 if (!foundCategory) { 307 // TODO: failures can happen if units::getUnitCategory returns a category 308 // that does not appear in unitPreferenceData. Do we want a unit test that 309 // checks unitPreferenceData has full coverage of categories? Or just trust 310 // CLDR? 311 status = U_ILLEGAL_ARGUMENT_ERROR; 312 return -1; 313 } 314 U_ASSERT(foundCategory); 315 while (!foundUsage) { 316 int32_t lastDashIdx = desired.usage.lastIndexOf('-'); 317 if (lastDashIdx > 0) { 318 desired.usage.truncate(lastDashIdx); 319 } else if (uprv_strcmp(desired.usage.data(), "default") != 0) { 320 desired.usage.truncate(0).append("default", status); 321 } else { 322 // "default" is not supposed to be missing for any valid category. 323 status = U_MISSING_RESOURCE_ERROR; 324 return -1; 325 } 326 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); 327 if (U_FAILURE(status)) { return -1; } 328 } 329 U_ASSERT(foundCategory); 330 U_ASSERT(foundUsage); 331 if (!foundRegion) { 332 if (uprv_strcmp(desired.region.data(), "001") != 0) { 333 desired.region.truncate(0).append("001", status); 334 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); 335 } 336 if (!foundRegion) { 337 // "001" is not supposed to be missing for any valid usage. 338 status = U_MISSING_RESOURCE_ERROR; 339 return -1; 340 } 341 } 342 U_ASSERT(foundCategory); 343 U_ASSERT(foundUsage); 344 U_ASSERT(foundRegion); 345 U_ASSERT(idx >= 0); 346 return idx; 347 } 348 349 } // namespace 350 351 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage, 352 StringPiece region, int32_t prefsOffset, 353 int32_t prefsCount, UErrorCode &status) { 354 this->category.append(category, status); 355 this->usage.append(usage, status); 356 this->region.append(region, status); 357 this->prefsOffset = prefsOffset; 358 this->prefsCount = prefsCount; 359 } 360 361 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const { 362 int32_t cmp = uprv_strcmp(category.data(), other.category.data()); 363 if (cmp == 0) { 364 cmp = uprv_strcmp(usage.data(), other.usage.data()); 365 } 366 if (cmp == 0) { 367 cmp = uprv_strcmp(region.data(), other.region.data()); 368 } 369 return cmp; 370 } 371 372 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory, 373 bool *foundUsage, bool *foundRegion) const { 374 int32_t cmp = uprv_strcmp(category.data(), other.category.data()); 375 if (cmp == 0) { 376 *foundCategory = true; 377 cmp = uprv_strcmp(usage.data(), other.usage.data()); 378 } 379 if (cmp == 0) { 380 *foundUsage = true; 381 cmp = uprv_strcmp(region.data(), other.region.data()); 382 } 383 if (cmp == 0) { 384 *foundRegion = true; 385 } 386 return cmp; 387 } 388 389 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace? 390 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) { 391 LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status)); 392 ConversionRateDataSink sink(&result); 393 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status); 394 } 395 396 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source, 397 UErrorCode &status) const { 398 for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) { 399 if (uprv_strncmp(conversionInfo_[i]->sourceUnit.data(), source.data(), source.size()) == 0) { 400 return conversionInfo_[i]; 401 } 402 } 403 404 status = U_INTERNAL_PROGRAM_ERROR; 405 return nullptr; 406 } 407 408 UnitPreferences::UnitPreferences(UErrorCode& status) { 409 LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status)); 410 UnitPreferencesSink sink(&unitPrefs_, &metadata_); 411 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status); 412 } 413 414 CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) { 415 if (U_FAILURE(status)) { return {}; } 416 auto result = locale.getKeywordValue<CharString>(kw, status); 417 if (U_SUCCESS(status) && result.isEmpty()) { 418 status = U_MISSING_RESOURCE_ERROR; 419 } 420 return result; 421 } 422 423 MaybeStackVector<UnitPreference> UnitPreferences::getPreferencesFor(StringPiece category, 424 StringPiece usage, 425 const Locale& locale, 426 UErrorCode& status) const { 427 MaybeStackVector<UnitPreference> result; 428 429 // TODO: remove this once all the categories are allowed. 430 // WARNING: when this is removed please make sure to keep the "fahrenhe" => "fahrenheit" mapping 431 UErrorCode internalMuStatus = U_ZERO_ERROR; 432 if (category.compare("temperature") == 0) { 433 CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus); 434 if (U_SUCCESS(internalMuStatus)) { 435 // The value for -u-mu- is `fahrenhe`, but CLDR and everything else uses `fahrenheit` 436 if (localeUnitCharString == "fahrenhe") { 437 localeUnitCharString = CharString("fahrenheit", status); 438 } 439 // TODO: use the unit category as Java especially when all the categories are allowed.. 440 if (localeUnitCharString == "celsius" 441 || localeUnitCharString == "fahrenheit" 442 || localeUnitCharString == "kelvin" 443 ) { 444 UnitPreference unitPref; 445 unitPref.unit = localeUnitCharString.toStringPiece(); 446 if (unitPref.unit.isEmpty() != localeUnitCharString.isEmpty()) { 447 status = U_MISSING_RESOURCE_ERROR; 448 return result; 449 } 450 result.emplaceBackAndCheckErrorCode(status, unitPref); 451 return result; 452 } 453 } 454 } 455 456 CharString region = ulocimp_getRegionForSupplementalData(locale.getName(), true, status); 457 458 // Check the locale system tag, e.g `ms=metric`. 459 UErrorCode internalMeasureTagStatus = U_ZERO_ERROR; 460 CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus); 461 bool isLocaleSystem = false; 462 if (U_SUCCESS(internalMeasureTagStatus) && (localeSystem == "metric" || localeSystem == "ussystem" || localeSystem == "uksystem")) { 463 isLocaleSystem = true; 464 } 465 466 int32_t idx = 467 getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status); 468 if (U_FAILURE(status)) { 469 return result; 470 } 471 472 U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`. 473 const UnitPreferenceMetadata *m = metadata_[idx]; 474 475 if (isLocaleSystem) { 476 // if the locale ID specifies a measurment system, check if ALL of the units we got back 477 // are members of that system (or are "metric_adjacent", which we consider to match all 478 // the systems) 479 bool unitsMatchSystem = true; 480 ConversionRates rates(status); 481 for (int32_t i = 0; unitsMatchSystem && i < m->prefsCount; i++) { 482 const UnitPreference& unitPref = *(unitPrefs_[i + m->prefsOffset]); 483 MeasureUnitImpl measureUnit = MeasureUnitImpl::forIdentifier(unitPref.unit.data(), status); 484 for (int32_t j = 0; unitsMatchSystem && j < measureUnit.singleUnits.length(); j++) { 485 const SingleUnitImpl* singleUnit = measureUnit.singleUnits[j]; 486 const ConversionRateInfo* rateInfo = rates.extractConversionInfo(singleUnit->getSimpleUnitID(), status); 487 const char* systems = rateInfo->systems.data(); 488 // "metric-adjacent" is considered to match all the locale systems 489 if (uprv_strstr(systems, "metric_adjacent") == nullptr) { 490 if (uprv_strstr(systems, localeSystem.data()) == nullptr) { 491 unitsMatchSystem = false; 492 } 493 } 494 } 495 } 496 497 // if any of the units we got back above don't match the mearurement system the locale ID asked for, 498 // throw out the region and just load the units for the base region for the requested measurement system 499 if (!unitsMatchSystem) { 500 region.clear(); 501 if (localeSystem == "ussystem") { 502 region.append("US", status); 503 } else if (localeSystem == "uksystem") { 504 region.append("GB", status); 505 } else { 506 region.append("001", status); 507 } 508 idx = getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status); 509 if (U_FAILURE(status)) { 510 return result; 511 } 512 513 m = metadata_[idx]; 514 } 515 } 516 517 for (int32_t i = 0; i < m->prefsCount; i++) { 518 result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset])); 519 } 520 return result; 521 } 522 523 } // namespace units 524 U_NAMESPACE_END 525 526 #endif /* #if !UCONFIG_NO_FORMATTING */