numrange_impl.cpp (18424B)
1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 // Allow implicit conversion from char16_t* to UnicodeString for this file: 9 // Helpful in toString methods and elsewhere. 10 #define UNISTR_FROM_STRING_EXPLICIT 11 12 #include "unicode/numberrangeformatter.h" 13 #include "numrange_impl.h" 14 #include "patternprops.h" 15 #include "pluralranges.h" 16 #include "uresimp.h" 17 #include "util.h" 18 19 using namespace icu; 20 using namespace icu::number; 21 using namespace icu::number::impl; 22 23 namespace { 24 25 // Helper function for 2-dimensional switch statement 26 constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) { 27 return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4); 28 } 29 30 31 struct NumberRangeData { 32 SimpleFormatter rangePattern; 33 // Note: approximatelyPattern is unused since ICU 69. 34 // SimpleFormatter approximatelyPattern; 35 }; 36 37 class NumberRangeDataSink : public ResourceSink { 38 public: 39 NumberRangeDataSink(NumberRangeData& data) : fData(data) {} 40 41 void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) override { 42 ResourceTable miscTable = value.getTable(status); 43 if (U_FAILURE(status)) { return; } 44 for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) { 45 if (uprv_strcmp(key, "range") == 0) { 46 if (hasRangeData()) { 47 continue; // have already seen this pattern 48 } 49 fData.rangePattern = {value.getUnicodeString(status), status}; 50 } 51 /* 52 // Note: approximatelyPattern is unused since ICU 69. 53 else if (uprv_strcmp(key, "approximately") == 0) { 54 if (hasApproxData()) { 55 continue; // have already seen this pattern 56 } 57 fData.approximatelyPattern = {value.getUnicodeString(status), status}; 58 } 59 */ 60 } 61 } 62 63 bool hasRangeData() { 64 return fData.rangePattern.getArgumentLimit() != 0; 65 } 66 67 /* 68 // Note: approximatelyPattern is unused since ICU 69. 69 bool hasApproxData() { 70 return fData.approximatelyPattern.getArgumentLimit() != 0; 71 } 72 */ 73 74 bool isComplete() { 75 return hasRangeData() /* && hasApproxData() */; 76 } 77 78 void fillInDefaults(UErrorCode& status) { 79 if (!hasRangeData()) { 80 fData.rangePattern = {u"{0}–{1}", status}; 81 } 82 /* 83 if (!hasApproxData()) { 84 fData.approximatelyPattern = {u"~{0}", status}; 85 } 86 */ 87 } 88 89 private: 90 NumberRangeData& fData; 91 }; 92 93 void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) { 94 if (U_FAILURE(status)) { return; } 95 LocalUResourceBundlePointer rb(ures_open(nullptr, localeName, &status)); 96 if (U_FAILURE(status)) { return; } 97 NumberRangeDataSink sink(data); 98 99 CharString dataPath; 100 dataPath.append("NumberElements/", -1, status); 101 dataPath.append(nsName, -1, status); 102 dataPath.append("/miscPatterns", -1, status); 103 if (U_FAILURE(status)) { return; } 104 105 UErrorCode localStatus = U_ZERO_ERROR; 106 ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus); 107 if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) { 108 status = localStatus; 109 return; 110 } 111 112 // Fall back to latn if necessary 113 if (!sink.isComplete()) { 114 ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status); 115 } 116 117 sink.fillInDefaults(status); 118 } 119 120 } // namespace 121 122 123 124 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status) 125 : formatterImpl1(macros.formatter1.fMacros, status), 126 formatterImpl2(macros.formatter2.fMacros, status), 127 fSameFormatters(macros.singleFormatter), 128 fCollapse(macros.collapse), 129 fIdentityFallback(macros.identityFallback), 130 fApproximatelyFormatter(status) { 131 132 const char* nsName = formatterImpl1.getRawMicroProps().nsName; 133 if (!fSameFormatters && uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) { 134 status = U_ILLEGAL_ARGUMENT_ERROR; 135 return; 136 } 137 138 NumberRangeData data; 139 getNumberRangeData(macros.locale.getName(), nsName, data, status); 140 if (U_FAILURE(status)) { return; } 141 fRangeFormatter = data.rangePattern; 142 143 if (fSameFormatters && ( 144 fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY || 145 fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE)) { 146 MacroProps approximatelyMacros(macros.formatter1.fMacros); 147 approximatelyMacros.approximately = true; 148 // Use in-place construction because NumberFormatterImpl has internal self-pointers 149 fApproximatelyFormatter.~NumberFormatterImpl(); 150 new (&fApproximatelyFormatter) NumberFormatterImpl(approximatelyMacros, status); 151 } 152 153 // TODO: Get locale from PluralRules instead? 154 fPluralRanges = StandardPluralRanges::forLocale(macros.locale, status); 155 if (U_FAILURE(status)) { return; } 156 } 157 158 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const { 159 if (U_FAILURE(status)) { 160 return; 161 } 162 163 DecimalQuantity quantityBackup(data.quantity1); 164 165 MicroProps micros1; 166 MicroProps micros2; 167 formatterImpl1.preProcess(data.quantity1, micros1, status); 168 if (fSameFormatters) { 169 formatterImpl1.preProcess(data.quantity2, micros2, status); 170 } else { 171 formatterImpl2.preProcess(data.quantity2, micros2, status); 172 } 173 if (U_FAILURE(status)) { 174 return; 175 } 176 177 // If any of the affixes are different, an identity is not possible 178 // and we must use formatRange(). 179 // TODO: Write this as MicroProps operator==() ? 180 // TODO: Avoid the redundancy of these equality operations with the 181 // ones in formatRange? 182 if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner) 183 || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle) 184 || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) { 185 formatRange(data, micros1, micros2, status); 186 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL; 187 return; 188 } 189 190 // Check for identity 191 if (equalBeforeRounding) { 192 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING; 193 } else if (data.quantity1 == data.quantity2) { 194 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING; 195 } else { 196 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL; 197 } 198 199 switch (identity2d(fIdentityFallback, data.identityResult)) { 200 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE, 201 UNUM_IDENTITY_RESULT_NOT_EQUAL): 202 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE, 203 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING): 204 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE, 205 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING): 206 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY, 207 UNUM_IDENTITY_RESULT_NOT_EQUAL): 208 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, 209 UNUM_IDENTITY_RESULT_NOT_EQUAL): 210 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, 211 UNUM_IDENTITY_RESULT_NOT_EQUAL): 212 formatRange(data, micros1, micros2, status); 213 break; 214 215 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY, 216 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING): 217 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY, 218 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING): 219 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, 220 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING): 221 formatApproximately(data, quantityBackup, micros1, micros2, status); 222 break; 223 224 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, 225 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING): 226 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, 227 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING): 228 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, 229 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING): 230 formatSingleValue(data, micros1, micros2, status); 231 break; 232 233 default: 234 UPRV_UNREACHABLE_EXIT; 235 } 236 } 237 238 239 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data, 240 MicroProps& micros1, MicroProps& micros2, 241 UErrorCode& status) const { 242 if (U_FAILURE(status)) { return; } 243 if (fSameFormatters) { 244 int32_t length = NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, data.getStringRef(), 0, status); 245 NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status); 246 } else { 247 formatRange(data, micros1, micros2, status); 248 } 249 } 250 251 252 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data, 253 DecimalQuantity quantity, 254 MicroProps& micros1, MicroProps& micros2, 255 UErrorCode& status) const { 256 if (U_FAILURE(status)) { return; } 257 if (fSameFormatters) { 258 // Re-format using the approximately formatter: 259 MicroProps microsAppx; 260 fApproximatelyFormatter.preProcess(quantity, microsAppx, status); 261 int32_t length = NumberFormatterImpl::writeNumber(microsAppx.simple, quantity, data.getStringRef(), 0, status); 262 length += microsAppx.modInner->apply(data.getStringRef(), 0, length, status); 263 length += microsAppx.modMiddle->apply(data.getStringRef(), 0, length, status); 264 microsAppx.modOuter->apply(data.getStringRef(), 0, length, status); 265 } else { 266 formatRange(data, micros1, micros2, status); 267 } 268 } 269 270 271 void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, 272 MicroProps& micros1, MicroProps& micros2, 273 UErrorCode& status) const { 274 if (U_FAILURE(status)) { return; } 275 276 // modInner is always notation (scientific); collapsable in ALL. 277 // modOuter is always units; collapsable in ALL, AUTO, and UNIT. 278 // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT. 279 // Never collapse an outer mod but not an inner mod. 280 bool collapseOuter, collapseMiddle, collapseInner; 281 switch (fCollapse) { 282 case UNUM_RANGE_COLLAPSE_ALL: 283 case UNUM_RANGE_COLLAPSE_AUTO: 284 case UNUM_RANGE_COLLAPSE_UNIT: 285 { 286 // OUTER MODIFIER 287 collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter); 288 289 if (!collapseOuter) { 290 // Never collapse inner mods if outer mods are not collapsable 291 collapseMiddle = false; 292 collapseInner = false; 293 break; 294 } 295 296 // MIDDLE MODIFIER 297 collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle); 298 299 if (!collapseMiddle) { 300 // Never collapse inner mods if outer mods are not collapsable 301 collapseInner = false; 302 break; 303 } 304 305 // MIDDLE MODIFIER HEURISTICS 306 // (could disable collapsing of the middle modifier) 307 // The modifiers are equal by this point, so we can look at just one of them. 308 const Modifier* mm = micros1.modMiddle; 309 if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) { 310 // Only collapse if the modifier is a unit. 311 // TODO: Make a better way to check for a unit? 312 // TODO: Handle case where the modifier has both notation and unit (compact currency)? 313 if (!mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}) 314 && !mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD})) { 315 collapseMiddle = false; 316 } 317 } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) { 318 // Heuristic as of ICU 63: collapse only if the modifier is more than one code point. 319 if (mm->getCodePointCount() <= 1) { 320 collapseMiddle = false; 321 } 322 } 323 324 if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) { 325 collapseInner = false; 326 break; 327 } 328 329 // INNER MODIFIER 330 collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner); 331 332 // All done checking for collapsibility. 333 break; 334 } 335 336 default: 337 collapseOuter = false; 338 collapseMiddle = false; 339 collapseInner = false; 340 break; 341 } 342 343 FormattedStringBuilder& string = data.getStringRef(); 344 int32_t lengthPrefix = 0; 345 int32_t length1 = 0; 346 int32_t lengthInfix = 0; 347 int32_t length2 = 0; 348 int32_t lengthSuffix = 0; 349 350 // Use #define so that these are evaluated at the call site. 351 #define UPRV_INDEX_0 (lengthPrefix) 352 #define UPRV_INDEX_1 (lengthPrefix + length1) 353 #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix) 354 #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2) 355 #define UPRV_INDEX_4 (lengthPrefix + length1 + lengthInfix + length2 + lengthSuffix) 356 357 int32_t lengthRange = SimpleModifier::formatTwoArgPattern( 358 fRangeFormatter, 359 string, 360 0, 361 &lengthPrefix, 362 &lengthSuffix, 363 kUndefinedField, 364 status); 365 if (U_FAILURE(status)) { return; } 366 lengthInfix = lengthRange - lengthPrefix - lengthSuffix; 367 U_ASSERT(lengthInfix > 0); 368 369 // SPACING HEURISTIC 370 // Add spacing unless all modifiers are collapsed. 371 // TODO: add API to control this? 372 // TODO: Use a data-driven heuristic like currency spacing? 373 // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications) 374 { 375 bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0; 376 bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0; 377 bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0; 378 if (repeatInner || repeatMiddle || repeatOuter) { 379 // Add spacing if there is not already spacing 380 if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) { 381 lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', kUndefinedField, status); 382 } 383 if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) { 384 lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', kUndefinedField, status); 385 } 386 } 387 } 388 389 length1 += NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, string, UPRV_INDEX_0, status); 390 // ICU-21684: Write the second number to a temp string to avoid repeated insert operations 391 FormattedStringBuilder tempString; 392 NumberFormatterImpl::writeNumber(micros2.simple, data.quantity2, tempString, 0, status); 393 length2 += string.insert(UPRV_INDEX_2, tempString, status); 394 395 // TODO: Support padding? 396 397 if (collapseInner) { 398 const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner); 399 lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); 400 lengthPrefix += mod.getPrefixLength(); 401 lengthSuffix -= mod.getPrefixLength(); 402 } else { 403 length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); 404 length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status); 405 } 406 407 if (collapseMiddle) { 408 const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle); 409 lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); 410 lengthPrefix += mod.getPrefixLength(); 411 lengthSuffix -= mod.getPrefixLength(); 412 } else { 413 length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); 414 length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status); 415 } 416 417 if (collapseOuter) { 418 const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter); 419 lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); 420 lengthPrefix += mod.getPrefixLength(); 421 lengthSuffix -= mod.getPrefixLength(); 422 } else { 423 length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); 424 length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status); 425 } 426 427 // Now that all pieces are added, save the span info. 428 data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, UPRV_INDEX_0, length1, status); 429 data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, UPRV_INDEX_2, length2, status); 430 } 431 432 433 const Modifier& 434 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const { 435 Modifier::Parameters parameters; 436 first.getParameters(parameters); 437 if (parameters.obj == nullptr) { 438 // No plural form; return a fallback (e.g., the first) 439 return first; 440 } 441 StandardPlural::Form firstPlural = parameters.plural; 442 443 second.getParameters(parameters); 444 if (parameters.obj == nullptr) { 445 // No plural form; return a fallback (e.g., the first) 446 return first; 447 } 448 StandardPlural::Form secondPlural = parameters.plural; 449 450 // Get the required plural form from data 451 StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural); 452 453 // Get and return the new Modifier 454 const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural); 455 U_ASSERT(mod != nullptr); 456 return *mod; 457 } 458 459 460 461 #endif /* #if !UCONFIG_NO_FORMATTING */