NumberFormat.h (13871B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 #ifndef intl_components_NumberFormat_h_ 5 #define intl_components_NumberFormat_h_ 6 #include <string_view> 7 #include <utility> 8 9 #include "mozilla/intl/ICU4CGlue.h" 10 #include "mozilla/Maybe.h" 11 #include "mozilla/PodOperations.h" 12 #include "mozilla/Result.h" 13 #include "mozilla/intl/ICUError.h" 14 #include "mozilla/intl/NumberPart.h" 15 16 #include "unicode/ustring.h" 17 #include "unicode/unum.h" 18 #include "unicode/unumberformatter.h" 19 20 struct UPluralRules; 21 22 namespace mozilla::intl { 23 24 struct PluralRulesOptions; 25 26 /** 27 * Configure NumberFormat options. 28 * The supported display styles are: 29 * * Decimal (default) 30 * * Currency (controlled by mCurrency) 31 * * Unit (controlled by mUnit) 32 * * Percent (controlled by mPercent) 33 * 34 * Only one of mCurrency, mUnit or mPercent should be set. If none are set, 35 * the number will formatted as a decimal. 36 * 37 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit 38 */ 39 struct MOZ_STACK_CLASS NumberFormatOptions { 40 /** 41 * Display a currency amount. |currency| must be a three-letter currency code. 42 * 43 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit 44 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit-width 45 */ 46 enum class CurrencyDisplay { 47 Symbol, 48 Code, 49 Name, 50 NarrowSymbol, 51 }; 52 Maybe<std::pair<std::string_view, CurrencyDisplay>> mCurrency; 53 54 /** 55 * Set the fraction digits settings. |min| can be zero, |max| must be 56 * larger-or-equal to |min|. 57 * 58 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#fraction-precision 59 */ 60 Maybe<std::pair<uint32_t, uint32_t>> mFractionDigits; 61 62 /** 63 * Set the minimum number of integer digits. |min| must be a non-zero 64 * number. 65 * 66 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#integer-width 67 */ 68 Maybe<uint32_t> mMinIntegerDigits; 69 70 /** 71 * Set the significant digits settings. |min| must be a non-zero number, |max| 72 * must be larger-or-equal to |min|. 73 * 74 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#significant-digits-precision 75 */ 76 Maybe<std::pair<uint32_t, uint32_t>> mSignificantDigits; 77 78 /** 79 * Display a unit amount. |unit| must be a well-formed unit identifier. 80 * 81 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit 82 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#per-unit 83 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit-width 84 */ 85 enum class UnitDisplay { Short, Narrow, Long }; 86 Maybe<std::pair<std::string_view, UnitDisplay>> mUnit; 87 88 /** 89 * Display a percent number. 90 * 91 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit 92 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#scale 93 */ 94 bool mPercent = false; 95 96 /** 97 * Set to true to strip trailing zeros after the decimal point for integer 98 * values. 99 * 100 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#trailing-zero-display 101 */ 102 bool mStripTrailingZero = false; 103 104 /** 105 * Enable or disable grouping. 106 * 107 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#grouping 108 */ 109 enum class Grouping { 110 Auto, 111 Always, 112 Min2, 113 Never, 114 } mGrouping = Grouping::Auto; 115 116 /** 117 * Set the notation style. 118 * 119 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#notation 120 */ 121 enum class Notation { 122 Standard, 123 Scientific, 124 Engineering, 125 CompactShort, 126 CompactLong 127 } mNotation = Notation::Standard; 128 129 /** 130 * Set the sign-display. 131 * 132 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#sign-display 133 */ 134 enum class SignDisplay { 135 Auto, 136 Never, 137 Always, 138 ExceptZero, 139 Negative, 140 Accounting, 141 AccountingAlways, 142 AccountingExceptZero, 143 AccountingNegative, 144 } mSignDisplay = SignDisplay::Auto; 145 146 /** 147 * Set the rounding increment, which must be a non-zero number. 148 * 149 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#precision 150 */ 151 uint32_t mRoundingIncrement = 1; 152 153 /** 154 * Set the rounding mode. 155 * 156 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#rounding-mode 157 */ 158 enum class RoundingMode { 159 Ceil, 160 Floor, 161 Expand, 162 Trunc, 163 HalfCeil, 164 HalfFloor, 165 HalfExpand, 166 HalfTrunc, 167 HalfEven, 168 HalfOdd, 169 } mRoundingMode = RoundingMode::HalfExpand; 170 171 /** 172 * Set the rounding priority. |mFractionDigits| and |mSignificantDigits| must 173 * both be set if the rounding priority isn't equal to "auto". 174 * 175 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#fraction-precision 176 */ 177 enum class RoundingPriority { 178 Auto, 179 MorePrecision, 180 LessPrecision, 181 } mRoundingPriority = RoundingPriority::Auto; 182 }; 183 184 /** 185 * According to http://userguide.icu-project.org/design, as long as we constrain 186 * ourselves to const APIs ICU is const-correct. 187 */ 188 189 /** 190 * A NumberFormat implementation that roughly mirrors the API provided by 191 * the ECMA-402 Intl.NumberFormat object. 192 * 193 * https://tc39.es/ecma402/#numberformat-objects 194 */ 195 class NumberFormat final { 196 public: 197 /** 198 * Initialize a new NumberFormat for the provided locale and using the 199 * provided options. 200 * 201 * https://tc39.es/ecma402/#sec-initializenumberformat 202 */ 203 static Result<UniquePtr<NumberFormat>, ICUError> TryCreate( 204 std::string_view aLocale, const NumberFormatOptions& aOptions); 205 206 NumberFormat() = default; 207 NumberFormat(const NumberFormat&) = delete; 208 NumberFormat& operator=(const NumberFormat&) = delete; 209 ~NumberFormat(); 210 211 /** 212 * Formats a double to a utf-16 string. The string view is valid until 213 * another number is formatted. Accessing the string view after this event 214 * is undefined behavior. 215 * 216 * https://tc39.es/ecma402/#sec-formatnumberstring 217 */ 218 Result<std::u16string_view, ICUError> format(double number) const { 219 if (!formatInternal(number)) { 220 return Err(ICUError::InternalError); 221 } 222 223 return formatResult(); 224 } 225 226 /** 227 * Formats a double to a utf-16 string, and fills the provided parts vector. 228 * The string view is valid until another number is formatted. Accessing the 229 * string view after this event is undefined behavior. 230 * 231 * This is utf-16 only because the only current use case is in 232 * SpiderMonkey. Supporting utf-8 would require recalculating the offsets 233 * in NumberPartVector from fixed width to variable width, which might be 234 * tricky to get right and is work that won't be necessary if we switch to 235 * ICU4X (see Bug 1707035). 236 * 237 * https://tc39.es/ecma402/#sec-partitionnumberpattern 238 */ 239 Result<std::u16string_view, ICUError> formatToParts( 240 double number, NumberPartVector& parts) const; 241 242 /** 243 * Formats a double to the provider buffer (either utf-8 or utf-16) 244 * 245 * https://tc39.es/ecma402/#sec-formatnumberstring 246 */ 247 template <typename B> 248 Result<Ok, ICUError> format(double number, B& buffer) const { 249 if (!formatInternal(number)) { 250 return Err(ICUError::InternalError); 251 } 252 253 return formatResult<typename B::CharType, B>(buffer); 254 } 255 256 /** 257 * Formats an int64_t to a utf-16 string. The string view is valid until 258 * another number is formatted. Accessing the string view after this event is 259 * undefined behavior. 260 * 261 * https://tc39.es/ecma402/#sec-formatnumberstring 262 */ 263 Result<std::u16string_view, ICUError> format(int64_t number) const { 264 if (!formatInternal(number)) { 265 return Err(ICUError::InternalError); 266 } 267 268 return formatResult(); 269 } 270 271 /** 272 * Formats a int64_t to a utf-16 string, and fills the provided parts vector. 273 * The string view is valid until another number is formatted. Accessing the 274 * string view after this event is undefined behavior. 275 * 276 * This is utf-16 only because the only current use case is in 277 * SpiderMonkey. Supporting utf-8 would require recalculating the offsets 278 * in NumberPartVector from fixed width to variable width, which might be 279 * tricky to get right and is work that won't be necessary if we switch to 280 * ICU4X (see Bug 1707035). 281 * 282 * https://tc39.es/ecma402/#sec-partitionnumberpattern 283 */ 284 Result<std::u16string_view, ICUError> formatToParts( 285 int64_t number, NumberPartVector& parts) const; 286 287 /** 288 * Formats an int64_t to the provider buffer (either utf-8 or utf-16). 289 * 290 * https://tc39.es/ecma402/#sec-formatnumberstring 291 */ 292 template <typename B> 293 Result<Ok, ICUError> format(int64_t number, B& buffer) const { 294 if (!formatInternal(number)) { 295 return Err(ICUError::InternalError); 296 } 297 298 return formatResult<typename B::CharType, B>(buffer); 299 } 300 301 /** 302 * Formats a string encoded decimal number to a utf-16 string. The string view 303 * is valid until another number is formatted. Accessing the string view 304 * after this event is undefined behavior. 305 * 306 * https://tc39.es/ecma402/#sec-formatnumberstring 307 */ 308 Result<std::u16string_view, ICUError> format(std::string_view number) const { 309 if (!formatInternal(number)) { 310 return Err(ICUError::InternalError); 311 } 312 313 return formatResult(); 314 } 315 316 /** 317 * Formats a string encoded decimal number to a utf-16 string, and fills the 318 * provided parts vector. The string view is valid until another number is 319 * formatted. Accessing the string view after this event is undefined 320 * behavior. 321 * 322 * This is utf-16 only because the only current use case is in 323 * SpiderMonkey. Supporting utf-8 would require recalculating the offsets 324 * in NumberPartVector from fixed width to variable width, which might be 325 * tricky to get right and is work that won't be necessary if we switch to 326 * ICU4X (see Bug 1707035). 327 * 328 * https://tc39.es/ecma402/#sec-partitionnumberpattern 329 */ 330 Result<std::u16string_view, ICUError> formatToParts( 331 std::string_view number, NumberPartVector& parts) const; 332 333 /** 334 * Formats a string encoded decimal number to the provider buffer 335 * (either utf-8 or utf-16). 336 * 337 * https://tc39.es/ecma402/#sec-formatnumberstring 338 */ 339 template <typename B> 340 Result<Ok, ICUError> format(std::string_view number, B& buffer) const { 341 if (!formatInternal(number)) { 342 return Err(ICUError::InternalError); 343 } 344 345 return formatResult<typename B::CharType, B>(buffer); 346 } 347 348 /** 349 * Formats the number and selects the keyword by using a provided 350 * UPluralRules object. 351 * 352 * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.select 353 * 354 * TODO(1713917) This is necessary because both PluralRules and 355 * NumberFormat have a shared dependency on the raw UFormattedNumber 356 * type. Once we transition to using ICU4X, the FFI calls should no 357 * longer require such shared dependencies. At that time, this 358 * functionality should be removed from NumberFormat and invoked 359 * solely from PluralRules. 360 */ 361 Result<int32_t, ICUError> selectFormatted(double number, char16_t* keyword, 362 int32_t keywordSize, 363 UPluralRules* pluralRules) const; 364 365 /** 366 * Returns an iterator over all supported number formatter locales. 367 * 368 * The returned strings are ICU locale identifiers and NOT BCP 47 language 369 * tags. 370 * 371 * Also see <https://unicode-org.github.io/icu/userguide/locale>. 372 */ 373 static auto GetAvailableLocales() { 374 return AvailableLocalesEnumeration<unum_countAvailable, 375 unum_getAvailable>(); 376 } 377 378 private: 379 UNumberFormatter* mNumberFormatter = nullptr; 380 UFormattedNumber* mFormattedNumber = nullptr; 381 bool mFormatForUnit = false; 382 383 Result<Ok, ICUError> initialize(std::string_view aLocale, 384 const NumberFormatOptions& aOptions); 385 386 [[nodiscard]] bool formatInternal(double number) const; 387 [[nodiscard]] bool formatInternal(int64_t number) const; 388 [[nodiscard]] bool formatInternal(std::string_view number) const; 389 390 Result<std::u16string_view, ICUError> formatResult() const; 391 392 template <typename C, typename B> 393 Result<Ok, ICUError> formatResult(B& buffer) const { 394 // We only support buffers with char or char16_t. 395 static_assert(std::is_same_v<C, char> || std::is_same_v<C, char16_t>); 396 397 return formatResult().andThen( 398 [&buffer](std::u16string_view result) -> Result<Ok, ICUError> { 399 if constexpr (std::is_same_v<C, char>) { 400 if (!FillBuffer(Span(result.data(), result.size()), buffer)) { 401 return Err(ICUError::OutOfMemory); 402 } 403 return Ok(); 404 } else { 405 // ICU provides APIs which accept a buffer, but they just copy from 406 // an internal buffer behind the scenes anyway. 407 if (!buffer.reserve(result.size())) { 408 return Err(ICUError::OutOfMemory); 409 } 410 PodCopy(static_cast<char16_t*>(buffer.data()), result.data(), 411 result.size()); 412 buffer.written(result.size()); 413 414 return Ok(); 415 } 416 }); 417 } 418 }; 419 420 } // namespace mozilla::intl 421 422 #endif