tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

NumberFormat.h (13871B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 #ifndef intl_components_NumberFormat_h_
      5 #define intl_components_NumberFormat_h_
      6 #include <string_view>
      7 #include <utility>
      8 
      9 #include "mozilla/intl/ICU4CGlue.h"
     10 #include "mozilla/Maybe.h"
     11 #include "mozilla/PodOperations.h"
     12 #include "mozilla/Result.h"
     13 #include "mozilla/intl/ICUError.h"
     14 #include "mozilla/intl/NumberPart.h"
     15 
     16 #include "unicode/ustring.h"
     17 #include "unicode/unum.h"
     18 #include "unicode/unumberformatter.h"
     19 
     20 struct UPluralRules;
     21 
     22 namespace mozilla::intl {
     23 
     24 struct PluralRulesOptions;
     25 
     26 /**
     27 * Configure NumberFormat options.
     28 * The supported display styles are:
     29 *   * Decimal (default)
     30 *   * Currency (controlled by mCurrency)
     31 *   * Unit (controlled by mUnit)
     32 *   * Percent (controlled by mPercent)
     33 *
     34 * Only one of mCurrency, mUnit or mPercent should be set. If none are set,
     35 * the number will formatted as a decimal.
     36 *
     37 * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit
     38 */
     39 struct MOZ_STACK_CLASS NumberFormatOptions {
     40  /**
     41   * Display a currency amount. |currency| must be a three-letter currency code.
     42   *
     43   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit
     44   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit-width
     45   */
     46  enum class CurrencyDisplay {
     47    Symbol,
     48    Code,
     49    Name,
     50    NarrowSymbol,
     51  };
     52  Maybe<std::pair<std::string_view, CurrencyDisplay>> mCurrency;
     53 
     54  /**
     55   * Set the fraction digits settings. |min| can be zero, |max| must be
     56   * larger-or-equal to |min|.
     57   *
     58   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#fraction-precision
     59   */
     60  Maybe<std::pair<uint32_t, uint32_t>> mFractionDigits;
     61 
     62  /**
     63   * Set the minimum number of integer digits. |min| must be a non-zero
     64   * number.
     65   *
     66   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#integer-width
     67   */
     68  Maybe<uint32_t> mMinIntegerDigits;
     69 
     70  /**
     71   * Set the significant digits settings. |min| must be a non-zero number, |max|
     72   * must be larger-or-equal to |min|.
     73   *
     74   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#significant-digits-precision
     75   */
     76  Maybe<std::pair<uint32_t, uint32_t>> mSignificantDigits;
     77 
     78  /**
     79   * Display a unit amount. |unit| must be a well-formed unit identifier.
     80   *
     81   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit
     82   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#per-unit
     83   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit-width
     84   */
     85  enum class UnitDisplay { Short, Narrow, Long };
     86  Maybe<std::pair<std::string_view, UnitDisplay>> mUnit;
     87 
     88  /**
     89   * Display a percent number.
     90   *
     91   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#unit
     92   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#scale
     93   */
     94  bool mPercent = false;
     95 
     96  /**
     97   * Set to true to strip trailing zeros after the decimal point for integer
     98   * values.
     99   *
    100   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#trailing-zero-display
    101   */
    102  bool mStripTrailingZero = false;
    103 
    104  /**
    105   * Enable or disable grouping.
    106   *
    107   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#grouping
    108   */
    109  enum class Grouping {
    110    Auto,
    111    Always,
    112    Min2,
    113    Never,
    114  } mGrouping = Grouping::Auto;
    115 
    116  /**
    117   * Set the notation style.
    118   *
    119   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#notation
    120   */
    121  enum class Notation {
    122    Standard,
    123    Scientific,
    124    Engineering,
    125    CompactShort,
    126    CompactLong
    127  } mNotation = Notation::Standard;
    128 
    129  /**
    130   * Set the sign-display.
    131   *
    132   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#sign-display
    133   */
    134  enum class SignDisplay {
    135    Auto,
    136    Never,
    137    Always,
    138    ExceptZero,
    139    Negative,
    140    Accounting,
    141    AccountingAlways,
    142    AccountingExceptZero,
    143    AccountingNegative,
    144  } mSignDisplay = SignDisplay::Auto;
    145 
    146  /**
    147   * Set the rounding increment, which must be a non-zero number.
    148   *
    149   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#precision
    150   */
    151  uint32_t mRoundingIncrement = 1;
    152 
    153  /**
    154   * Set the rounding mode.
    155   *
    156   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#rounding-mode
    157   */
    158  enum class RoundingMode {
    159    Ceil,
    160    Floor,
    161    Expand,
    162    Trunc,
    163    HalfCeil,
    164    HalfFloor,
    165    HalfExpand,
    166    HalfTrunc,
    167    HalfEven,
    168    HalfOdd,
    169  } mRoundingMode = RoundingMode::HalfExpand;
    170 
    171  /**
    172   * Set the rounding priority. |mFractionDigits| and |mSignificantDigits| must
    173   * both be set if the rounding priority isn't equal to "auto".
    174   *
    175   * https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md#fraction-precision
    176   */
    177  enum class RoundingPriority {
    178    Auto,
    179    MorePrecision,
    180    LessPrecision,
    181  } mRoundingPriority = RoundingPriority::Auto;
    182 };
    183 
    184 /**
    185 * According to http://userguide.icu-project.org/design, as long as we constrain
    186 * ourselves to const APIs ICU is const-correct.
    187 */
    188 
    189 /**
    190 * A NumberFormat implementation that roughly mirrors the API provided by
    191 * the ECMA-402 Intl.NumberFormat object.
    192 *
    193 * https://tc39.es/ecma402/#numberformat-objects
    194 */
    195 class NumberFormat final {
    196 public:
    197  /**
    198   * Initialize a new NumberFormat for the provided locale and using the
    199   * provided options.
    200   *
    201   * https://tc39.es/ecma402/#sec-initializenumberformat
    202   */
    203  static Result<UniquePtr<NumberFormat>, ICUError> TryCreate(
    204      std::string_view aLocale, const NumberFormatOptions& aOptions);
    205 
    206  NumberFormat() = default;
    207  NumberFormat(const NumberFormat&) = delete;
    208  NumberFormat& operator=(const NumberFormat&) = delete;
    209  ~NumberFormat();
    210 
    211  /**
    212   * Formats a double to a utf-16 string. The string view is valid until
    213   * another number is formatted. Accessing the string view after this event
    214   * is undefined behavior.
    215   *
    216   * https://tc39.es/ecma402/#sec-formatnumberstring
    217   */
    218  Result<std::u16string_view, ICUError> format(double number) const {
    219    if (!formatInternal(number)) {
    220      return Err(ICUError::InternalError);
    221    }
    222 
    223    return formatResult();
    224  }
    225 
    226  /**
    227   * Formats a double to a utf-16 string, and fills the provided parts vector.
    228   * The string view is valid until another number is formatted. Accessing the
    229   * string view after this event is undefined behavior.
    230   *
    231   * This is utf-16 only because the only current use case is in
    232   * SpiderMonkey. Supporting utf-8 would require recalculating the offsets
    233   * in NumberPartVector from fixed width to variable width, which might be
    234   * tricky to get right and is work that won't be necessary if we switch to
    235   * ICU4X (see Bug 1707035).
    236   *
    237   * https://tc39.es/ecma402/#sec-partitionnumberpattern
    238   */
    239  Result<std::u16string_view, ICUError> formatToParts(
    240      double number, NumberPartVector& parts) const;
    241 
    242  /**
    243   * Formats a double to the provider buffer (either utf-8 or utf-16)
    244   *
    245   * https://tc39.es/ecma402/#sec-formatnumberstring
    246   */
    247  template <typename B>
    248  Result<Ok, ICUError> format(double number, B& buffer) const {
    249    if (!formatInternal(number)) {
    250      return Err(ICUError::InternalError);
    251    }
    252 
    253    return formatResult<typename B::CharType, B>(buffer);
    254  }
    255 
    256  /**
    257   * Formats an int64_t to a utf-16 string. The string view is valid until
    258   * another number is formatted. Accessing the string view after this event is
    259   * undefined behavior.
    260   *
    261   * https://tc39.es/ecma402/#sec-formatnumberstring
    262   */
    263  Result<std::u16string_view, ICUError> format(int64_t number) const {
    264    if (!formatInternal(number)) {
    265      return Err(ICUError::InternalError);
    266    }
    267 
    268    return formatResult();
    269  }
    270 
    271  /**
    272   * Formats a int64_t to a utf-16 string, and fills the provided parts vector.
    273   * The string view is valid until another number is formatted. Accessing the
    274   * string view after this event is undefined behavior.
    275   *
    276   * This is utf-16 only because the only current use case is in
    277   * SpiderMonkey. Supporting utf-8 would require recalculating the offsets
    278   * in NumberPartVector from fixed width to variable width, which might be
    279   * tricky to get right and is work that won't be necessary if we switch to
    280   * ICU4X (see Bug 1707035).
    281   *
    282   * https://tc39.es/ecma402/#sec-partitionnumberpattern
    283   */
    284  Result<std::u16string_view, ICUError> formatToParts(
    285      int64_t number, NumberPartVector& parts) const;
    286 
    287  /**
    288   * Formats an int64_t to the provider buffer (either utf-8 or utf-16).
    289   *
    290   * https://tc39.es/ecma402/#sec-formatnumberstring
    291   */
    292  template <typename B>
    293  Result<Ok, ICUError> format(int64_t number, B& buffer) const {
    294    if (!formatInternal(number)) {
    295      return Err(ICUError::InternalError);
    296    }
    297 
    298    return formatResult<typename B::CharType, B>(buffer);
    299  }
    300 
    301  /**
    302   * Formats a string encoded decimal number to a utf-16 string. The string view
    303   * is valid until another number is formatted. Accessing the string view
    304   * after this event is undefined behavior.
    305   *
    306   * https://tc39.es/ecma402/#sec-formatnumberstring
    307   */
    308  Result<std::u16string_view, ICUError> format(std::string_view number) const {
    309    if (!formatInternal(number)) {
    310      return Err(ICUError::InternalError);
    311    }
    312 
    313    return formatResult();
    314  }
    315 
    316  /**
    317   * Formats a string encoded decimal number to a utf-16 string, and fills the
    318   * provided parts vector. The string view is valid until another number is
    319   * formatted. Accessing the string view after this event is undefined
    320   * behavior.
    321   *
    322   * This is utf-16 only because the only current use case is in
    323   * SpiderMonkey. Supporting utf-8 would require recalculating the offsets
    324   * in NumberPartVector from fixed width to variable width, which might be
    325   * tricky to get right and is work that won't be necessary if we switch to
    326   * ICU4X (see Bug 1707035).
    327   *
    328   * https://tc39.es/ecma402/#sec-partitionnumberpattern
    329   */
    330  Result<std::u16string_view, ICUError> formatToParts(
    331      std::string_view number, NumberPartVector& parts) const;
    332 
    333  /**
    334   * Formats a string encoded decimal number to the provider buffer
    335   * (either utf-8 or utf-16).
    336   *
    337   * https://tc39.es/ecma402/#sec-formatnumberstring
    338   */
    339  template <typename B>
    340  Result<Ok, ICUError> format(std::string_view number, B& buffer) const {
    341    if (!formatInternal(number)) {
    342      return Err(ICUError::InternalError);
    343    }
    344 
    345    return formatResult<typename B::CharType, B>(buffer);
    346  }
    347 
    348  /**
    349   * Formats the number and selects the keyword by using a provided
    350   * UPluralRules object.
    351   *
    352   * https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.select
    353   *
    354   * TODO(1713917) This is necessary because both PluralRules and
    355   * NumberFormat have a shared dependency on the raw UFormattedNumber
    356   * type. Once we transition to using ICU4X, the FFI calls should no
    357   * longer require such shared dependencies. At that time, this
    358   * functionality should be removed from NumberFormat and invoked
    359   * solely from PluralRules.
    360   */
    361  Result<int32_t, ICUError> selectFormatted(double number, char16_t* keyword,
    362                                            int32_t keywordSize,
    363                                            UPluralRules* pluralRules) const;
    364 
    365  /**
    366   * Returns an iterator over all supported number formatter locales.
    367   *
    368   * The returned strings are ICU locale identifiers and NOT BCP 47 language
    369   * tags.
    370   *
    371   * Also see <https://unicode-org.github.io/icu/userguide/locale>.
    372   */
    373  static auto GetAvailableLocales() {
    374    return AvailableLocalesEnumeration<unum_countAvailable,
    375                                       unum_getAvailable>();
    376  }
    377 
    378 private:
    379  UNumberFormatter* mNumberFormatter = nullptr;
    380  UFormattedNumber* mFormattedNumber = nullptr;
    381  bool mFormatForUnit = false;
    382 
    383  Result<Ok, ICUError> initialize(std::string_view aLocale,
    384                                  const NumberFormatOptions& aOptions);
    385 
    386  [[nodiscard]] bool formatInternal(double number) const;
    387  [[nodiscard]] bool formatInternal(int64_t number) const;
    388  [[nodiscard]] bool formatInternal(std::string_view number) const;
    389 
    390  Result<std::u16string_view, ICUError> formatResult() const;
    391 
    392  template <typename C, typename B>
    393  Result<Ok, ICUError> formatResult(B& buffer) const {
    394    // We only support buffers with char or char16_t.
    395    static_assert(std::is_same_v<C, char> || std::is_same_v<C, char16_t>);
    396 
    397    return formatResult().andThen(
    398        [&buffer](std::u16string_view result) -> Result<Ok, ICUError> {
    399          if constexpr (std::is_same_v<C, char>) {
    400            if (!FillBuffer(Span(result.data(), result.size()), buffer)) {
    401              return Err(ICUError::OutOfMemory);
    402            }
    403            return Ok();
    404          } else {
    405            // ICU provides APIs which accept a buffer, but they just copy from
    406            // an internal buffer behind the scenes anyway.
    407            if (!buffer.reserve(result.size())) {
    408              return Err(ICUError::OutOfMemory);
    409            }
    410            PodCopy(static_cast<char16_t*>(buffer.data()), result.data(),
    411                    result.size());
    412            buffer.written(result.size());
    413 
    414            return Ok();
    415          }
    416        });
    417  }
    418 };
    419 
    420 }  // namespace mozilla::intl
    421 
    422 #endif