tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ListFormat.h (7814B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 #ifndef intl_components_ListFormat_h_
      5 #define intl_components_ListFormat_h_
      6 
      7 #include "mozilla/CheckedInt.h"
      8 #include "mozilla/intl/ICU4CGlue.h"
      9 #include "mozilla/Try.h"
     10 #include "mozilla/Vector.h"
     11 #include "unicode/ulistformatter.h"
     12 
     13 struct UListFormatter;
     14 
     15 namespace mozilla::intl {
     16 
     17 static constexpr size_t DEFAULT_LIST_LENGTH = 8;
     18 
     19 /**
     20 * This component is a Mozilla-focused API for the list formatting provided by
     21 * ICU. It implements the API provided by the ECMA-402 Intl.ListFormat object.
     22 *
     23 * https://tc39.es/ecma402/#listformat-objects
     24 */
     25 class ListFormat final {
     26 public:
     27  /**
     28   * The [[Type]] and [[Style]] properties of ListFormat instances.
     29   *
     30   * https://tc39.es/ecma402/#sec-properties-of-intl-listformat-instances
     31   */
     32  // [[Type]]
     33  enum class Type { Conjunction, Disjunction, Unit };
     34  // [[Style]]
     35  enum class Style { Long, Short, Narrow };
     36 
     37  /**
     38   * The 'options' object to create Intl.ListFormat instance.
     39   *
     40   * https://tc39.es/ecma402/#sec-Intl.ListFormat
     41   */
     42  struct Options {
     43    // "conjunction" is the default fallback value.
     44    Type mType = Type::Conjunction;
     45 
     46    // "long" is the default fallback value.
     47    Style mStyle = Style::Long;
     48  };
     49 
     50  /**
     51   * Create a ListFormat object for the provided locale and options.
     52   *
     53   * https://tc39.es/ecma402/#sec-Intl.ListFormat
     54   */
     55  static Result<UniquePtr<ListFormat>, ICUError> TryCreate(
     56      mozilla::Span<const char> aLocale, const Options& aOptions);
     57 
     58  ~ListFormat();
     59 
     60  /**
     61   * The list of String values for FormatList and FormatListToParts.
     62   *
     63   * https://tc39.es/ecma402/#sec-formatlist
     64   * https://tc39.es/ecma402/#sec-formatlisttoparts
     65   */
     66  using StringList =
     67      mozilla::Vector<mozilla::Span<const char16_t>, DEFAULT_LIST_LENGTH>;
     68 
     69  /**
     70   * Format the list according and write the result in buffer.
     71   *
     72   * https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.format
     73   * https://tc39.es/ecma402/#sec-formatlist
     74   */
     75  template <typename Buffer>
     76  ICUResult Format(const StringList& list, Buffer& buffer) const {
     77    static_assert(std::is_same_v<typename Buffer::CharType, char16_t>,
     78                  "Currently only UTF-16 buffers are supported.");
     79 
     80    mozilla::Vector<const char16_t*, DEFAULT_LIST_LENGTH> u16strings;
     81    mozilla::Vector<int32_t, DEFAULT_LIST_LENGTH> u16stringLens;
     82    MOZ_TRY(ConvertStringListToVectors(list, u16strings, u16stringLens));
     83 
     84    int32_t u16stringCount = mozilla::AssertedCast<int32_t>(list.length());
     85    MOZ_TRY(FillBufferWithICUCall(
     86        buffer, [this, &u16strings, &u16stringLens, u16stringCount](
     87                    char16_t* chars, int32_t size, UErrorCode* status) {
     88          return ulistfmt_format(mListFormatter.GetConst(), u16strings.begin(),
     89                                 u16stringLens.begin(), u16stringCount, chars,
     90                                 size, status);
     91        }));
     92 
     93    return Ok{};
     94  }
     95 
     96  /**
     97   * The corresponding list of parts according to the effective locale and the
     98   * formatting options of ListFormat.
     99   * Each part has a [[Type]] field, which must be "element" or "literal", and a
    100   * [[Value]] field.
    101   *
    102   * To store Part more efficiently, it doesn't store the ||Value|| of type
    103   * string in this struct. Instead, it stores the end index of the string in
    104   * the buffer(which is passed to ListFormat::FormatToParts()). The begin index
    105   * of the ||Value|| is the index of the previous part.
    106   *
    107   *  Buffer
    108   *  0               i                j
    109   * +---------------+---------------+---------------+
    110   * | Part[0].Value | Part[1].Value | Part[2].Value | ....
    111   * +---------------+---------------+---------------+
    112   *
    113   *     Part[0].index is i. Part[0].Value is stored in the Buffer[0..i].
    114   *     Part[1].index is j. Part[1].Value is stored in the Buffer[i..j].
    115   *
    116   * See https://tc39.es/ecma402/#sec-createpartsfromlist
    117   */
    118  enum class PartType {
    119    Element,
    120    Literal,
    121  };
    122  // The 2nd field is the end index to the buffer as mentioned above.
    123  using Part = std::pair<PartType, size_t>;
    124  using PartVector = mozilla::Vector<Part, DEFAULT_LIST_LENGTH>;
    125 
    126  /**
    127   * Format the list to a list of parts, and store the formatted result of
    128   * UTF-16 string into buffer, and formatted parts into the vector 'parts'.
    129   *
    130   * See:
    131   * https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.formatToParts
    132   * https://tc39.es/ecma402/#sec-formatlisttoparts
    133   */
    134  template <typename Buffer>
    135  ICUResult FormatToParts(const StringList& list, Buffer& buffer,
    136                          PartVector& parts) {
    137    static_assert(std::is_same_v<typename Buffer::CharType, char16_t>,
    138                  "Currently only UTF-16 buffers are supported.");
    139 
    140    mozilla::Vector<const char16_t*, DEFAULT_LIST_LENGTH> u16strings;
    141    mozilla::Vector<int32_t, DEFAULT_LIST_LENGTH> u16stringLens;
    142    MOZ_TRY(ConvertStringListToVectors(list, u16strings, u16stringLens));
    143 
    144    AutoFormattedList formatted;
    145    UErrorCode status = U_ZERO_ERROR;
    146    ulistfmt_formatStringsToResult(
    147        mListFormatter.GetConst(), u16strings.begin(), u16stringLens.begin(),
    148        int32_t(list.length()), formatted.GetFormatted(), &status);
    149    if (U_FAILURE(status)) {
    150      return Err(ToICUError(status));
    151    }
    152 
    153    auto spanResult = formatted.ToSpan();
    154    if (spanResult.isErr()) {
    155      return spanResult.propagateErr();
    156    }
    157    auto formattedSpan = spanResult.unwrap();
    158    if (!FillBuffer(formattedSpan, buffer)) {
    159      return Err(ICUError::OutOfMemory);
    160    }
    161 
    162    const UFormattedValue* value = formatted.Value();
    163    if (!value) {
    164      return Err(ICUError::InternalError);
    165    }
    166    return FormattedToParts(value, buffer.length(), parts);
    167  }
    168 
    169 private:
    170  ListFormat() = delete;
    171  explicit ListFormat(UListFormatter* fmt) : mListFormatter(fmt) {}
    172  ListFormat(const ListFormat&) = delete;
    173  ListFormat& operator=(const ListFormat&) = delete;
    174 
    175  ICUPointer<UListFormatter> mListFormatter =
    176      ICUPointer<UListFormatter>(nullptr);
    177 
    178  // Convert StringList to an array of type 'const char16_t*' and an array of
    179  // int32 for ICU-API.
    180  ICUResult ConvertStringListToVectors(
    181      const StringList& list,
    182      mozilla::Vector<const char16_t*, DEFAULT_LIST_LENGTH>& u16strings,
    183      mozilla::Vector<int32_t, DEFAULT_LIST_LENGTH>& u16stringLens) const {
    184    // Keep a conservative running count of overall length.
    185    mozilla::CheckedInt<int32_t> stringLengthTotal(0);
    186    for (const auto& string : list) {
    187      if (!u16strings.append(string.data())) {
    188        return Err(ICUError::InternalError);
    189      }
    190 
    191      int32_t len = mozilla::AssertedCast<int32_t>(string.size());
    192      if (!u16stringLens.append(len)) {
    193        return Err(ICUError::InternalError);
    194      }
    195 
    196      stringLengthTotal += len;
    197    }
    198 
    199    // Add space for N unrealistically large conjunctions.
    200    constexpr int32_t MaxConjunctionLen = 100;
    201    stringLengthTotal += CheckedInt<int32_t>(list.length()) * MaxConjunctionLen;
    202    // If the overestimate exceeds ICU length limits, don't try to format.
    203    if (!stringLengthTotal.isValid()) {
    204      return Err(ICUError::OverflowError);
    205    }
    206 
    207    return Ok{};
    208  }
    209 
    210  using AutoFormattedList =
    211      AutoFormattedResult<UFormattedList, ulistfmt_openResult,
    212                          ulistfmt_resultAsValue, ulistfmt_closeResult>;
    213 
    214  ICUResult FormattedToParts(const UFormattedValue* formattedValue,
    215                             size_t formattedSize, PartVector& parts);
    216 
    217  static UListFormatterType ToUListFormatterType(Type type);
    218  static UListFormatterWidth ToUListFormatterWidth(Style style);
    219 };
    220 
    221 }  // namespace mozilla::intl
    222 #endif  // intl_components_ListFormat_h_