ListFormat.h (7814B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 #ifndef intl_components_ListFormat_h_ 5 #define intl_components_ListFormat_h_ 6 7 #include "mozilla/CheckedInt.h" 8 #include "mozilla/intl/ICU4CGlue.h" 9 #include "mozilla/Try.h" 10 #include "mozilla/Vector.h" 11 #include "unicode/ulistformatter.h" 12 13 struct UListFormatter; 14 15 namespace mozilla::intl { 16 17 static constexpr size_t DEFAULT_LIST_LENGTH = 8; 18 19 /** 20 * This component is a Mozilla-focused API for the list formatting provided by 21 * ICU. It implements the API provided by the ECMA-402 Intl.ListFormat object. 22 * 23 * https://tc39.es/ecma402/#listformat-objects 24 */ 25 class ListFormat final { 26 public: 27 /** 28 * The [[Type]] and [[Style]] properties of ListFormat instances. 29 * 30 * https://tc39.es/ecma402/#sec-properties-of-intl-listformat-instances 31 */ 32 // [[Type]] 33 enum class Type { Conjunction, Disjunction, Unit }; 34 // [[Style]] 35 enum class Style { Long, Short, Narrow }; 36 37 /** 38 * The 'options' object to create Intl.ListFormat instance. 39 * 40 * https://tc39.es/ecma402/#sec-Intl.ListFormat 41 */ 42 struct Options { 43 // "conjunction" is the default fallback value. 44 Type mType = Type::Conjunction; 45 46 // "long" is the default fallback value. 47 Style mStyle = Style::Long; 48 }; 49 50 /** 51 * Create a ListFormat object for the provided locale and options. 52 * 53 * https://tc39.es/ecma402/#sec-Intl.ListFormat 54 */ 55 static Result<UniquePtr<ListFormat>, ICUError> TryCreate( 56 mozilla::Span<const char> aLocale, const Options& aOptions); 57 58 ~ListFormat(); 59 60 /** 61 * The list of String values for FormatList and FormatListToParts. 62 * 63 * https://tc39.es/ecma402/#sec-formatlist 64 * https://tc39.es/ecma402/#sec-formatlisttoparts 65 */ 66 using StringList = 67 mozilla::Vector<mozilla::Span<const char16_t>, DEFAULT_LIST_LENGTH>; 68 69 /** 70 * Format the list according and write the result in buffer. 71 * 72 * https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.format 73 * https://tc39.es/ecma402/#sec-formatlist 74 */ 75 template <typename Buffer> 76 ICUResult Format(const StringList& list, Buffer& buffer) const { 77 static_assert(std::is_same_v<typename Buffer::CharType, char16_t>, 78 "Currently only UTF-16 buffers are supported."); 79 80 mozilla::Vector<const char16_t*, DEFAULT_LIST_LENGTH> u16strings; 81 mozilla::Vector<int32_t, DEFAULT_LIST_LENGTH> u16stringLens; 82 MOZ_TRY(ConvertStringListToVectors(list, u16strings, u16stringLens)); 83 84 int32_t u16stringCount = mozilla::AssertedCast<int32_t>(list.length()); 85 MOZ_TRY(FillBufferWithICUCall( 86 buffer, [this, &u16strings, &u16stringLens, u16stringCount]( 87 char16_t* chars, int32_t size, UErrorCode* status) { 88 return ulistfmt_format(mListFormatter.GetConst(), u16strings.begin(), 89 u16stringLens.begin(), u16stringCount, chars, 90 size, status); 91 })); 92 93 return Ok{}; 94 } 95 96 /** 97 * The corresponding list of parts according to the effective locale and the 98 * formatting options of ListFormat. 99 * Each part has a [[Type]] field, which must be "element" or "literal", and a 100 * [[Value]] field. 101 * 102 * To store Part more efficiently, it doesn't store the ||Value|| of type 103 * string in this struct. Instead, it stores the end index of the string in 104 * the buffer(which is passed to ListFormat::FormatToParts()). The begin index 105 * of the ||Value|| is the index of the previous part. 106 * 107 * Buffer 108 * 0 i j 109 * +---------------+---------------+---------------+ 110 * | Part[0].Value | Part[1].Value | Part[2].Value | .... 111 * +---------------+---------------+---------------+ 112 * 113 * Part[0].index is i. Part[0].Value is stored in the Buffer[0..i]. 114 * Part[1].index is j. Part[1].Value is stored in the Buffer[i..j]. 115 * 116 * See https://tc39.es/ecma402/#sec-createpartsfromlist 117 */ 118 enum class PartType { 119 Element, 120 Literal, 121 }; 122 // The 2nd field is the end index to the buffer as mentioned above. 123 using Part = std::pair<PartType, size_t>; 124 using PartVector = mozilla::Vector<Part, DEFAULT_LIST_LENGTH>; 125 126 /** 127 * Format the list to a list of parts, and store the formatted result of 128 * UTF-16 string into buffer, and formatted parts into the vector 'parts'. 129 * 130 * See: 131 * https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.formatToParts 132 * https://tc39.es/ecma402/#sec-formatlisttoparts 133 */ 134 template <typename Buffer> 135 ICUResult FormatToParts(const StringList& list, Buffer& buffer, 136 PartVector& parts) { 137 static_assert(std::is_same_v<typename Buffer::CharType, char16_t>, 138 "Currently only UTF-16 buffers are supported."); 139 140 mozilla::Vector<const char16_t*, DEFAULT_LIST_LENGTH> u16strings; 141 mozilla::Vector<int32_t, DEFAULT_LIST_LENGTH> u16stringLens; 142 MOZ_TRY(ConvertStringListToVectors(list, u16strings, u16stringLens)); 143 144 AutoFormattedList formatted; 145 UErrorCode status = U_ZERO_ERROR; 146 ulistfmt_formatStringsToResult( 147 mListFormatter.GetConst(), u16strings.begin(), u16stringLens.begin(), 148 int32_t(list.length()), formatted.GetFormatted(), &status); 149 if (U_FAILURE(status)) { 150 return Err(ToICUError(status)); 151 } 152 153 auto spanResult = formatted.ToSpan(); 154 if (spanResult.isErr()) { 155 return spanResult.propagateErr(); 156 } 157 auto formattedSpan = spanResult.unwrap(); 158 if (!FillBuffer(formattedSpan, buffer)) { 159 return Err(ICUError::OutOfMemory); 160 } 161 162 const UFormattedValue* value = formatted.Value(); 163 if (!value) { 164 return Err(ICUError::InternalError); 165 } 166 return FormattedToParts(value, buffer.length(), parts); 167 } 168 169 private: 170 ListFormat() = delete; 171 explicit ListFormat(UListFormatter* fmt) : mListFormatter(fmt) {} 172 ListFormat(const ListFormat&) = delete; 173 ListFormat& operator=(const ListFormat&) = delete; 174 175 ICUPointer<UListFormatter> mListFormatter = 176 ICUPointer<UListFormatter>(nullptr); 177 178 // Convert StringList to an array of type 'const char16_t*' and an array of 179 // int32 for ICU-API. 180 ICUResult ConvertStringListToVectors( 181 const StringList& list, 182 mozilla::Vector<const char16_t*, DEFAULT_LIST_LENGTH>& u16strings, 183 mozilla::Vector<int32_t, DEFAULT_LIST_LENGTH>& u16stringLens) const { 184 // Keep a conservative running count of overall length. 185 mozilla::CheckedInt<int32_t> stringLengthTotal(0); 186 for (const auto& string : list) { 187 if (!u16strings.append(string.data())) { 188 return Err(ICUError::InternalError); 189 } 190 191 int32_t len = mozilla::AssertedCast<int32_t>(string.size()); 192 if (!u16stringLens.append(len)) { 193 return Err(ICUError::InternalError); 194 } 195 196 stringLengthTotal += len; 197 } 198 199 // Add space for N unrealistically large conjunctions. 200 constexpr int32_t MaxConjunctionLen = 100; 201 stringLengthTotal += CheckedInt<int32_t>(list.length()) * MaxConjunctionLen; 202 // If the overestimate exceeds ICU length limits, don't try to format. 203 if (!stringLengthTotal.isValid()) { 204 return Err(ICUError::OverflowError); 205 } 206 207 return Ok{}; 208 } 209 210 using AutoFormattedList = 211 AutoFormattedResult<UFormattedList, ulistfmt_openResult, 212 ulistfmt_resultAsValue, ulistfmt_closeResult>; 213 214 ICUResult FormattedToParts(const UFormattedValue* formattedValue, 215 size_t formattedSize, PartVector& parts); 216 217 static UListFormatterType ToUListFormatterType(Type type); 218 static UListFormatterWidth ToUListFormatterWidth(Style style); 219 }; 220 221 } // namespace mozilla::intl 222 #endif // intl_components_ListFormat_h_