number_skeletons.h (13403B)
1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 #ifndef __SOURCE_NUMBER_SKELETONS_H__ 8 #define __SOURCE_NUMBER_SKELETONS_H__ 9 10 #include "number_types.h" 11 #include "numparse_types.h" 12 #include "unicode/ucharstrie.h" 13 #include "string_segment.h" 14 15 U_NAMESPACE_BEGIN 16 namespace number::impl { 17 18 // Forward-declaration 19 struct SeenMacroProps; 20 21 // namespace for enums and entrypoint functions 22 namespace skeleton { 23 24 //////////////////////////////////////////////////////////////////////////////////////// 25 // NOTE: For examples of how to add a new stem to the number skeleton parser, see: // 26 // https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 // 27 // and // 28 // https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 // 29 //////////////////////////////////////////////////////////////////////////////////////// 30 31 /** 32 * While parsing a skeleton, this enum records what type of option we expect to find next. 33 */ 34 enum ParseState { 35 36 // Section 0: We expect whitespace or a stem, but not an option: 37 38 STATE_NULL, 39 40 // Section 1: We might accept an option, but it is not required: 41 42 STATE_SCIENTIFIC, 43 STATE_FRACTION_PRECISION, 44 STATE_PRECISION, 45 46 // Section 2: An option is required: 47 48 STATE_INCREMENT_PRECISION, 49 STATE_MEASURE_UNIT, 50 STATE_PER_MEASURE_UNIT, 51 STATE_IDENTIFIER_UNIT, 52 STATE_UNIT_USAGE, 53 STATE_CURRENCY_UNIT, 54 STATE_INTEGER_WIDTH, 55 STATE_NUMBERING_SYSTEM, 56 STATE_SCALE, 57 }; 58 59 /** 60 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem 61 * string literal written in upper snake case. 62 * 63 * @see StemToObject 64 * @see #SERIALIZED_STEM_TRIE 65 */ 66 enum StemEnum { 67 68 // Section 1: Stems that do not require an option: 69 70 STEM_COMPACT_SHORT, 71 STEM_COMPACT_LONG, 72 STEM_SCIENTIFIC, 73 STEM_ENGINEERING, 74 STEM_NOTATION_SIMPLE, 75 STEM_BASE_UNIT, 76 STEM_PERCENT, 77 STEM_PERMILLE, 78 STEM_PERCENT_100, // concise-only 79 STEM_PRECISION_INTEGER, 80 STEM_PRECISION_UNLIMITED, 81 STEM_PRECISION_CURRENCY_STANDARD, 82 STEM_PRECISION_CURRENCY_CASH, 83 STEM_ROUNDING_MODE_CEILING, 84 STEM_ROUNDING_MODE_FLOOR, 85 STEM_ROUNDING_MODE_DOWN, 86 STEM_ROUNDING_MODE_UP, 87 STEM_ROUNDING_MODE_HALF_EVEN, 88 STEM_ROUNDING_MODE_HALF_ODD, 89 STEM_ROUNDING_MODE_HALF_CEILING, 90 STEM_ROUNDING_MODE_HALF_FLOOR, 91 STEM_ROUNDING_MODE_HALF_DOWN, 92 STEM_ROUNDING_MODE_HALF_UP, 93 STEM_ROUNDING_MODE_UNNECESSARY, 94 STEM_INTEGER_WIDTH_TRUNC, 95 STEM_GROUP_OFF, 96 STEM_GROUP_MIN2, 97 STEM_GROUP_AUTO, 98 STEM_GROUP_ON_ALIGNED, 99 STEM_GROUP_THOUSANDS, 100 STEM_LATIN, 101 STEM_UNIT_WIDTH_NARROW, 102 STEM_UNIT_WIDTH_SHORT, 103 STEM_UNIT_WIDTH_FULL_NAME, 104 STEM_UNIT_WIDTH_ISO_CODE, 105 STEM_UNIT_WIDTH_FORMAL, 106 STEM_UNIT_WIDTH_VARIANT, 107 STEM_UNIT_WIDTH_HIDDEN, 108 STEM_SIGN_AUTO, 109 STEM_SIGN_ALWAYS, 110 STEM_SIGN_NEVER, 111 STEM_SIGN_ACCOUNTING, 112 STEM_SIGN_ACCOUNTING_ALWAYS, 113 STEM_SIGN_EXCEPT_ZERO, 114 STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, 115 STEM_SIGN_NEGATIVE, 116 STEM_SIGN_ACCOUNTING_NEGATIVE, 117 STEM_DECIMAL_AUTO, 118 STEM_DECIMAL_ALWAYS, 119 120 // Section 2: Stems that DO require an option: 121 122 STEM_PRECISION_INCREMENT, 123 STEM_MEASURE_UNIT, 124 STEM_PER_MEASURE_UNIT, 125 STEM_UNIT, 126 STEM_UNIT_USAGE, 127 STEM_CURRENCY, 128 STEM_INTEGER_WIDTH, 129 STEM_NUMBERING_SYSTEM, 130 STEM_SCALE, 131 }; 132 133 /** Default wildcard char, accepted on input and printed in output */ 134 constexpr char16_t kWildcardChar = u'*'; 135 136 /** Alternative wildcard char, accept on input but not printed in output */ 137 constexpr char16_t kAltWildcardChar = u'+'; 138 139 /** Checks whether the char is a wildcard on input */ 140 inline bool isWildcardChar(char16_t c) { 141 return c == kWildcardChar || c == kAltWildcardChar; 142 } 143 144 /** 145 * Creates a NumberFormatter corresponding to the given skeleton string. 146 * 147 * @param skeletonString 148 * A number skeleton string, possibly not in its shortest form. 149 * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string. 150 */ 151 UnlocalizedNumberFormatter create( 152 const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status); 153 154 /** 155 * Create a skeleton string corresponding to the given NumberFormatter. 156 * 157 * @param macros 158 * The NumberFormatter options object. 159 * @return A skeleton string in normalized form. 160 */ 161 UnicodeString generate(const MacroProps& macros, UErrorCode& status); 162 163 /** 164 * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop. 165 * 166 * Internal: use the create() endpoint instead of this function. 167 */ 168 MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status); 169 170 /** 171 * Given that the current segment represents a stem, parse it and save the result. 172 * 173 * @return The next state after parsing this stem, corresponding to what subset of options to expect. 174 */ 175 ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen, 176 MacroProps& macros, UErrorCode& status); 177 178 /** 179 * Given that the current segment represents an option, parse it and save the result. 180 * 181 * @return The next state after parsing this option, corresponding to what subset of options to 182 * expect next. 183 */ 184 ParseState 185 parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status); 186 187 } // namespace skeleton 188 189 190 /** 191 * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This 192 * applies to only the "Section 1" stems, those that are well-defined without an option. 193 */ 194 namespace stem_to_object { 195 196 Notation notation(skeleton::StemEnum stem); 197 198 MeasureUnit unit(skeleton::StemEnum stem); 199 200 Precision precision(skeleton::StemEnum stem); 201 202 UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem); 203 204 UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem); 205 206 UNumberUnitWidth unitWidth(skeleton::StemEnum stem); 207 208 UNumberSignDisplay signDisplay(skeleton::StemEnum stem); 209 210 UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem); 211 212 } // namespace stem_to_object 213 214 /** 215 * Namespace for utility methods that convert from enums to stem strings. More complex object conversions 216 * take place in the object_to_stem_string namespace. 217 */ 218 namespace enum_to_stem_string { 219 220 void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb); 221 222 void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb); 223 224 void unitWidth(UNumberUnitWidth value, UnicodeString& sb); 225 226 void signDisplay(UNumberSignDisplay value, UnicodeString& sb); 227 228 void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb); 229 230 } // namespace enum_to_stem_string 231 232 /** 233 * Namespace for utility methods for processing stems and options that cannot be interpreted literally. 234 */ 235 namespace blueprint_helpers { 236 237 /** @return Whether we successfully found and parsed an exponent width option. */ 238 bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 239 240 void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status); 241 242 /** @return Whether we successfully found and parsed an exponent sign option. */ 243 bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 244 245 void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 246 247 void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status); 248 249 // "measure-unit/" is deprecated in favour of "unit/". 250 void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 251 252 // "per-measure-unit/" is deprecated in favour of "unit/". 253 void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 254 255 /** 256 * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as 257 * specified via a "unit/" concise skeleton. 258 */ 259 void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 260 261 void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 262 263 void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 264 265 void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status); 266 267 void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 268 269 void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status); 270 271 void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 272 273 // Note: no generateScientificStem since this syntax was added later in ICU 67 274 275 void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 276 277 // Note: no generateIntegerStem since this syntax was added later in ICU 67 278 279 /** @return Whether we successfully found and parsed a frac-sig option. */ 280 bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 281 282 /** @return Whether we successfully found and parsed a trailing zero option. */ 283 bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 284 285 void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 286 287 void 288 generateIncrementOption(uint32_t increment, digits_t incrementMagnitude, int32_t minFrac, UnicodeString& sb, UErrorCode& status); 289 290 void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 291 292 void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status); 293 294 void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 295 296 void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status); 297 298 void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); 299 300 void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb, 301 UErrorCode& status); 302 303 } // namespace blueprint_helpers 304 305 /** 306 * Class for utility methods for generating a token corresponding to each macro-prop. Each method 307 * returns whether or not a token was written to the string builder. 308 * 309 * This needs to be a class, not a namespace, so it can be friended. 310 */ 311 class GeneratorHelpers { 312 public: 313 /** 314 * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given 315 * StringBuilder. 316 * 317 * Internal: use the create() endpoint instead of this function. 318 */ 319 static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 320 321 private: 322 static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 323 324 static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 325 326 static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 327 328 static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 329 330 static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 331 332 static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 333 334 static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 335 336 static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 337 338 static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 339 340 static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 341 342 static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 343 344 static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); 345 346 }; 347 348 /** 349 * Struct for null-checking. 350 * In Java, we can just check the object reference. In C++, we need a different method. 351 */ 352 struct SeenMacroProps { 353 bool notation = false; 354 bool unit = false; 355 bool perUnit = false; 356 bool usage = false; 357 bool precision = false; 358 bool roundingMode = false; 359 bool grouper = false; 360 bool padder = false; 361 bool integerWidth = false; 362 bool symbols = false; 363 bool unitWidth = false; 364 bool sign = false; 365 bool decimal = false; 366 bool scale = false; 367 }; 368 369 namespace { 370 371 #define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \ 372 UPRV_BLOCK_MACRO_BEGIN { \ 373 UErrorCode conversionStatus = U_ZERO_ERROR; \ 374 (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \ 375 if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \ 376 /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \ 377 (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \ 378 return; \ 379 } else if (U_FAILURE(conversionStatus)) { \ 380 (status) = conversionStatus; \ 381 return; \ 382 } \ 383 } UPRV_BLOCK_MACRO_END 384 385 } // namespace 386 387 } // namespace number::impl 388 U_NAMESPACE_END 389 390 #endif //__SOURCE_NUMBER_SKELETONS_H__ 391 #endif /* #if !UCONFIG_NO_FORMATTING */