tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

number_skeletons.h (13403B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 #ifndef __SOURCE_NUMBER_SKELETONS_H__
      8 #define __SOURCE_NUMBER_SKELETONS_H__
      9 
     10 #include "number_types.h"
     11 #include "numparse_types.h"
     12 #include "unicode/ucharstrie.h"
     13 #include "string_segment.h"
     14 
     15 U_NAMESPACE_BEGIN
     16 namespace number::impl {
     17 
     18 // Forward-declaration
     19 struct SeenMacroProps;
     20 
     21 // namespace for enums and entrypoint functions
     22 namespace skeleton {
     23 
     24 ////////////////////////////////////////////////////////////////////////////////////////
     25 // NOTE: For examples of how to add a new stem to the number skeleton parser, see:    //
     26 // https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 //
     27 // and                                                                                //
     28 // https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 //
     29 ////////////////////////////////////////////////////////////////////////////////////////
     30 
     /**
      * Parser state used while walking a skeleton string: records which kind of option,
      * if any, is expected to come next.
      *
      * Values are assigned implicitly, starting at 0 for STATE_NULL and increasing by one
      * per enumerator in declaration order.
      */
     enum ParseState {
         // ---- Section 0: whitespace or a stem is expected, but not an option ----
         STATE_NULL,

         // ---- Section 1: an option may follow, but it is not required ----
         STATE_SCIENTIFIC,
         STATE_FRACTION_PRECISION,
         STATE_PRECISION,

         // ---- Section 2: an option is required ----
         STATE_INCREMENT_PRECISION,
         STATE_MEASURE_UNIT,
         STATE_PER_MEASURE_UNIT,
         STATE_IDENTIFIER_UNIT,
         STATE_UNIT_USAGE,
         STATE_CURRENCY_UNIT,
         STATE_INTEGER_WIDTH,
         STATE_NUMBERING_SYSTEM,
         STATE_SCALE,
     };
     58 
     /**
      * One enumerator per possible stem literal. Each enumerator name is the kebab-case
      * stem string literal rewritten in upper snake case.
      *
      * Values are assigned implicitly from 0 in declaration order; the "Section 1" stems
      * (no option needed) come first, followed by the "Section 2" stems (option required).
      *
      * @see stem_to_object
      * @see #SERIALIZED_STEM_TRIE
      */
     enum StemEnum {

         // ======== Section 1: stems that do not require an option ========

         // notation-style stems
         STEM_COMPACT_SHORT,
         STEM_COMPACT_LONG,
         STEM_SCIENTIFIC,
         STEM_ENGINEERING,
         STEM_NOTATION_SIMPLE,

         // unit shorthand stems
         STEM_BASE_UNIT,
         STEM_PERCENT,
         STEM_PERMILLE,
         STEM_PERCENT_100, // concise-only

         // STEM_PRECISION_*
         STEM_PRECISION_INTEGER,
         STEM_PRECISION_UNLIMITED,
         STEM_PRECISION_CURRENCY_STANDARD,
         STEM_PRECISION_CURRENCY_CASH,

         // STEM_ROUNDING_MODE_*
         STEM_ROUNDING_MODE_CEILING,
         STEM_ROUNDING_MODE_FLOOR,
         STEM_ROUNDING_MODE_DOWN,
         STEM_ROUNDING_MODE_UP,
         STEM_ROUNDING_MODE_HALF_EVEN,
         STEM_ROUNDING_MODE_HALF_ODD,
         STEM_ROUNDING_MODE_HALF_CEILING,
         STEM_ROUNDING_MODE_HALF_FLOOR,
         STEM_ROUNDING_MODE_HALF_DOWN,
         STEM_ROUNDING_MODE_HALF_UP,
         STEM_ROUNDING_MODE_UNNECESSARY,

         STEM_INTEGER_WIDTH_TRUNC,

         // STEM_GROUP_*
         STEM_GROUP_OFF,
         STEM_GROUP_MIN2,
         STEM_GROUP_AUTO,
         STEM_GROUP_ON_ALIGNED,
         STEM_GROUP_THOUSANDS,

         STEM_LATIN,

         // STEM_UNIT_WIDTH_*
         STEM_UNIT_WIDTH_NARROW,
         STEM_UNIT_WIDTH_SHORT,
         STEM_UNIT_WIDTH_FULL_NAME,
         STEM_UNIT_WIDTH_ISO_CODE,
         STEM_UNIT_WIDTH_FORMAL,
         STEM_UNIT_WIDTH_VARIANT,
         STEM_UNIT_WIDTH_HIDDEN,

         // STEM_SIGN_*
         STEM_SIGN_AUTO,
         STEM_SIGN_ALWAYS,
         STEM_SIGN_NEVER,
         STEM_SIGN_ACCOUNTING,
         STEM_SIGN_ACCOUNTING_ALWAYS,
         STEM_SIGN_EXCEPT_ZERO,
         STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
         STEM_SIGN_NEGATIVE,
         STEM_SIGN_ACCOUNTING_NEGATIVE,

         // STEM_DECIMAL_*
         STEM_DECIMAL_AUTO,
         STEM_DECIMAL_ALWAYS,

         // ======== Section 2: stems that DO require an option ========

         STEM_PRECISION_INCREMENT,
         STEM_MEASURE_UNIT,
         STEM_PER_MEASURE_UNIT,
         STEM_UNIT,
         STEM_UNIT_USAGE,
         STEM_CURRENCY,
         STEM_INTEGER_WIDTH,
         STEM_NUMBERING_SYSTEM,
         STEM_SCALE,
     };
    132 
    /** Default wildcard char, accepted on input and printed in output */
    inline constexpr char16_t kWildcardChar = u'*';

    /** Alternative wildcard char, accepted on input but not printed in output */
    inline constexpr char16_t kAltWildcardChar = u'+';

    /**
     * Checks whether the char is a wildcard on input.
     *
     * Declared constexpr so the check can also be used in constant expressions
     * (constexpr functions are implicitly inline, so header placement is unchanged).
     *
     * @param c The UTF-16 code unit to test.
     * @return true if c equals kWildcardChar or kAltWildcardChar, false otherwise.
     */
    constexpr bool isWildcardChar(char16_t c) {
        return c == kWildcardChar || c == kAltWildcardChar;
    }
    143 
    144 /**
    145 * Creates a NumberFormatter corresponding to the given skeleton string.
    146 *
    147 * @param skeletonString
    148 *            A number skeleton string, possibly not in its shortest form.
    149 * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
    150 */
    151 UnlocalizedNumberFormatter create(
    152    const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
    153 
    154 /**
    155 * Create a skeleton string corresponding to the given NumberFormatter.
    156 *
    157 * @param macros
    158 *            The NumberFormatter options object.
    159 * @return A skeleton string in normalized form.
    160 */
    161 UnicodeString generate(const MacroProps& macros, UErrorCode& status);
    162 
    163 /**
    164 * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
    165 *
    166 * Internal: use the create() endpoint instead of this function.
    167 */
    168 MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
    169 
    170 /**
    171 * Given that the current segment represents a stem, parse it and save the result.
    172 *
    173 * @return The next state after parsing this stem, corresponding to what subset of options to expect.
    174 */
    175 ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
    176                     MacroProps& macros, UErrorCode& status);
    177 
    178 /**
    179 * Given that the current segment represents an option, parse it and save the result.
    180 *
    181 * @return The next state after parsing this option, corresponding to what subset of options to
    182 *         expect next.
    183 */
    184 ParseState
    185 parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    186 
    187 } // namespace skeleton
    188 
    189 
    190 /**
    191 * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
    192 * applies to only the "Section 1" stems, those that are well-defined without an option.
    193 */
    194 namespace stem_to_object {
    195 
    196 Notation notation(skeleton::StemEnum stem);
    197 
    198 MeasureUnit unit(skeleton::StemEnum stem);
    199 
    200 Precision precision(skeleton::StemEnum stem);
    201 
    202 UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
    203 
    204 UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
    205 
    206 UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
    207 
    208 UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
    209 
    210 UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
    211 
    212 } // namespace stem_to_object
    213 
    214 /**
    215 * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
    216 * take place in the object_to_stem_string namespace.
    217 */
    218 namespace enum_to_stem_string {
    219 
    220 void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
    221 
    222 void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
    223 
    224 void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
    225 
    226 void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
    227 
    228 void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
    229 
    230 } // namespace enum_to_stem_string
    231 
    232 /**
    233 * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
    234 */
    235 namespace blueprint_helpers {
    236 
    237 /** @return Whether we successfully found and parsed an exponent width option. */
    238 bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    239 
    240 void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
    241 
    242 /** @return Whether we successfully found and parsed an exponent sign option. */
    243 bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    244 
    245 void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    246 
    247 void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
    248 
    249 // "measure-unit/" is deprecated in favour of "unit/".
    250 void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    251 
    252 // "per-measure-unit/" is deprecated in favour of "unit/".
    253 void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    254 
    255 /**
    256 * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as
    257 * specified via a "unit/" concise skeleton.
    258 */
    259 void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    260 
    261 void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    262 
    263 void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    264 
    265 void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
    266 
    267 void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    268 
    269 void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
    270 
    271 void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    272 
    273 // Note: no generateScientificStem since this syntax was added later in ICU 67
    274 
    275 void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    276 
    277 // Note: no generateIntegerStem since this syntax was added later in ICU 67
    278 
    279 /** @return Whether we successfully found and parsed a frac-sig option. */
    280 bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    281 
    282 /** @return Whether we successfully found and parsed a trailing zero option. */
    283 bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    284 
    285 void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    286 
    287 void
    288 generateIncrementOption(uint32_t increment, digits_t incrementMagnitude, int32_t minFrac, UnicodeString& sb, UErrorCode& status);
    289 
    290 void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    291 
    292 void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
    293 
    294 void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    295 
    296 void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
    297 
    298 void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    299 
    300 void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
    301                              UErrorCode& status);
    302 
    303 } // namespace blueprint_helpers
    304 
    305 /**
    306 * Class for utility methods for generating a token corresponding to each macro-prop. Each method
    307 * returns whether or not a token was written to the string builder.
    308 *
    309 * This needs to be a class, not a namespace, so it can be friended.
    310 */
    311 class GeneratorHelpers {
    312  public:
    313    /**
    314     * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
    315     * StringBuilder.
    316     *
    317     * Internal: use the create() endpoint instead of this function.
    318     */
    319    static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    320 
    321  private:
    322    static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    323 
    324    static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    325 
    326    static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    327 
    328    static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    329 
    330    static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    331 
    332    static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    333 
    334    static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    335 
    336    static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    337 
    338    static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    339 
    340    static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    341 
    342    static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    343 
    344    static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    345 
    346 };
    347 
    /**
     * Bookkeeping struct used for null-checking: one flag per macro-prop, all initially false.
     * In Java the object reference itself can be checked; C++ needs a separate record.
     */
    struct SeenMacroProps {
        bool notation{false};
        bool unit{false};
        bool perUnit{false};
        bool usage{false};
        bool precision{false};
        bool roundingMode{false};
        bool grouper{false};
        bool padder{false};
        bool integerWidth{false};
        bool symbols{false};
        bool unitWidth{false};
        bool sign{false};
        bool decimal{false};
        bool scale{false};
    };
    368 
    // SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status)
    //
    // Appends the code units of `src` in the range [start, end) to `dest` by calling
    // dest.appendInvariantChars(). The braced argument list presumably builds a read-only,
    // non-terminated UnicodeString alias over src's buffer -- confirm against the
    // UnicodeString constructors.
    //
    // Error handling:
    //   - An invariant-conversion error is reported in `status` as
    //     U_NUMBER_SKELETON_SYNTAX_ERROR, since it is a skeleton syntax error.
    //   - Any other failure is copied to `status` unchanged.
    //   In both cases the macro executes `return;`, so it can only be used inside
    //   functions returning void.
    //
    // The whole expansion, including the (void)(dest) use, sits inside the
    // UPRV_BLOCK_MACRO_BEGIN/END wrapper so the macro is a single statement and is safe
    // in unbraced if/else bodies.
    //
    // Macros are not scoped by namespaces, so the definition is not wrapped in an
    // anonymous namespace (which should not appear in a header anyway).
    #define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) \
    UPRV_BLOCK_MACRO_BEGIN { \
        (void)(dest); \
        UErrorCode conversionStatus = U_ZERO_ERROR; \
        (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
        if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
            /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
            (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
            return; \
        } else if (U_FAILURE(conversionStatus)) { \
            (status) = conversionStatus; \
            return; \
        } \
    } UPRV_BLOCK_MACRO_END
    386 
    387 } // namespace number::impl
    388 U_NAMESPACE_END
    389 
    390 #endif //__SOURCE_NUMBER_SKELETONS_H__
    391 #endif /* #if !UCONFIG_NO_FORMATTING */