tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

number_compact.cpp (14246B)


      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 #include "unicode/ustring.h"
      9 #include "unicode/ures.h"
     10 #include "cstring.h"
     11 #include "charstr.h"
     12 #include "resource.h"
     13 #include "number_compact.h"
     14 #include "number_microprops.h"
     15 #include "uresimp.h"
     16 
     17 using namespace icu;
     18 using namespace icu::number;
     19 using namespace icu::number::impl;
     20 
     21 namespace {
     22 
     23 // A dummy object used when a "0" compact decimal entry is encountered. This is necessary
     24 // in order to prevent falling back to root. Object equality ("==") is intended.
     25 const char16_t *USE_FALLBACK = u"<USE FALLBACK>";
     26 
     27 /** Produces a string like "NumberElements/latn/patternsShort/decimalFormat". */
     28 void getResourceBundleKey(const char *nsName, CompactStyle compactStyle, CompactType compactType,
     29                                 CharString &sb, UErrorCode &status) {
     30    sb.clear();
     31    sb.append("NumberElements/", status);
     32    sb.append(nsName, status);
     33    sb.append(compactStyle == CompactStyle::UNUM_SHORT ? "/patternsShort" : "/patternsLong", status);
     34    sb.append(compactType == CompactType::TYPE_DECIMAL ? "/decimalFormat" : "/currencyFormat", status);
     35 }
     36 
     37 int32_t getIndex(int32_t magnitude, StandardPlural::Form plural) {
     38    return magnitude * StandardPlural::COUNT + plural;
     39 }
     40 
     41 int32_t countZeros(const char16_t *patternString, int32_t patternLength) {
     42    // NOTE: This strategy for computing the number of zeros is a hack for efficiency.
     43    // It could break if there are any 0s that aren't part of the main pattern.
     44    int32_t numZeros = 0;
     45    for (int32_t i = 0; i < patternLength; i++) {
     46        if (patternString[i] == u'0') {
     47            numZeros++;
     48        } else if (numZeros > 0) {
     49            break; // zeros should always be contiguous
     50        }
     51    }
     52    return numZeros;
     53 }
     54 
     55 } // namespace
     56 
     57 // NOTE: patterns and multipliers both get zero-initialized.
     58 CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(true) {
     59 }
     60 
     61 void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle,
     62                           CompactType compactType, UErrorCode &status) {
     63    CompactDataSink sink(*this);
     64    LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &status));
     65    if (U_FAILURE(status)) { return; }
     66 
     67    bool nsIsLatn = strcmp(nsName, "latn") == 0;
     68    bool compactIsShort = compactStyle == CompactStyle::UNUM_SHORT;
     69 
     70    // Fall back to latn numbering system and/or short compact style.
     71    CharString resourceKey;
     72    getResourceBundleKey(nsName, compactStyle, compactType, resourceKey, status);
     73    UErrorCode localStatus = U_ZERO_ERROR;
     74    ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
     75    if (isEmpty && !nsIsLatn) {
     76        getResourceBundleKey("latn", compactStyle, compactType, resourceKey, status);
     77        localStatus = U_ZERO_ERROR;
     78        ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
     79    }
     80    if (isEmpty && !compactIsShort) {
     81        getResourceBundleKey(nsName, CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
     82        localStatus = U_ZERO_ERROR;
     83        ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
     84    }
     85    if (isEmpty && !nsIsLatn && !compactIsShort) {
     86        getResourceBundleKey("latn", CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
     87        localStatus = U_ZERO_ERROR;
     88        ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
     89    }
     90 
     91    // The last fallback should be guaranteed to return data.
     92    if (isEmpty) {
     93        status = U_INTERNAL_PROGRAM_ERROR;
     94    }
     95 }
     96 
     97 int32_t CompactData::getMultiplier(int32_t magnitude) const {
     98    if (magnitude < 0) {
     99        return 0;
    100    }
    101    if (magnitude > largestMagnitude) {
    102        magnitude = largestMagnitude;
    103    }
    104    return multipliers[magnitude];
    105 }
    106 
    107 const char16_t *CompactData::getPattern(
    108        int32_t magnitude,
    109        const PluralRules *rules,
    110        const DecimalQuantity &dq) const {
    111    if (magnitude < 0) {
    112        return nullptr;
    113    }
    114    if (magnitude > largestMagnitude) {
    115        magnitude = largestMagnitude;
    116    }
    117    const char16_t *patternString = nullptr;
    118    if (dq.hasIntegerValue()) {
    119        int64_t i = dq.toLong(true);
    120        if (i == 0) {
    121            patternString = patterns[getIndex(magnitude, StandardPlural::Form::EQ_0)];
    122        } else if (i == 1) {
    123            patternString = patterns[getIndex(magnitude, StandardPlural::Form::EQ_1)];
    124        }
    125        if (patternString != nullptr) {
    126            return patternString;
    127        }
    128    }
    129    StandardPlural::Form plural = utils::getStandardPlural(rules, dq);
    130    patternString = patterns[getIndex(magnitude, plural)];
    131    if (patternString == nullptr && plural != StandardPlural::OTHER) {
    132        // Fall back to "other" plural variant
    133        patternString = patterns[getIndex(magnitude, StandardPlural::OTHER)];
    134    }
    135    if (patternString == USE_FALLBACK) { // == is intended
    136        // Return null if USE_FALLBACK is present
    137        patternString = nullptr;
    138    }
    139    return patternString;
    140 }
    141 
    142 void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const {
    143    U_ASSERT(output.isEmpty());
    144    // NOTE: In C++, this is done more manually with a UVector.
    145    // In Java, we can take advantage of JDK HashSet.
    146    for (const auto* pattern : patterns) {
    147        if (pattern == nullptr || pattern == USE_FALLBACK) {
    148            continue;
    149        }
    150 
    151        // Insert pattern into the UVector if the UVector does not already contain the pattern.
    152        // Search the UVector from the end since identical patterns are likely to be adjacent.
    153        for (int32_t i = output.size() - 1; i >= 0; i--) {
    154            if (u_strcmp(pattern, static_cast<const char16_t *>(output[i])) == 0) {
    155                goto continue_outer;
    156            }
    157        }
    158 
    159        // The string was not found; add it to the UVector.
    160        // Note: must cast off const from pattern to store it in a UVector, which expects (void *)
    161        output.addElement(const_cast<char16_t *>(pattern), status);
    162 
    163        continue_outer:
    164        continue;
    165    }
    166 }
    167 
    168 void CompactData::CompactDataSink::put(const char *key, ResourceValue &value, UBool /*noFallback*/,
    169                                       UErrorCode &status) {
    170    // traverse into the table of powers of ten
    171    ResourceTable powersOfTenTable = value.getTable(status);
    172    if (U_FAILURE(status)) { return; }
    173    for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
    174 
    175        // Assumes that the keys are always of the form "10000" where the magnitude is the
    176        // length of the key minus one.  We only support magnitudes less than COMPACT_MAX_DIGITS;
    177        // ignore entries that have greater magnitude.
    178        auto magnitude = static_cast<int8_t> (strlen(key) - 1);
    179        U_ASSERT(magnitude < COMPACT_MAX_DIGITS); // debug assert
    180        if (magnitude >= COMPACT_MAX_DIGITS) { // skip in production
    181            continue;
    182        }
    183        int8_t multiplier = data.multipliers[magnitude];
    184 
    185        // Iterate over the plural variants ("one", "other", etc)
    186        ResourceTable pluralVariantsTable = value.getTable(status);
    187        if (U_FAILURE(status)) { return; }
    188        for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
    189            // Skip this magnitude/plural if we already have it from a child locale.
    190            // Note: This also skips USE_FALLBACK entries.
    191            StandardPlural::Form plural = StandardPlural::fromString(key, status);
    192            if (U_FAILURE(status)) { return; }
    193            if (data.patterns[getIndex(magnitude, plural)] != nullptr) {
    194                continue;
    195            }
    196 
    197            // The value "0" means that we need to use the default pattern and not fall back
    198            // to parent locales. Example locale where this is relevant: 'it'.
    199            int32_t patternLength;
    200            const char16_t *patternString = value.getString(patternLength, status);
    201            if (U_FAILURE(status)) { return; }
    202            if (u_strcmp(patternString, u"0") == 0) {
    203                patternString = USE_FALLBACK;
    204                patternLength = 0;
    205            }
    206 
    207            // Save the pattern string. We will parse it lazily.
    208            data.patterns[getIndex(magnitude, plural)] = patternString;
    209 
    210            // If necessary, compute the multiplier: the difference between the magnitude
    211            // and the number of zeros in the pattern.
    212            if (multiplier == 0) {
    213                int32_t numZeros = countZeros(patternString, patternLength);
    214                if (numZeros > 0) { // numZeros==0 in certain cases, like Somali "Kun"
    215                    multiplier = static_cast<int8_t> (numZeros - magnitude - 1);
    216                }
    217            }
    218        }
    219 
    220        // Save the multiplier.
    221        if (data.multipliers[magnitude] == 0) {
    222            data.multipliers[magnitude] = multiplier;
    223            if (magnitude > data.largestMagnitude) {
    224                data.largestMagnitude = magnitude;
    225            }
    226            data.isEmpty = false;
    227        } else {
    228            U_ASSERT(data.multipliers[magnitude] == multiplier);
    229        }
    230    }
    231 }
    232 
    233 ///////////////////////////////////////////////////////////
    234 /// END OF CompactData.java; BEGIN CompactNotation.java ///
    235 ///////////////////////////////////////////////////////////
    236 
    237 CompactHandler::CompactHandler(
    238        CompactStyle compactStyle,
    239        const Locale &locale,
    240        const char *nsName,
    241        CompactType compactType,
    242        const PluralRules *rules,
    243        MutablePatternModifier *buildReference,
    244        bool safe,
    245        const MicroPropsGenerator *parent,
    246        UErrorCode &status)
    247        : rules(rules), parent(parent), safe(safe) {
    248    data.populate(locale, nsName, compactStyle, compactType, status);
    249    if (safe) {
    250        // Safe code path
    251        precomputeAllModifiers(*buildReference, status);
    252    } else {
    253        // Unsafe code path
    254        // Store the MutablePatternModifier reference.
    255        unsafePatternModifier = buildReference;
    256    }
    257 }
    258 
    259 CompactHandler::~CompactHandler() {
    260    for (int32_t i = 0; i < precomputedModsLength; i++) {
    261        delete precomputedMods[i].mod;
    262    }
    263 }
    264 
    265 void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status) {
    266    if (U_FAILURE(status)) { return; }
    267 
    268    // Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T
    269    UVector allPatterns(12, status);
    270    if (U_FAILURE(status)) { return; }
    271    data.getUniquePatterns(allPatterns, status);
    272    if (U_FAILURE(status)) { return; }
    273 
    274    // C++ only: ensure that precomputedMods has room.
    275    precomputedModsLength = allPatterns.size();
    276    if (precomputedMods.getCapacity() < precomputedModsLength) {
    277        precomputedMods.resize(allPatterns.size(), status);
    278        if (U_FAILURE(status)) { return; }
    279    }
    280 
    281    for (int32_t i = 0; i < precomputedModsLength; i++) {
    282        const auto* patternString = static_cast<const char16_t*>(allPatterns[i]);
    283        UnicodeString hello(patternString);
    284        CompactModInfo &info = precomputedMods[i];
    285        ParsedPatternInfo patternInfo;
    286        PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);
    287        if (U_FAILURE(status)) { return; }
    288        buildReference.setPatternInfo(&patternInfo, {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD});
    289        info.mod = buildReference.createImmutable(status);
    290        if (U_FAILURE(status)) { return; }
    291        info.patternString = patternString;
    292    }
    293 }
    294 
    295 void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
    296                                     UErrorCode &status) const {
    297    parent->processQuantity(quantity, micros, status);
    298    if (U_FAILURE(status)) { return; }
    299 
    300    // Treat zero, NaN, and infinity as if they had magnitude 0
    301    int32_t magnitude;
    302    int32_t multiplier = 0;
    303    if (quantity.isZeroish()) {
    304        magnitude = 0;
    305        micros.rounder.apply(quantity, status);
    306    } else {
    307        // TODO: Revisit chooseMultiplierAndApply
    308        multiplier = micros.rounder.chooseMultiplierAndApply(quantity, data, status);
    309        magnitude = quantity.isZeroish() ? 0 : quantity.getMagnitude();
    310        magnitude -= multiplier;
    311    }
    312 
    313    const char16_t *patternString = data.getPattern(magnitude, rules, quantity);
    314    if (patternString == nullptr) {
    315        // Use the default (non-compact) modifier.
    316        // No need to take any action.
    317    } else if (safe) {
    318        // Safe code path.
    319        // Java uses a hash set here for O(1) lookup.  C++ uses a linear search.
    320        // TODO: Benchmark this and maybe change to a binary search or hash table.
    321        int32_t i = 0;
    322        for (; i < precomputedModsLength; i++) {
    323            const CompactModInfo &info = precomputedMods[i];
    324            if (u_strcmp(patternString, info.patternString) == 0) {
    325                info.mod->applyToMicros(micros, quantity, status);
    326                break;
    327            }
    328        }
    329        // It should be guaranteed that we found the entry.
    330        U_ASSERT(i < precomputedModsLength);
    331    } else {
    332        // Unsafe code path.
    333        // Overwrite the PatternInfo in the existing modMiddle.
    334        // C++ Note: Use unsafePatternInfo for proper lifecycle.
    335        ParsedPatternInfo &patternInfo = const_cast<CompactHandler *>(this)->unsafePatternInfo;
    336        PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);
    337        unsafePatternModifier->setPatternInfo(
    338            &unsafePatternInfo,
    339            {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD});
    340        unsafePatternModifier->setNumberProperties(quantity.signum(), StandardPlural::Form::COUNT);
    341        micros.modMiddle = unsafePatternModifier;
    342    }
    343 
    344    // Change the exponent only after we select appropriate plural form
    345    // for formatting purposes so that we preserve expected formatted
    346    // string behavior.
    347    quantity.adjustExponent(-1 * multiplier);
    348 
    349    // We already performed rounding. Do not perform it again.
    350    micros.rounder = RoundingImpl::passThrough();
    351 }
    352 
    353 #endif /* #if !UCONFIG_NO_FORMATTING */