tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

numrange_impl.cpp (18424B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
      9 // Helpful in toString methods and elsewhere.
     10 #define UNISTR_FROM_STRING_EXPLICIT
     11 
     12 #include "unicode/numberrangeformatter.h"
     13 #include "numrange_impl.h"
     14 #include "patternprops.h"
     15 #include "pluralranges.h"
     16 #include "uresimp.h"
     17 #include "util.h"
     18 
     19 using namespace icu;
     20 using namespace icu::number;
     21 using namespace icu::number::impl;
     22 
     23 namespace {
     24 
     25 // Helper function for 2-dimensional switch statement
     26 constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
     27    return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
     28 }
     29 
     30 
     31 struct NumberRangeData {
     32    SimpleFormatter rangePattern;
     33    // Note: approximatelyPattern is unused since ICU 69.
     34    // SimpleFormatter approximatelyPattern;
     35 };
     36 
     37 class NumberRangeDataSink : public ResourceSink {
     38  public:
     39    NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
     40 
     41    void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) override {
     42        ResourceTable miscTable = value.getTable(status);
     43        if (U_FAILURE(status)) { return; }
     44        for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
     45            if (uprv_strcmp(key, "range") == 0) {
     46                if (hasRangeData()) {
     47                    continue; // have already seen this pattern
     48                }
     49                fData.rangePattern = {value.getUnicodeString(status), status};
     50            }
     51            /*
     52            // Note: approximatelyPattern is unused since ICU 69.
     53            else if (uprv_strcmp(key, "approximately") == 0) {
     54                if (hasApproxData()) {
     55                    continue; // have already seen this pattern
     56                }
     57                fData.approximatelyPattern = {value.getUnicodeString(status), status};
     58            }
     59            */
     60        }
     61    }
     62 
     63    bool hasRangeData() {
     64        return fData.rangePattern.getArgumentLimit() != 0;
     65    }
     66 
     67    /*
     68    // Note: approximatelyPattern is unused since ICU 69.
     69    bool hasApproxData() {
     70        return fData.approximatelyPattern.getArgumentLimit() != 0;
     71    }
     72    */
     73 
     74    bool isComplete() {
     75        return hasRangeData() /* && hasApproxData() */;
     76    }
     77 
     78    void fillInDefaults(UErrorCode& status) {
     79        if (!hasRangeData()) {
     80            fData.rangePattern = {u"{0}–{1}", status};
     81        }
     82        /*
     83        if (!hasApproxData()) {
     84            fData.approximatelyPattern = {u"~{0}", status};
     85        }
     86        */
     87    }
     88 
     89  private:
     90    NumberRangeData& fData;
     91 };
     92 
     93 void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
     94    if (U_FAILURE(status)) { return; }
     95    LocalUResourceBundlePointer rb(ures_open(nullptr, localeName, &status));
     96    if (U_FAILURE(status)) { return; }
     97    NumberRangeDataSink sink(data);
     98 
     99    CharString dataPath;
    100    dataPath.append("NumberElements/", -1, status);
    101    dataPath.append(nsName, -1, status);
    102    dataPath.append("/miscPatterns", -1, status);
    103    if (U_FAILURE(status)) { return; }
    104 
    105    UErrorCode localStatus = U_ZERO_ERROR;
    106    ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
    107    if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
    108        status = localStatus;
    109        return;
    110    }
    111 
    112    // Fall back to latn if necessary
    113    if (!sink.isComplete()) {
    114        ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
    115    }
    116 
    117    sink.fillInDefaults(status);
    118 }
    119 
    120 } // namespace
    121 
    122 
    123 
    124 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
    125    : formatterImpl1(macros.formatter1.fMacros, status),
    126      formatterImpl2(macros.formatter2.fMacros, status),
    127      fSameFormatters(macros.singleFormatter),
    128      fCollapse(macros.collapse),
    129      fIdentityFallback(macros.identityFallback),
    130      fApproximatelyFormatter(status) {
    131 
    132    const char* nsName = formatterImpl1.getRawMicroProps().nsName;
    133    if (!fSameFormatters && uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) {
    134        status = U_ILLEGAL_ARGUMENT_ERROR;
    135        return;
    136    }
    137 
    138    NumberRangeData data;
    139    getNumberRangeData(macros.locale.getName(), nsName, data, status);
    140    if (U_FAILURE(status)) { return; }
    141    fRangeFormatter = data.rangePattern;
    142 
    143    if (fSameFormatters && (
    144            fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY ||
    145            fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE)) {
    146        MacroProps approximatelyMacros(macros.formatter1.fMacros);
    147        approximatelyMacros.approximately = true;
    148        // Use in-place construction because NumberFormatterImpl has internal self-pointers
    149        fApproximatelyFormatter.~NumberFormatterImpl();
    150        new (&fApproximatelyFormatter) NumberFormatterImpl(approximatelyMacros, status);
    151    }
    152 
    153    // TODO: Get locale from PluralRules instead?
    154    fPluralRanges = StandardPluralRanges::forLocale(macros.locale, status);
    155    if (U_FAILURE(status)) { return; }
    156 }
    157 
    158 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
    159    if (U_FAILURE(status)) {
    160        return;
    161    }
    162 
    163    DecimalQuantity quantityBackup(data.quantity1);
    164 
    165    MicroProps micros1;
    166    MicroProps micros2;
    167    formatterImpl1.preProcess(data.quantity1, micros1, status);
    168    if (fSameFormatters) {
    169        formatterImpl1.preProcess(data.quantity2, micros2, status);
    170    } else {
    171        formatterImpl2.preProcess(data.quantity2, micros2, status);
    172    }
    173    if (U_FAILURE(status)) {
    174        return;
    175    }
    176 
    177    // If any of the affixes are different, an identity is not possible
    178    // and we must use formatRange().
    179    // TODO: Write this as MicroProps operator==() ?
    180    // TODO: Avoid the redundancy of these equality operations with the
    181    // ones in formatRange?
    182    if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
    183            || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
    184            || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
    185        formatRange(data, micros1, micros2, status);
    186        data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
    187        return;
    188    }
    189 
    190    // Check for identity
    191    if (equalBeforeRounding) {
    192        data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
    193    } else if (data.quantity1 == data.quantity2) {
    194        data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
    195    } else {
    196        data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
    197    }
    198 
    199    switch (identity2d(fIdentityFallback, data.identityResult)) {
    200        case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
    201                        UNUM_IDENTITY_RESULT_NOT_EQUAL):
    202        case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
    203                        UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
    204        case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
    205                        UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
    206        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
    207                        UNUM_IDENTITY_RESULT_NOT_EQUAL):
    208        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
    209                        UNUM_IDENTITY_RESULT_NOT_EQUAL):
    210        case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
    211                        UNUM_IDENTITY_RESULT_NOT_EQUAL):
    212            formatRange(data, micros1, micros2, status);
    213            break;
    214 
    215        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
    216                        UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
    217        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
    218                        UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
    219        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
    220                        UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
    221            formatApproximately(data, quantityBackup, micros1, micros2, status);
    222            break;
    223 
    224        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
    225                        UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
    226        case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
    227                        UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
    228        case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
    229                        UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
    230            formatSingleValue(data, micros1, micros2, status);
    231            break;
    232 
    233        default:
    234            UPRV_UNREACHABLE_EXIT;
    235    }
    236 }
    237 
    238 
    239 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
    240                                                 MicroProps& micros1, MicroProps& micros2,
    241                                                 UErrorCode& status) const {
    242    if (U_FAILURE(status)) { return; }
    243    if (fSameFormatters) {
    244        int32_t length = NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, data.getStringRef(), 0, status);
    245        NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status);
    246    } else {
    247        formatRange(data, micros1, micros2, status);
    248    }
    249 }
    250 
    251 
    252 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
    253                                                    DecimalQuantity quantity,
    254                                                    MicroProps& micros1, MicroProps& micros2,
    255                                                    UErrorCode& status) const {
    256    if (U_FAILURE(status)) { return; }
    257    if (fSameFormatters) {
    258        // Re-format using the approximately formatter:
    259        MicroProps microsAppx;
    260        fApproximatelyFormatter.preProcess(quantity, microsAppx, status);
    261        int32_t length = NumberFormatterImpl::writeNumber(microsAppx.simple, quantity, data.getStringRef(), 0, status);
    262        length += microsAppx.modInner->apply(data.getStringRef(), 0, length, status);
    263        length += microsAppx.modMiddle->apply(data.getStringRef(), 0, length, status);
    264        microsAppx.modOuter->apply(data.getStringRef(), 0, length, status);
    265    } else {
    266        formatRange(data, micros1, micros2, status);
    267    }
    268 }
    269 
    270 
    271 void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
    272                                           MicroProps& micros1, MicroProps& micros2,
    273                                           UErrorCode& status) const {
    274    if (U_FAILURE(status)) { return; }
    275 
    276    // modInner is always notation (scientific); collapsable in ALL.
    277    // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
    278    // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
    279    // Never collapse an outer mod but not an inner mod.
    280    bool collapseOuter, collapseMiddle, collapseInner;
    281    switch (fCollapse) {
    282        case UNUM_RANGE_COLLAPSE_ALL:
    283        case UNUM_RANGE_COLLAPSE_AUTO:
    284        case UNUM_RANGE_COLLAPSE_UNIT:
    285        {
    286            // OUTER MODIFIER
    287            collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
    288 
    289            if (!collapseOuter) {
    290                // Never collapse inner mods if outer mods are not collapsable
    291                collapseMiddle = false;
    292                collapseInner = false;
    293                break;
    294            }
    295 
    296            // MIDDLE MODIFIER
    297            collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
    298 
    299            if (!collapseMiddle) {
    300                // Never collapse inner mods if outer mods are not collapsable
    301                collapseInner = false;
    302                break;
    303            }
    304 
    305            // MIDDLE MODIFIER HEURISTICS
    306            // (could disable collapsing of the middle modifier)
    307            // The modifiers are equal by this point, so we can look at just one of them.
    308            const Modifier* mm = micros1.modMiddle;
    309            if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
    310                // Only collapse if the modifier is a unit.
    311                // TODO: Make a better way to check for a unit?
    312                // TODO: Handle case where the modifier has both notation and unit (compact currency)?
    313                if (!mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD})
    314                        && !mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD})) {
    315                    collapseMiddle = false;
    316                }
    317            } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
    318                // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
    319                if (mm->getCodePointCount() <= 1) {
    320                    collapseMiddle = false;
    321                }
    322            }
    323 
    324            if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
    325                collapseInner = false;
    326                break;
    327            }
    328 
    329            // INNER MODIFIER
    330            collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
    331 
    332            // All done checking for collapsibility.
    333            break;
    334        }
    335 
    336        default:
    337            collapseOuter = false;
    338            collapseMiddle = false;
    339            collapseInner = false;
    340            break;
    341    }
    342 
    343    FormattedStringBuilder& string = data.getStringRef();
    344    int32_t lengthPrefix = 0;
    345    int32_t length1 = 0;
    346    int32_t lengthInfix = 0;
    347    int32_t length2 = 0;
    348    int32_t lengthSuffix = 0;
    349 
    350    // Use #define so that these are evaluated at the call site.
    351    #define UPRV_INDEX_0 (lengthPrefix)
    352    #define UPRV_INDEX_1 (lengthPrefix + length1)
    353    #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
    354    #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
    355    #define UPRV_INDEX_4 (lengthPrefix + length1 + lengthInfix + length2 + lengthSuffix)
    356 
    357    int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
    358        fRangeFormatter,
    359        string,
    360        0,
    361        &lengthPrefix,
    362        &lengthSuffix,
    363        kUndefinedField,
    364        status);
    365    if (U_FAILURE(status)) { return; }
    366    lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
    367    U_ASSERT(lengthInfix > 0);
    368 
    369    // SPACING HEURISTIC
    370    // Add spacing unless all modifiers are collapsed.
    371    // TODO: add API to control this?
    372    // TODO: Use a data-driven heuristic like currency spacing?
    373    // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
    374    {
    375        bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
    376        bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
    377        bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
    378        if (repeatInner || repeatMiddle || repeatOuter) {
    379            // Add spacing if there is not already spacing
    380            if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
    381                lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', kUndefinedField, status);
    382            }
    383            if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
    384                lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', kUndefinedField, status);
    385            }
    386        }
    387    }
    388 
    389    length1 += NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, string, UPRV_INDEX_0, status);
    390    // ICU-21684: Write the second number to a temp string to avoid repeated insert operations
    391    FormattedStringBuilder tempString;
    392    NumberFormatterImpl::writeNumber(micros2.simple, data.quantity2, tempString, 0, status);
    393    length2 += string.insert(UPRV_INDEX_2, tempString, status);
    394 
    395    // TODO: Support padding?
    396 
    397    if (collapseInner) {
    398        const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
    399        lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
    400        lengthPrefix += mod.getPrefixLength();
    401        lengthSuffix -= mod.getPrefixLength();
    402    } else {
    403        length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
    404        length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
    405    }
    406 
    407    if (collapseMiddle) {
    408        const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
    409        lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
    410        lengthPrefix += mod.getPrefixLength();
    411        lengthSuffix -= mod.getPrefixLength();
    412    } else {
    413        length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
    414        length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
    415    }
    416 
    417    if (collapseOuter) {
    418        const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
    419        lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
    420        lengthPrefix += mod.getPrefixLength();
    421        lengthSuffix -= mod.getPrefixLength();
    422    } else {
    423        length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
    424        length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
    425    }
    426 
    427    // Now that all pieces are added, save the span info.
    428    data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, UPRV_INDEX_0, length1, status);
    429    data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, UPRV_INDEX_2, length2, status);
    430 }
    431 
    432 
    433 const Modifier&
    434 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
    435    Modifier::Parameters parameters;
    436    first.getParameters(parameters);
    437    if (parameters.obj == nullptr) {
    438        // No plural form; return a fallback (e.g., the first)
    439        return first;
    440    }
    441    StandardPlural::Form firstPlural = parameters.plural;
    442 
    443    second.getParameters(parameters);
    444    if (parameters.obj == nullptr) {
    445        // No plural form; return a fallback (e.g., the first)
    446        return first;
    447    }
    448    StandardPlural::Form secondPlural = parameters.plural;
    449 
    450    // Get the required plural form from data
    451    StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
    452 
    453    // Get and return the new Modifier
    454    const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
    455    U_ASSERT(mod != nullptr);
    456    return *mod;
    457 }
    458 
    459 
    460 
    461 #endif /* #if !UCONFIG_NO_FORMATTING */