tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

number_modifiers.cpp (19469B)


      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 #include "umutex.h"
      9 #include "ucln_cmn.h"
     10 #include "ucln_in.h"
     11 #include "number_modifiers.h"
     12 
     13 using namespace icu;
     14 using namespace icu::number;
     15 using namespace icu::number::impl;
     16 
     17 namespace {
     18 
// Bias used when decoding a compiled SimpleFormatter pattern in this file:
// a pattern unit below this value is treated as an argument marker, and a
// unit at or above it encodes (text segment length + ARG_NUM_LIMIT).
// TODO: This is copied from simpleformatter.cpp
const int32_t ARG_NUM_LIMIT = 0x100;

// These are the default currency spacing UnicodeSets in CLDR.
// Pre-compute them for performance.
// The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
// Guards the one-time construction of the two sets below.
icu::UInitOnce gDefaultCurrencySpacingInitOnce {};

// Frozen set for the pattern "[:digit:]"; allocated by initDefaultCurrencySpacing().
UnicodeSet *UNISET_DIGIT = nullptr;
// Frozen set for the pattern "[[:^S:]&[:^Z:]]"; allocated by initDefaultCurrencySpacing().
UnicodeSet *UNISET_NOTSZ = nullptr;
     29 
     30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
     31    delete UNISET_DIGIT;
     32    UNISET_DIGIT = nullptr;
     33    delete UNISET_NOTSZ;
     34    UNISET_NOTSZ = nullptr;
     35    gDefaultCurrencySpacingInitOnce.reset();
     36    return true;
     37 }
     38 
     39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
     40    ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
     41    UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
     42    UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status);
     43    if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) {
     44        status = U_MEMORY_ALLOCATION_ERROR;
     45        return;
     46    }
     47    UNISET_DIGIT->freeze();
     48    UNISET_NOTSZ->freeze();
     49 }
     50 
     51 }  // namespace
     52 
     53 
// Out-of-line defaulted destructor for the Modifier base class.
Modifier::~Modifier() = default;
     55 
     56 Modifier::Parameters::Parameters()
     57        : obj(nullptr) {}
     58 
     59 Modifier::Parameters::Parameters(
     60    const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural)
     61        : obj(_obj), signum(_signum), plural(_plural) {}
     62 
     63 bool Modifier::semanticallyEquivalent(const Modifier& other) const {
     64    Parameters paramsThis;
     65    Parameters paramsOther;
     66    getParameters(paramsThis);
     67    other.getParameters(paramsOther);
     68    if (paramsThis.obj == nullptr && paramsOther.obj == nullptr) {
     69        return strictEquals(other);
     70    } else if (paramsThis.obj == nullptr || paramsOther.obj == nullptr) {
     71        return false;
     72    }
     73    for (size_t i=0; i<SIGNUM_COUNT; i++) {
     74        auto signum = static_cast<Signum>(i);
     75        for (size_t j=0; j<StandardPlural::COUNT; j++) {
     76            auto plural = static_cast<StandardPlural::Form>(j);
     77            const auto* mod1 = paramsThis.obj->getModifier(signum, plural);
     78            const auto* mod2 = paramsOther.obj->getModifier(signum, plural);
     79            if (mod1 == mod2) {
     80                // Equal pointers
     81                continue;
     82            } else if (mod1 == nullptr || mod2 == nullptr) {
     83                // One pointer is null but not the other
     84                return false;
     85            } else if (!mod1->strictEquals(*mod2)) {
     86                // The modifiers are NOT equivalent
     87                return false;
     88            } else {
     89                // The modifiers are equivalent
     90                continue;
     91            }
     92        }
     93    }
     94    return true;
     95 }
     96 
     97 
// Out-of-line defaulted destructor for the ModifierStore base class.
ModifierStore::~ModifierStore() = default;
     99 
    100 AdoptingSignumModifierStore::~AdoptingSignumModifierStore()  {
    101    for (const Modifier *mod : mods) {
    102        delete mod;
    103    }
    104 }
    105 
    106 AdoptingSignumModifierStore&
    107 AdoptingSignumModifierStore::operator=(AdoptingSignumModifierStore&& other) noexcept {
    108    for (size_t i=0; i<SIGNUM_COUNT; i++) {
    109        this->mods[i] = other.mods[i];
    110        other.mods[i] = nullptr;
    111    }
    112    return *this;
    113 }
    114 
    115 
    116 int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
    117                                     UErrorCode &status) const {
    118    // Insert the suffix first since inserting the prefix will change the rightIndex
    119    int length = output.insert(rightIndex, fSuffix, fField, status);
    120    length += output.insert(leftIndex, fPrefix, fField, status);
    121    return length;
    122 }
    123 
// Returns the length of the prefix string, as reported by fPrefix.length().
int32_t ConstantAffixModifier::getPrefixLength() const {
    return fPrefix.length();
}
    127 
    128 int32_t ConstantAffixModifier::getCodePointCount() const {
    129    return fPrefix.countChar32() + fSuffix.countChar32();
    130 }
    131 
// Returns the "strong" flag stored in this modifier.
bool ConstantAffixModifier::isStrong() const {
    return fStrong;
}
    135 
// Not implemented for this modifier type: reaching this function is treated
// as a programming error via UPRV_UNREACHABLE_EXIT.
bool ConstantAffixModifier::containsField(Field field) const {
    (void)field;  // silence unused-parameter warnings
    // This method is not currently used.
    UPRV_UNREACHABLE_EXIT;
}
    141 
// Not implemented for this modifier type: reaching this function is treated
// as a programming error via UPRV_UNREACHABLE_EXIT. `output` is left untouched.
void ConstantAffixModifier::getParameters(Parameters& output) const {
    (void)output;  // silence unused-parameter warnings
    // This method is not currently used.
    UPRV_UNREACHABLE_EXIT;
}
    147 
    148 bool ConstantAffixModifier::strictEquals(const Modifier& other) const {
    149    const auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
    150    if (_other == nullptr) {
    151        return false;
    152    }
    153    return fPrefix == _other->fPrefix
    154        && fSuffix == _other->fSuffix
    155        && fField == _other->fField
    156        && fStrong == _other->fStrong;
    157 }
    158 
    159 
    160 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
    161        : SimpleModifier(simpleFormatter, field, strong, {}) {}
    162 
    163 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
    164                               const Modifier::Parameters parameters)
    165        : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
    166          fParameters(parameters) {
    167    int32_t argLimit = SimpleFormatter::getArgumentLimit(
    168            fCompiledPattern.getBuffer(), fCompiledPattern.length());
    169    if (argLimit == 0) {
    170        // No arguments in compiled pattern
    171        fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
    172        U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
    173        // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
    174        fSuffixOffset = -1;
    175        fSuffixLength = 0;
    176    } else {
    177        U_ASSERT(argLimit == 1);
    178        if (fCompiledPattern.charAt(1) != 0) {
    179            // Found prefix
    180            fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
    181            fSuffixOffset = 3 + fPrefixLength;
    182        } else {
    183            // No prefix
    184            fPrefixLength = 0;
    185            fSuffixOffset = 2;
    186        }
    187        if (3 + fPrefixLength < fCompiledPattern.length()) {
    188            // Found suffix
    189            fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
    190        } else {
    191            // No suffix
    192            fSuffixLength = 0;
    193        }
    194    }
    195 }
    196 
// Default constructor: undefined field, not strong, empty prefix and suffix.
// NOTE(review): fSuffixOffset is not set here; presumably it has an in-class
// initializer in the header — confirm, since formatAsPrefixSuffix() reads it.
SimpleModifier::SimpleModifier()
        : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
}
    200 
// Applies this modifier to [leftIndex, rightIndex) of `output` by forwarding
// to formatAsPrefixSuffix(); returns whatever that call returns.
int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
                              UErrorCode &status) const {
    return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
}
    205 
// Returns the prefix length computed by the constructor.
int32_t SimpleModifier::getPrefixLength() const {
    return fPrefixLength;
}
    209 
    210 int32_t SimpleModifier::getCodePointCount() const {
    211    int32_t count = 0;
    212    if (fPrefixLength > 0) {
    213        count += fCompiledPattern.countChar32(2, fPrefixLength);
    214    }
    215    if (fSuffixLength > 0) {
    216        count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
    217    }
    218    return count;
    219 }
    220 
// Returns the "strong" flag stored in this modifier.
bool SimpleModifier::isStrong() const {
    return fStrong;
}
    224 
// Not implemented for this modifier type: reaching this function is treated
// as a programming error via UPRV_UNREACHABLE_EXIT.
bool SimpleModifier::containsField(Field field) const {
    (void)field;  // silence unused-parameter warnings
    // This method is not currently used.
    UPRV_UNREACHABLE_EXIT;
}
    230 
// Copies the Parameters supplied at construction into `output`.
void SimpleModifier::getParameters(Parameters& output) const {
    output = fParameters;
}
    234 
    235 bool SimpleModifier::strictEquals(const Modifier& other) const {
    236    const auto* _other = dynamic_cast<const SimpleModifier*>(&other);
    237    if (_other == nullptr) {
    238        return false;
    239    }
    240    return fCompiledPattern == _other->fCompiledPattern
    241        && fField == _other->fField
    242        && fStrong == _other->fStrong;
    243 }
    244 
    245 
    246 int32_t
    247 SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
    248                                     UErrorCode &status) const {
    249    if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
    250        // There is no argument for the inner number; overwrite the entire segment with our string.
    251        return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
    252    } else {
    253        if (fPrefixLength > 0) {
    254            result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
    255        }
    256        if (fSuffixLength > 0) {
    257            result.insert(
    258                    endIndex + fPrefixLength,
    259                    fCompiledPattern,
    260                    1 + fSuffixOffset,
    261                    1 + fSuffixOffset + fSuffixLength,
    262                    fField,
    263                    status);
    264        }
    265        return fPrefixLength + fSuffixLength;
    266    }
    267 }
    268 
    269 
    270 int32_t
    271 SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
    272                                    int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
    273                                    Field field, UErrorCode& status) {
    274    const UnicodeString& compiledPattern = compiled.compiledPattern;
    275    int32_t argLimit = SimpleFormatter::getArgumentLimit(
    276            compiledPattern.getBuffer(), compiledPattern.length());
    277    if (argLimit != 2) {
    278        status = U_INTERNAL_PROGRAM_ERROR;
    279        return 0;
    280    }
    281    int32_t offset = 1; // offset into compiledPattern
    282    int32_t length = 0; // chars added to result
    283 
    284    int32_t prefixLength = compiledPattern.charAt(offset);
    285    offset++;
    286    if (prefixLength < ARG_NUM_LIMIT) {
    287        // No prefix
    288        prefixLength = 0;
    289    } else {
    290        prefixLength -= ARG_NUM_LIMIT;
    291        result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
    292        offset += prefixLength;
    293        length += prefixLength;
    294        offset++;
    295    }
    296 
    297    int32_t infixLength = compiledPattern.charAt(offset);
    298    offset++;
    299    if (infixLength < ARG_NUM_LIMIT) {
    300        // No infix
    301        infixLength = 0;
    302    } else {
    303        infixLength -= ARG_NUM_LIMIT;
    304        result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
    305        offset += infixLength;
    306        length += infixLength;
    307        offset++;
    308    }
    309 
    310    int32_t suffixLength;
    311    if (offset == compiledPattern.length()) {
    312        // No suffix
    313        suffixLength = 0;
    314    } else {
    315        suffixLength = compiledPattern.charAt(offset) -  ARG_NUM_LIMIT;
    316        offset++;
    317        result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
    318        length += suffixLength;
    319    }
    320 
    321    *outPrefixLength = prefixLength;
    322    *outSuffixLength = suffixLength;
    323 
    324    return length;
    325 }
    326 
    327 
    328 int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
    329                                          UErrorCode &status) const {
    330    int32_t length = output.insert(leftIndex, fPrefix, status);
    331    if (fOverwrite) {
    332        length += output.splice(
    333            leftIndex + length,
    334            rightIndex + length,
    335            UnicodeString(), 0, 0,
    336            kUndefinedField, status);
    337    }
    338    length += output.insert(rightIndex + length, fSuffix, status);
    339    return length;
    340 }
    341 
// Returns the length of the prefix builder, as reported by fPrefix.length().
int32_t ConstantMultiFieldModifier::getPrefixLength() const {
    return fPrefix.length();
}
    345 
    346 int32_t ConstantMultiFieldModifier::getCodePointCount() const {
    347    return fPrefix.codePointCount() + fSuffix.codePointCount();
    348 }
    349 
// Returns the "strong" flag stored in this modifier.
bool ConstantMultiFieldModifier::isStrong() const {
    return fStrong;
}
    353 
    354 bool ConstantMultiFieldModifier::containsField(Field field) const {
    355    return fPrefix.containsField(field) || fSuffix.containsField(field);
    356 }
    357 
// Copies this modifier's stored Parameters into `output`.
void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
    output = fParameters;
}
    361 
    362 bool ConstantMultiFieldModifier::strictEquals(const Modifier& other) const {
    363    const auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
    364    if (_other == nullptr) {
    365        return false;
    366    }
    367    return fPrefix.contentEquals(_other->fPrefix)
    368        && fSuffix.contentEquals(_other->fSuffix)
    369        && fOverwrite == _other->fOverwrite
    370        && fStrong == _other->fStrong;
    371 }
    372 
    373 
    374 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
    375                                                               const FormattedStringBuilder &suffix,
    376                                                               bool overwrite,
    377                                                               bool strong,
    378                                                               const DecimalFormatSymbols &symbols,
    379                                                               UErrorCode &status)
    380        : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
    381    // Check for currency spacing. Do not build the UnicodeSets unless there is
    382    // a currency code point at a boundary.
    383    if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
    384        int prefixCp = prefix.getLastCodePoint();
    385        UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
    386        if (prefixUnicodeSet.contains(prefixCp)) {
    387            fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
    388            fAfterPrefixUnicodeSet.freeze();
    389            fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
    390        } else {
    391            fAfterPrefixUnicodeSet.setToBogus();
    392            fAfterPrefixInsert.setToBogus();
    393        }
    394    } else {
    395        fAfterPrefixUnicodeSet.setToBogus();
    396        fAfterPrefixInsert.setToBogus();
    397    }
    398    if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
    399        int suffixCp = suffix.getFirstCodePoint();
    400        UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
    401        if (suffixUnicodeSet.contains(suffixCp)) {
    402            fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
    403            fBeforeSuffixUnicodeSet.freeze();
    404            fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
    405        } else {
    406            fBeforeSuffixUnicodeSet.setToBogus();
    407            fBeforeSuffixInsert.setToBogus();
    408        }
    409    } else {
    410        fBeforeSuffixUnicodeSet.setToBogus();
    411        fBeforeSuffixInsert.setToBogus();
    412    }
    413 }
    414 
    415 int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
    416                                              UErrorCode &status) const {
    417    // Currency spacing logic
    418    int length = 0;
    419    if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
    420        fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
    421        // TODO: Should we use the CURRENCY field here?
    422        length += output.insert(
    423            leftIndex,
    424            fAfterPrefixInsert,
    425            kUndefinedField,
    426            status);
    427    }
    428    if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
    429        fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
    430        // TODO: Should we use the CURRENCY field here?
    431        length += output.insert(
    432            rightIndex + length,
    433            fBeforeSuffixInsert,
    434            kUndefinedField,
    435            status);
    436    }
    437 
    438    // Call super for the remaining logic
    439    length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
    440    return length;
    441 }
    442 
    443 int32_t
    444 CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
    445                                                     int32_t prefixLen, int32_t suffixStart,
    446                                                     int32_t suffixLen,
    447                                                     const DecimalFormatSymbols &symbols,
    448                                                     UErrorCode &status) {
    449    int length = 0;
    450    bool hasPrefix = (prefixLen > 0);
    451    bool hasSuffix = (suffixLen > 0);
    452    bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
    453    if (hasPrefix && hasNumber) {
    454        length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
    455    }
    456    if (hasSuffix && hasNumber) {
    457        length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
    458    }
    459    return length;
    460 }
    461 
    462 int32_t
    463 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
    464                                                          EAffix affix,
    465                                                          const DecimalFormatSymbols &symbols,
    466                                                          UErrorCode &status) {
    467    // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
    468    // This works even if the last code point in the prefix is 2 code units because the
    469    // field value gets populated to both indices in the field array.
    470    Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
    471    if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
    472        return 0;
    473    }
    474    int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
    475    UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
    476    if (!affixUniset.contains(affixCp)) {
    477        return 0;
    478    }
    479    int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
    480    UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
    481    if (!numberUniset.contains(numberCp)) {
    482        return 0;
    483    }
    484    UnicodeString spacingString = getInsertString(symbols, affix, status);
    485 
    486    // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
    487    // It would be more efficient if this could be done before affixes were attached,
    488    // so that it could be prepended/appended instead of inserted.
    489    // However, the build code path is more efficient, and this is the most natural
    490    // place to put currency spacing in the non-build code path.
    491    // TODO: Should we use the CURRENCY field here?
    492    return output.insert(index, spacingString, kUndefinedField, status);
    493 }
    494 
    495 UnicodeSet
    496 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
    497                                              EAffix affix, UErrorCode &status) {
    498    // Ensure the static defaults are initialized:
    499    umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
    500    if (U_FAILURE(status)) {
    501        return {};
    502    }
    503 
    504    const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
    505            position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
    506            affix == SUFFIX,
    507            status);
    508    if (pattern.compare(u"[:digit:]", -1) == 0) {
    509        return *UNISET_DIGIT;
    510    } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) {
    511        return *UNISET_NOTSZ;
    512    } else {
    513        return UnicodeSet(pattern, status);
    514    }
    515 }
    516 
    517 UnicodeString
    518 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
    519                                                UErrorCode &status) {
    520    return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
    521 }
    522 
    523 #endif /* #if !UCONFIG_NO_FORMATTING */