tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

units_data.cpp (23021B)


      1 // © 2020 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 #include "bytesinkutil.h"
      9 #include "charstr.h"
     10 #include "cstring.h"
     11 #include "measunit_impl.h"
     12 #include "number_decimalquantity.h"
     13 #include "resource.h"
     14 #include "uassert.h"
     15 #include "ulocimp.h"
     16 #include "unicode/locid.h"
     17 #include "unicode/unistr.h"
     18 #include "unicode/ures.h"
     19 #include "units_data.h"
     20 #include "uresimp.h"
     21 #include "util.h"
     22 #include <utility>
     23 
     24 U_NAMESPACE_BEGIN
     25 namespace units {
     26 
     27 namespace {
     28 
     29 using icu::number::impl::DecimalQuantity;
     30 
     31 void trimSpaces(CharString& factor, UErrorCode& status){
     32   CharString trimmed;
     33   for (int i = 0 ; i < factor.length(); i++) {
     34       if (factor[i] == ' ') continue;
     35 
     36       trimmed.append(factor[i], status);
     37   }
     38 
     39   factor = std::move(trimmed);
     40 }
     41 
     42 /**
     43 * A ResourceSink that collects conversion rate information.
     44 *
     45 * This class is for use by ures_getAllItemsWithFallback.
     46 */
     47 class ConversionRateDataSink : public ResourceSink {
     48  public:
     49    /**
     50     * Constructor.
     51     * @param out The vector to which ConversionRateInfo instances are to be
     52     * added. This vector must outlive the use of the ResourceSink.
     53     */
     54    explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
     55 
     56    /**
     57     * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
     58     * conversion rates that are found in `value` to the output vector.
     59     *
     60     * @param source This string must be "convertUnits": the resource that this
     61     * class supports reading.
     62     * @param value The "convertUnits" resource, containing unit conversion rate
     63     * information.
     64     * @param noFallback Ignored.
     65     * @param status The standard ICU error code output parameter.
     66     */
     67    void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
     68        if (U_FAILURE(status)) { return; }
     69        if (uprv_strcmp(source, "convertUnits") != 0) {
     70            // This is very strict, however it is the cheapest way to be sure
     71            // that with `value`, we're looking at the convertUnits table.
     72            status = U_ILLEGAL_ARGUMENT_ERROR;
     73            return;
     74        }
     75        ResourceTable conversionRateTable = value.getTable(status);
     76        const char *srcUnit;
     77        // We're reusing `value`, which seems to be a common pattern:
     78        for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
     79            ResourceTable unitTable = value.getTable(status);
     80            const char *key;
     81            UnicodeString baseUnit = ICU_Utility::makeBogusString();
     82            UnicodeString factor = ICU_Utility::makeBogusString();
     83            UnicodeString offset = ICU_Utility::makeBogusString();
     84            UnicodeString special = ICU_Utility::makeBogusString();
     85            UnicodeString systems = ICU_Utility::makeBogusString();
     86            for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
     87                if (uprv_strcmp(key, "target") == 0) {
     88                    baseUnit = value.getUnicodeString(status);
     89                } else if (uprv_strcmp(key, "factor") == 0) {
     90                    factor = value.getUnicodeString(status);
     91                } else if (uprv_strcmp(key, "offset") == 0) {
     92                    offset = value.getUnicodeString(status);
     93                } else if (uprv_strcmp(key, "special") == 0) {
     94                    special = value.getUnicodeString(status); // the name of a special mapping used instead of factor + optional offset.
     95                } else if (uprv_strcmp(key, "systems") == 0) {
     96                    systems = value.getUnicodeString(status);
     97                }
     98            }
     99            if (U_FAILURE(status)) { return; }
    100            if (baseUnit.isBogus() || (factor.isBogus() && special.isBogus())) {
    101                // We could not find a usable conversion rate: bad resource.
    102                status = U_MISSING_RESOURCE_ERROR;
    103                return;
    104            }
    105 
    106            // We don't have this ConversionRateInfo yet: add it.
    107            ConversionRateInfo *cr = outVector->emplaceBack();
    108            if (!cr) {
    109                status = U_MEMORY_ALLOCATION_ERROR;
    110                return;
    111            } else {
    112                cr->sourceUnit = srcUnit;
    113                if (cr->sourceUnit.isEmpty() != (*srcUnit == '\0')) {
    114                    status = U_MEMORY_ALLOCATION_ERROR;
    115                }
    116                copyInvariantChars(baseUnit, cr->baseUnit, status);
    117                if (U_SUCCESS(status) && !factor.isBogus()) {
    118                    CharString tmp;
    119                    tmp.appendInvariantChars(factor, status);
    120                    trimSpaces(tmp, status);
    121                    if (U_SUCCESS(status)) {
    122                        cr->factor = tmp.toStringPiece();
    123                        if (cr->factor.isEmpty() != tmp.isEmpty()) {
    124                            status = U_MEMORY_ALLOCATION_ERROR;
    125                        }
    126                    }
    127                }
    128                if (!offset.isBogus()) { copyInvariantChars(offset, cr->offset, status); }
    129                if (!special.isBogus()) { copyInvariantChars(special, cr->specialMappingName, status); }
    130                copyInvariantChars(systems, cr->systems, status);
    131            }
    132        }
    133    }
    134 
    135  private:
    136    MaybeStackVector<ConversionRateInfo> *outVector;
    137 };
    138 
    139 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
    140    return a.compareTo(b) < 0;
    141 }
    142 
    143 /**
    144 * A ResourceSink that collects unit preferences information.
    145 *
    146 * This class is for use by ures_getAllItemsWithFallback.
    147 */
    148 class UnitPreferencesSink : public ResourceSink {
    149  public:
    150    /**
    151     * Constructor.
    152     * @param outPrefs The vector to which UnitPreference instances are to be
    153     * added. This vector must outlive the use of the ResourceSink.
    154     * @param outMetadata  The vector to which UnitPreferenceMetadata instances
    155     * are to be added. This vector must outlive the use of the ResourceSink.
    156     */
    157    explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
    158                                 MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
    159        : preferences(outPrefs), metadata(outMetadata) {}
    160 
    161    /**
    162     * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
    163     * preferences info that are found in `value` to the output vector.
    164     *
    165     * @param source This string must be "unitPreferenceData": the resource that
    166     * this class supports reading.
    167     * @param value The "unitPreferenceData" resource, containing unit
    168     * preferences data.
    169     * @param noFallback Ignored.
    170     * @param status The standard ICU error code output parameter. Note: if an
    171     * error is returned, outPrefs and outMetadata may be inconsistent.
    172     */
    173    void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
    174        if (U_FAILURE(status)) { return; }
    175        if (uprv_strcmp(key, "unitPreferenceData") != 0) {
    176            // This is very strict, however it is the cheapest way to be sure
    177            // that with `value`, we're looking at the convertUnits table.
    178            status = U_ILLEGAL_ARGUMENT_ERROR;
    179            return;
    180        }
    181        // The unitPreferenceData structure (see data/misc/units.txt) contains a
    182        // hierarchy of category/usage/region, within which are a set of
    183        // preferences. Hence three for-loops and another loop for the
    184        // preferences themselves:
    185        ResourceTable unitPreferenceDataTable = value.getTable(status);
    186        const char *category;
    187        for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
    188            ResourceTable categoryTable = value.getTable(status);
    189            const char *usage;
    190            for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
    191                ResourceTable regionTable = value.getTable(status);
    192                const char *region;
    193                for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
    194                    // `value` now contains the set of preferences for
    195                    // category/usage/region.
    196                    ResourceArray unitPrefs = value.getArray(status);
    197                    if (U_FAILURE(status)) { return; }
    198                    int32_t prefLen = unitPrefs.getSize();
    199 
    200                    // Update metadata for this set of preferences.
    201                    UnitPreferenceMetadata *meta = metadata->emplaceBack(
    202                        category, usage, region, preferences->length(), prefLen, status);
    203                    if (!meta) {
    204                        status = U_MEMORY_ALLOCATION_ERROR;
    205                        return;
    206                    }
    207                    if (U_FAILURE(status)) { return; }
    208                    if (metadata->length() > 1) {
    209                        // Verify that unit preferences are sorted and
    210                        // without duplicates.
    211                        if (!(*(*metadata)[metadata->length() - 2] <
    212                              *(*metadata)[metadata->length() - 1])) {
    213                            status = U_INVALID_FORMAT_ERROR;
    214                            return;
    215                        }
    216                    }
    217 
    218                    // Collect the individual preferences.
    219                    for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
    220                        UnitPreference *up = preferences->emplaceBack();
    221                        if (!up) {
    222                            status = U_MEMORY_ALLOCATION_ERROR;
    223                            return;
    224                        }
    225                        ResourceTable unitPref = value.getTable(status);
    226                        if (U_FAILURE(status)) { return; }
    227                        for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
    228                            if (uprv_strcmp(key, "unit") == 0) {
    229                                copyInvariantChars(value.getUnicodeString(status), up->unit, status);
    230                            } else if (uprv_strcmp(key, "geq") == 0) {
    231                                int32_t length;
    232                                const char16_t *g = value.getString(length, status);
    233                                CharString geq;
    234                                geq.appendInvariantChars(g, length, status);
    235                                DecimalQuantity dq;
    236                                dq.setToDecNumber(geq.data(), status);
    237                                up->geq = dq.toDouble();
    238                            } else if (uprv_strcmp(key, "skeleton") == 0) {
    239                                up->skeleton = value.getUnicodeString(status);
    240                            }
    241                        }
    242                    }
    243                }
    244            }
    245        }
    246    }
    247 
    248  private:
    249    MaybeStackVector<UnitPreference> *preferences;
    250    MaybeStackVector<UnitPreferenceMetadata> *metadata;
    251 };
    252 
    253 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
    254                     const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
    255                     bool *foundRegion, UErrorCode &status) {
    256    if (U_FAILURE(status)) { return -1; }
    257    int32_t start = 0;
    258    int32_t end = metadata->length();
    259    *foundCategory = false;
    260    *foundUsage = false;
    261    *foundRegion = false;
    262    while (start < end) {
    263        int32_t mid = (start + end) / 2;
    264        int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
    265        if (cmp < 0) {
    266            start = mid + 1;
    267        } else if (cmp > 0) {
    268            end = mid;
    269        } else {
    270            return mid;
    271        }
    272    }
    273    return -1;
    274 }
    275 
    276 /**
    277 * Finds the UnitPreferenceMetadata instance that matches the given category,
    278 * usage and region: if missing, region falls back to "001", and usage
    279 * repeatedly drops tailing components, eventually trying "default"
    280 * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
    281 *
    282 * @param metadata The full list of UnitPreferenceMetadata instances.
    283 * @param category The category to search for. See getUnitCategory().
    284 * @param usage The usage for which formatting preferences is needed. If the
    285 * given usage is not known, automatic fallback occurs, see function description
    286 * above.
    287 * @param region The region for which preferences are needed. If there are no
    288 * region-specific preferences, this function automatically falls back to the
    289 * "001" region (global).
    290 * @param status The standard ICU error code output parameter.
    291 *   * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
    292 *   * If fallback to "default" or "001" didn't resolve, status will be
    293 *     U_MISSING_RESOURCE.
    294 * @return The index into the metadata vector which represents the appropriate
    295 * preferences. If appropriate preferences are not found, -1 is returned.
    296 */
    297 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
    298                                   StringPiece category, StringPiece usage, StringPiece region,
    299                                   UErrorCode &status) {
    300    if (U_FAILURE(status)) { return -1; }
    301    bool foundCategory, foundUsage, foundRegion;
    302    UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
    303    int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
    304    if (U_FAILURE(status)) { return -1; }
    305    if (idx >= 0) { return idx; }
    306    if (!foundCategory) {
    307        // TODO: failures can happen if units::getUnitCategory returns a category
    308        // that does not appear in unitPreferenceData. Do we want a unit test that
    309        // checks unitPreferenceData has full coverage of categories? Or just trust
    310        // CLDR?
    311        status = U_ILLEGAL_ARGUMENT_ERROR;
    312        return -1;
    313    }
    314    U_ASSERT(foundCategory);
    315    while (!foundUsage) {
    316        int32_t lastDashIdx = desired.usage.lastIndexOf('-');
    317        if (lastDashIdx > 0) {
    318            desired.usage.truncate(lastDashIdx);
    319        } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
    320            desired.usage.truncate(0).append("default", status);
    321        } else {
    322            // "default" is not supposed to be missing for any valid category.
    323            status = U_MISSING_RESOURCE_ERROR;
    324            return -1;
    325        }
    326        idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
    327        if (U_FAILURE(status)) { return -1; }
    328    }
    329    U_ASSERT(foundCategory);
    330    U_ASSERT(foundUsage);
    331    if (!foundRegion) {
    332        if (uprv_strcmp(desired.region.data(), "001") != 0) {
    333            desired.region.truncate(0).append("001", status);
    334            idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
    335        }
    336        if (!foundRegion) {
    337            // "001" is not supposed to be missing for any valid usage.
    338            status = U_MISSING_RESOURCE_ERROR;
    339            return -1;
    340        }
    341    }
    342    U_ASSERT(foundCategory);
    343    U_ASSERT(foundUsage);
    344    U_ASSERT(foundRegion);
    345    U_ASSERT(idx >= 0);
    346    return idx;
    347 }
    348 
    349 } // namespace
    350 
    351 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
    352                                               StringPiece region, int32_t prefsOffset,
    353                                               int32_t prefsCount, UErrorCode &status) {
    354    this->category.append(category, status);
    355    this->usage.append(usage, status);
    356    this->region.append(region, status);
    357    this->prefsOffset = prefsOffset;
    358    this->prefsCount = prefsCount;
    359 }
    360 
    361 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
    362    int32_t cmp = uprv_strcmp(category.data(), other.category.data());
    363    if (cmp == 0) {
    364        cmp = uprv_strcmp(usage.data(), other.usage.data());
    365    }
    366    if (cmp == 0) {
    367        cmp = uprv_strcmp(region.data(), other.region.data());
    368    }
    369    return cmp;
    370 }
    371 
    372 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
    373                                          bool *foundUsage, bool *foundRegion) const {
    374    int32_t cmp = uprv_strcmp(category.data(), other.category.data());
    375    if (cmp == 0) {
    376        *foundCategory = true;
    377        cmp = uprv_strcmp(usage.data(), other.usage.data());
    378    }
    379    if (cmp == 0) {
    380        *foundUsage = true;
    381        cmp = uprv_strcmp(region.data(), other.region.data());
    382    }
    383    if (cmp == 0) {
    384        *foundRegion = true;
    385    }
    386    return cmp;
    387 }
    388 
    389 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
    390 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
    391    LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
    392    ConversionRateDataSink sink(&result);
    393    ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
    394 }
    395 
    396 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
    397                                                                 UErrorCode &status) const {
    398    for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
    399        if (uprv_strncmp(conversionInfo_[i]->sourceUnit.data(), source.data(), source.size()) == 0) {
    400            return conversionInfo_[i];
    401        }
    402    }
    403 
    404    status = U_INTERNAL_PROGRAM_ERROR;
    405    return nullptr;
    406 }
    407 
    408 UnitPreferences::UnitPreferences(UErrorCode& status) {
    409    LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
    410    UnitPreferencesSink sink(&unitPrefs_, &metadata_);
    411    ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
    412 }
    413 
    414 CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) {
    415    if (U_FAILURE(status)) { return {}; }
    416    auto result = locale.getKeywordValue<CharString>(kw, status);
    417    if (U_SUCCESS(status) && result.isEmpty()) {
    418        status = U_MISSING_RESOURCE_ERROR;
    419    }
    420    return result;
    421 }
    422 
    423 MaybeStackVector<UnitPreference> UnitPreferences::getPreferencesFor(StringPiece category,
    424                                                                    StringPiece usage,
    425                                                                    const Locale& locale,
    426                                                                    UErrorCode& status) const {
    427    MaybeStackVector<UnitPreference> result;
    428 
    429    // TODO: remove this once all the categories are allowed.
    430    // WARNING: when this is removed please make sure to keep the "fahrenhe" => "fahrenheit" mapping
    431    UErrorCode internalMuStatus = U_ZERO_ERROR;
    432    if (category.compare("temperature") == 0) {
    433        CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus);
    434        if (U_SUCCESS(internalMuStatus)) {
    435            // The value for -u-mu- is `fahrenhe`, but CLDR and everything else uses `fahrenheit`
    436            if (localeUnitCharString == "fahrenhe") {
    437                localeUnitCharString = CharString("fahrenheit", status);
    438            }
    439            // TODO: use the unit category as Java especially when all the categories are allowed..
    440            if (localeUnitCharString == "celsius"
    441                || localeUnitCharString == "fahrenheit"
    442                || localeUnitCharString == "kelvin"
    443            ) {
    444                UnitPreference unitPref;
    445                unitPref.unit = localeUnitCharString.toStringPiece();
    446                if (unitPref.unit.isEmpty() != localeUnitCharString.isEmpty()) {
    447                    status = U_MISSING_RESOURCE_ERROR;
    448                    return result;
    449                }
    450                result.emplaceBackAndCheckErrorCode(status, unitPref);
    451                return result;
    452            }
    453        }
    454    }
    455 
    456    CharString region = ulocimp_getRegionForSupplementalData(locale.getName(), true, status);
    457 
    458    // Check the locale system tag, e.g `ms=metric`.
    459    UErrorCode internalMeasureTagStatus = U_ZERO_ERROR;
    460    CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus);
    461    bool isLocaleSystem = false;
    462    if (U_SUCCESS(internalMeasureTagStatus) && (localeSystem == "metric" || localeSystem == "ussystem" || localeSystem == "uksystem")) {
    463        isLocaleSystem = true;
    464    }
    465 
    466    int32_t idx =
    467        getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
    468    if (U_FAILURE(status)) {
    469        return result;
    470    }
    471 
    472    U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
    473    const UnitPreferenceMetadata *m = metadata_[idx];
    474        
    475    if (isLocaleSystem) {
    476        // if the locale ID specifies a measurment system, check if ALL of the units we got back
    477        // are members of that system (or are "metric_adjacent", which we consider to match all
    478        // the systems)
    479        bool unitsMatchSystem = true;
    480        ConversionRates rates(status);
    481        for (int32_t i = 0; unitsMatchSystem && i < m->prefsCount; i++) {
    482            const UnitPreference& unitPref = *(unitPrefs_[i + m->prefsOffset]);
    483            MeasureUnitImpl measureUnit = MeasureUnitImpl::forIdentifier(unitPref.unit.data(), status);
    484            for (int32_t j = 0; unitsMatchSystem && j < measureUnit.singleUnits.length(); j++) {
    485                const SingleUnitImpl* singleUnit = measureUnit.singleUnits[j];
    486                const ConversionRateInfo* rateInfo = rates.extractConversionInfo(singleUnit->getSimpleUnitID(), status);
    487                const char* systems = rateInfo->systems.data();
    488                // "metric-adjacent" is considered to match all the locale systems
    489                if (uprv_strstr(systems, "metric_adjacent") == nullptr) {
    490                    if (uprv_strstr(systems, localeSystem.data()) == nullptr) {
    491                        unitsMatchSystem = false;
    492                    }
    493                }
    494            }
    495        }
    496        
    497        // if any of the units we got back above don't match the mearurement system the locale ID asked for,
    498        // throw out the region and just load the units for the base region for the requested measurement system
    499        if (!unitsMatchSystem) {
    500            region.clear();
    501            if (localeSystem == "ussystem") {
    502                region.append("US", status);
    503            } else if (localeSystem == "uksystem") {
    504                region.append("GB", status);
    505            } else {
    506                region.append("001", status);
    507            }
    508            idx = getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
    509            if (U_FAILURE(status)) {
    510                return result;
    511            }
    512            
    513            m = metadata_[idx];
    514        }
    515    }
    516        
    517    for (int32_t i = 0; i < m->prefsCount; i++) {
    518        result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset]));
    519    }
    520    return result;
    521 }
    522 
    523 } // namespace units
    524 U_NAMESPACE_END
    525 
    526 #endif /* #if !UCONFIG_NO_FORMATTING */