tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

localebuilder.cpp (15252B)


      1 // © 2019 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include <optional>
      5 #include <string_view>
      6 #include <utility>
      7 
      8 #include "bytesinkutil.h"  // StringByteSink<CharString>
      9 #include "charstr.h"
     10 #include "cstring.h"
     11 #include "fixedstring.h"
     12 #include "ulocimp.h"
     13 #include "unicode/localebuilder.h"
     14 #include "unicode/locid.h"
     15 
     16 namespace {
     17 
     18 inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; }
     19 inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }
     20 
     21 constexpr const char* kAttributeKey = "attribute";
     22 
     23 bool _isExtensionSubtags(char key, const char* s, int32_t len) {
     24    switch (uprv_tolower(key)) {
     25        case 'u':
     26            return ultag_isUnicodeExtensionSubtags(s, len);
     27        case 't':
     28            return ultag_isTransformedExtensionSubtags(s, len);
     29        case 'x':
     30            return ultag_isPrivateuseValueSubtags(s, len);
     31        default:
     32            return ultag_isExtensionSubtags(s, len);
     33    }
     34 }
     35 
     36 }  // namespace
     37 
     38 U_NAMESPACE_BEGIN
     39 
     40 LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
     41    script_(), region_(), variant_(nullptr), extensions_(nullptr)
     42 {
     43    language_[0] = 0;
     44    script_[0] = 0;
     45    region_[0] = 0;
     46 }
     47 
     48 LocaleBuilder::~LocaleBuilder()
     49 {
     50    delete variant_;
     51    delete extensions_;
     52 }
     53 
     54 LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
     55 {
     56    clear();
     57    setLanguage(locale.getLanguage());
     58    setScript(locale.getScript());
     59    setRegion(locale.getCountry());
     60    setVariant(locale.getVariant());
     61    extensions_ = locale.clone();
     62    if (extensions_ == nullptr) {
     63        status_ = U_MEMORY_ALLOCATION_ERROR;
     64    }
     65    return *this;
     66 }
     67 
     68 LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
     69 {
     70    Locale l = Locale::forLanguageTag(tag, status_);
     71    if (U_FAILURE(status_)) { return *this; }
     72    // Because setLocale will reset status_ we need to return
     73    // first if we have error in forLanguageTag.
     74    setLocale(l);
     75    return *this;
     76 }
     77 
     78 namespace {
     79 
     80 void setField(StringPiece input, char* dest, UErrorCode& errorCode,
     81              bool (*test)(const char*, int32_t)) {
     82    if (U_FAILURE(errorCode)) { return; }
     83    if (input.empty()) {
     84        dest[0] = '\0';
     85    } else if (test(input.data(), input.length())) {
     86        uprv_memcpy(dest, input.data(), input.length());
     87        dest[input.length()] = '\0';
     88    } else {
     89        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     90    }
     91 }
     92 
     93 }  // namespace
     94 
     95 LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
     96 {
     97    setField(language, language_, status_, &ultag_isLanguageSubtag);
     98    return *this;
     99 }
    100 
    101 LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
    102 {
    103    setField(script, script_, status_, &ultag_isScriptSubtag);
    104    return *this;
    105 }
    106 
    107 LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
    108 {
    109    setField(region, region_, status_, &ultag_isRegionSubtag);
    110    return *this;
    111 }
    112 
    113 namespace {
    114 
    115 void transform(char* data, int32_t len) {
    116    for (int32_t i = 0; i < len; i++, data++) {
    117        if (*data == '_') {
    118            *data = '-';
    119        } else {
    120            *data = uprv_tolower(*data);
    121        }
    122    }
    123 }
    124 
    125 }  // namespace
    126 
    127 LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
    128 {
    129    if (U_FAILURE(status_)) { return *this; }
    130    if (variant.empty()) {
    131        delete variant_;
    132        variant_ = nullptr;
    133        return *this;
    134    }
    135    FixedString* new_variant = new FixedString(variant);
    136    if (new_variant == nullptr || new_variant->isEmpty()) {
    137        status_ = U_MEMORY_ALLOCATION_ERROR;
    138        return *this;
    139    }
    140    transform(new_variant->getAlias(), variant.length());
    141    if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) {
    142        delete new_variant;
    143        status_ = U_ILLEGAL_ARGUMENT_ERROR;
    144        return *this;
    145    }
    146    delete variant_;
    147    variant_ = new_variant;
    148    return *this;
    149 }
    150 
    151 namespace {
    152 
    153 bool
    154 _isKeywordValue(const char* key, const char* value, int32_t value_len)
    155 {
    156    if (key[1] == '\0') {
    157        // one char key
    158        return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
    159                _isExtensionSubtags(key[0], value, value_len));
    160    } else if (uprv_strcmp(key, kAttributeKey) == 0) {
    161        // unicode attributes
    162        return ultag_isUnicodeLocaleAttributes(value, value_len);
    163    }
    164    // otherwise: unicode extension value
    165    // We need to convert from legacy key/value to unicode
    166    // key/value
    167    std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key);
    168    std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value);
    169 
    170    return unicode_locale_key.has_value() &&
    171           unicode_locale_type.has_value() &&
    172           ultag_isUnicodeLocaleKey(unicode_locale_key->data(),
    173                                    static_cast<int32_t>(unicode_locale_key->size())) &&
    174           ultag_isUnicodeLocaleType(unicode_locale_type->data(),
    175                                     static_cast<int32_t>(unicode_locale_type->size()));
    176 }
    177 
    178 void
    179 _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
    180                Locale& to, bool validate, UErrorCode& errorCode)
    181 {
    182    if (U_FAILURE(errorCode)) { return; }
    183    LocalPointer<icu::StringEnumeration> ownedKeywords;
    184    if (keywords == nullptr) {
    185        ownedKeywords.adoptInstead(from.createKeywords(errorCode));
    186        if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
    187        keywords = ownedKeywords.getAlias();
    188    }
    189    const char* key;
    190    while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
    191        auto value = from.getKeywordValue<CharString>(key, errorCode);
    192        if (U_FAILURE(errorCode)) { return; }
    193        if (uprv_strcmp(key, kAttributeKey) == 0) {
    194            transform(value.data(), value.length());
    195        }
    196        if (validate &&
    197            !_isKeywordValue(key, value.data(), value.length())) {
    198            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    199            return;
    200        }
    201        to.setKeywordValue(key, value.data(), errorCode);
    202        if (U_FAILURE(errorCode)) { return; }
    203    }
    204 }
    205 
    206 void
    207 _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
    208 {
    209    if (U_FAILURE(errorCode)) { return; }
    210    // Clear Unicode attributes
    211    locale.setKeywordValue(kAttributeKey, "", errorCode);
    212 
    213    // Clear all Unicode keyword values
    214    LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
    215    if (U_FAILURE(errorCode) || iter.isNull()) { return; }
    216    const char* key;
    217    while ((key = iter->next(nullptr, errorCode)) != nullptr) {
    218        locale.setUnicodeKeywordValue(key, nullptr, errorCode);
    219    }
    220 }
    221 
    222 void
    223 _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
    224 {
    225    if (U_FAILURE(errorCode)) { return; }
    226    // Add the unicode extensions to extensions_
    227    CharString locale_str("und-u-", errorCode);
    228    locale_str.append(value, errorCode);
    229    _copyExtensions(
    230        Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
    231        locale, false, errorCode);
    232 }
    233 
    234 }  // namespace
    235 
    236 LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
    237 {
    238    if (U_FAILURE(status_)) { return *this; }
    239    if (!UPRV_ISALPHANUM(key)) {
    240        status_ = U_ILLEGAL_ARGUMENT_ERROR;
    241        return *this;
    242    }
    243    CharString value_str(value, status_);
    244    if (U_FAILURE(status_)) { return *this; }
    245    transform(value_str.data(), value_str.length());
    246    if (!value_str.isEmpty() &&
    247            !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
    248        status_ = U_ILLEGAL_ARGUMENT_ERROR;
    249        return *this;
    250    }
    251    if (extensions_ == nullptr) {
    252        extensions_ = Locale::getRoot().clone();
    253        if (extensions_ == nullptr) {
    254            status_ = U_MEMORY_ALLOCATION_ERROR;
    255            return *this;
    256        }
    257    }
    258    if (uprv_tolower(key) != 'u') {
    259        // for t, x and others extension.
    260        extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
    261                                     status_);
    262        return *this;
    263    }
    264    _clearUAttributesAndKeyType(*extensions_, status_);
    265    if (U_FAILURE(status_)) { return *this; }
    266    if (!value.empty()) {
    267        _setUnicodeExtensions(*extensions_, value_str, status_);
    268    }
    269    return *this;
    270 }
    271 
    272 LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
    273      StringPiece key, StringPiece type)
    274 {
    275    if (U_FAILURE(status_)) { return *this; }
    276    if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
    277            (!type.empty() &&
    278                 !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
    279      status_ = U_ILLEGAL_ARGUMENT_ERROR;
    280      return *this;
    281    }
    282    if (extensions_ == nullptr) {
    283        extensions_ = Locale::getRoot().clone();
    284        if (extensions_ == nullptr) {
    285            status_ = U_MEMORY_ALLOCATION_ERROR;
    286            return *this;
    287        }
    288    }
    289    extensions_->setUnicodeKeywordValue(key, type, status_);
    290    return *this;
    291 }
    292 
    293 LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
    294    StringPiece value)
    295 {
    296    CharString value_str(value, status_);
    297    if (U_FAILURE(status_)) { return *this; }
    298    transform(value_str.data(), value_str.length());
    299    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
    300        status_ = U_ILLEGAL_ARGUMENT_ERROR;
    301        return *this;
    302    }
    303    if (extensions_ == nullptr) {
    304        extensions_ = Locale::getRoot().clone();
    305        if (extensions_ == nullptr) {
    306            status_ = U_MEMORY_ALLOCATION_ERROR;
    307            return *this;
    308        }
    309        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
    310        return *this;
    311    }
    312 
    313    UErrorCode localErrorCode = U_ZERO_ERROR;
    314    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
    315    if (U_FAILURE(localErrorCode)) {
    316        CharString new_attributes(value_str.data(), status_);
    317        // No attributes, set the attribute.
    318        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
    319        return *this;
    320    }
    321 
    322    transform(attributes.data(),attributes.length());
    323    const char* start = attributes.data();
    324    const char* limit = attributes.data() + attributes.length();
    325    CharString new_attributes;
    326    bool inserted = false;
    327    while (start < limit) {
    328        if (!inserted) {
    329            int cmp = uprv_strcmp(start, value_str.data());
    330            if (cmp == 0) { return *this; }  // Found it in attributes: Just return
    331            if (cmp > 0) {
    332                if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
    333                new_attributes.append(value_str.data(), status_);
    334                inserted = true;
    335            }
    336        }
    337        if (!new_attributes.isEmpty()) {
    338            new_attributes.append('_', status_);
    339        }
    340        new_attributes.append(start, status_);
    341        start += uprv_strlen(start) + 1;
    342    }
    343    if (!inserted) {
    344        if (!new_attributes.isEmpty()) {
    345            new_attributes.append('_', status_);
    346        }
    347        new_attributes.append(value_str.data(), status_);
    348    }
    349    // Not yet in the attributes, set the attribute.
    350    extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
    351    return *this;
    352 }
    353 
    354 LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
    355    StringPiece value)
    356 {
    357    CharString value_str(value, status_);
    358    if (U_FAILURE(status_)) { return *this; }
    359    transform(value_str.data(), value_str.length());
    360    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
    361        status_ = U_ILLEGAL_ARGUMENT_ERROR;
    362        return *this;
    363    }
    364    if (extensions_ == nullptr) { return *this; }
    365    UErrorCode localErrorCode = U_ZERO_ERROR;
    366    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
    367    // get failure, just return
    368    if (U_FAILURE(localErrorCode)) { return *this; }
    369    // Do not have any attributes, just return.
    370    if (attributes.isEmpty()) { return *this; }
    371 
    372    char* p = attributes.data();
    373    // Replace null terminiator in place for _ and - so later
    374    // we can use uprv_strcmp to compare.
    375    for (int32_t i = 0; i < attributes.length(); i++, p++) {
    376        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
    377    }
    378 
    379    const char* start = attributes.data();
    380    const char* limit = attributes.data() + attributes.length();
    381    CharString new_attributes;
    382    bool found = false;
    383    while (start < limit) {
    384        if (uprv_strcmp(start, value_str.data()) == 0) {
    385            found = true;
    386        } else {
    387            if (!new_attributes.isEmpty()) {
    388                new_attributes.append('_', status_);
    389            }
    390            new_attributes.append(start, status_);
    391        }
    392        start += uprv_strlen(start) + 1;
    393    }
    394    // Found the value in attributes, set the attribute.
    395    if (found) {
    396        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
    397    }
    398    return *this;
    399 }
    400 
    401 LocaleBuilder& LocaleBuilder::clear()
    402 {
    403    status_ = U_ZERO_ERROR;
    404    language_[0] = 0;
    405    script_[0] = 0;
    406    region_[0] = 0;
    407    delete variant_;
    408    variant_ = nullptr;
    409    clearExtensions();
    410    return *this;
    411 }
    412 
    413 LocaleBuilder& LocaleBuilder::clearExtensions()
    414 {
    415    delete extensions_;
    416    extensions_ = nullptr;
    417    return *this;
    418 }
    419 
    420 Locale makeBogusLocale() {
    421  Locale bogus;
    422  bogus.setToBogus();
    423  return bogus;
    424 }
    425 
    426 void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
    427 {
    428    if (U_FAILURE(errorCode)) { return; }
    429    LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
    430    if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
    431        // Error, or no extensions to copy.
    432        return;
    433    }
    434    if (extensions_ == nullptr) {
    435        extensions_ = Locale::getRoot().clone();
    436        if (extensions_ == nullptr) {
    437            status_ = U_MEMORY_ALLOCATION_ERROR;
    438            return;
    439        }
    440    }
    441    _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
    442 }
    443 
    444 Locale LocaleBuilder::build(UErrorCode& errorCode)
    445 {
    446    if (U_FAILURE(errorCode)) {
    447        return makeBogusLocale();
    448    }
    449    if (U_FAILURE(status_)) {
    450        errorCode = status_;
    451        return makeBogusLocale();
    452    }
    453    CharString locale_str(language_, errorCode);
    454    if (uprv_strlen(script_) > 0) {
    455        locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
    456    }
    457    if (uprv_strlen(region_) > 0) {
    458        locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
    459    }
    460    if (variant_ != nullptr) {
    461        locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
    462    }
    463    if (U_FAILURE(errorCode)) {
    464        return makeBogusLocale();
    465    }
    466    Locale product(locale_str.data());
    467    if (extensions_ != nullptr) {
    468        _copyExtensions(*extensions_, nullptr, product, true, errorCode);
    469    }
    470    if (U_FAILURE(errorCode)) {
    471        return makeBogusLocale();
    472    }
    473    return product;
    474 }
    475 
    476 UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
    477    if (U_FAILURE(outErrorCode)) {
    478        // Do not overwrite the older error code
    479        return true;
    480    }
    481    outErrorCode = status_;
    482    return U_FAILURE(outErrorCode);
    483 }
    484 
    485 U_NAMESPACE_END