tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

erarules.cpp (13546B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include <utility>
      5 
      6 #include "unicode/utypes.h"
      7 
      8 #if !UCONFIG_NO_FORMATTING
      9 
     10 #include <stdlib.h>
     11 #include "unicode/ucal.h"
     12 #include "unicode/ures.h"
     13 #include "unicode/ustring.h"
     14 #include "unicode/timezone.h"
     15 #include "cmemory.h"
     16 #include "cstring.h"
     17 #include "erarules.h"
     18 #include "gregoimp.h"
     19 #include "uassert.h"
     20 #include "uvectr32.h"
     21 
     22 U_NAMESPACE_BEGIN
     23 
     24 static const int32_t MAX_ENCODED_START_YEAR = 32767;
     25 static const int32_t MIN_ENCODED_START_YEAR = -32768;
     26 static const int32_t MIN_ENCODED_START = -2147483391;   // encodeDate(MIN_ENCODED_START_YEAR, 1, 1, ...);
     27 
     28 static const int32_t YEAR_MASK = 0xFFFF0000;
     29 static const int32_t MONTH_MASK = 0x0000FF00;
     30 static const int32_t DAY_MASK = 0x000000FF;
     31 
     32 static const int32_t MAX_INT32 = 0x7FFFFFFF;
     33 static const int32_t MIN_INT32 = 0xFFFFFFFF;
     34 
     35 static const char16_t VAL_FALSE[] = {0x66, 0x61, 0x6c, 0x73, 0x65};    // "false"
     36 static const char16_t VAL_FALSE_LEN = 5;
     37 
     38 static UBool isSet(int startDate) {
     39    return startDate != 0;
     40 }
     41 
     42 static UBool isValidRuleStartDate(int32_t year, int32_t month, int32_t day) {
     43    return year >= MIN_ENCODED_START_YEAR && year <= MAX_ENCODED_START_YEAR
     44            && month >= 1 && month <= 12 && day >=1 && day <= 31;
     45 }
     46 
     47 /**
     48 * Encode year/month/date to a single integer.
     49 * year is high 16 bits (-32768 to 32767), month is
     50 * next 8 bits and day of month is last 8 bits.
     51 *
     52 * @param year  year
     53 * @param month month (1-base)
     54 * @param day   day of month
     55 * @return  an encoded date.
     56 */
     57 static int32_t encodeDate(int32_t year, int32_t month, int32_t day) {
     58    return static_cast<int32_t>(static_cast<uint32_t>(year) << 16) | month << 8 | day;
     59 }
     60 
     61 static void decodeDate(int32_t encodedDate, int32_t (&fields)[3]) {
     62    if (encodedDate == MIN_ENCODED_START) {
     63        fields[0] = MIN_INT32;
     64        fields[1] = 1;
     65        fields[2] = 1;
     66    } else {
     67        fields[0] = (encodedDate & YEAR_MASK) >> 16;
     68        fields[1] = (encodedDate & MONTH_MASK) >> 8;
     69        fields[2] = encodedDate & DAY_MASK;
     70    }
     71 }
     72 
     73 /**
     74 * Compare an encoded date with another date specified by year/month/day.
     75 * @param encoded   An encoded date
     76 * @param year      Year of another date
     77 * @param month     Month of another date
     78 * @param day       Day of another date
     79 * @return -1 when encoded date is earlier, 0 when two dates are same,
     80 *          and 1 when encoded date is later.
     81 */
     82 static int32_t compareEncodedDateWithYMD(int encoded, int year, int month, int day) {
     83    if (year < MIN_ENCODED_START_YEAR) {
     84        if (encoded == MIN_ENCODED_START) {
     85            if (year > MIN_INT32 || month > 1 || day > 1) {
     86                return -1;
     87            }
     88            return 0;
     89        } else {
     90            return 1;
     91        }
     92    } else if (year > MAX_ENCODED_START_YEAR) {
     93        return -1;
     94    } else {
     95        int tmp = encodeDate(year, month, day);
     96        if (encoded < tmp) {
     97            return -1;
     98        } else if (encoded == tmp) {
     99            return 0;
    100        } else {
    101            return 1;
    102        }
    103    }
    104 }
    105 
    106 EraRules::EraRules(LocalMemory<int32_t>& startDatesIn, int32_t startDatesLengthIn, int32_t minEraIn, int32_t numErasIn)
    107    : startDatesLength(startDatesLengthIn), minEra(minEraIn), numEras(numErasIn) {
    108    startDates = std::move(startDatesIn);
    109    initCurrentEra();
    110 }
    111 
    112 EraRules::~EraRules() {
    113 }
    114 
    115 EraRules* EraRules::createInstance(const char *calType, UBool includeTentativeEra, UErrorCode& status) {
    116    if(U_FAILURE(status)) {
    117        return nullptr;
    118    }
    119    LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "supplementalData", &status));
    120    ures_getByKey(rb.getAlias(), "calendarData", rb.getAlias(), &status);
    121    ures_getByKey(rb.getAlias(), calType, rb.getAlias(), &status);
    122    ures_getByKey(rb.getAlias(), "eras", rb.getAlias(), &status);
    123 
    124    if (U_FAILURE(status)) {
    125        return nullptr;
    126    }
    127 
    128    int32_t numEras = ures_getSize(rb.getAlias());
    129    int32_t firstTentativeIdx = MAX_INT32;
    130 
    131    UVector32 eraStartDates(numEras, status);
    132    if (U_FAILURE(status)) {
    133        return nullptr;
    134    }
    135 
    136    while (ures_hasNext(rb.getAlias())) {
    137        LocalUResourceBundlePointer eraRuleRes(ures_getNextResource(rb.getAlias(), nullptr, &status));
    138        if (U_FAILURE(status)) {
    139            return nullptr;
    140        }
    141        const char *eraIdxStr = ures_getKey(eraRuleRes.getAlias());
    142        char *endp;
    143        int32_t eraIdx = static_cast<int32_t>(uprv_strtol(eraIdxStr, &endp, 10));
    144        if (static_cast<size_t>(endp - eraIdxStr) != uprv_strlen(eraIdxStr)) {
    145            status = U_INVALID_FORMAT_ERROR;
    146            return nullptr;
    147        }
    148        if (eraIdx < 0) {
    149            status = U_INVALID_FORMAT_ERROR;
    150            return nullptr;
    151        }
    152        if (eraIdx + 1 > eraStartDates.size()) {
    153            eraStartDates.ensureCapacity(eraIdx + 1, status); // needed only to minimize expansions
    154            // Fill in 0 for all added slots (else they are undefined)
    155            while (eraStartDates.size() < eraIdx + 1) {
    156                eraStartDates.addElement(0, status);
    157            }
    158            if (U_FAILURE(status)) {
    159                return nullptr;
    160            }
    161        }
    162        // Now set the startDate that we just read
    163        if (isSet(eraStartDates.elementAti(eraIdx))) {
    164            // start date of the index was already set
    165            status = U_INVALID_FORMAT_ERROR;
    166            return nullptr;
    167        }
    168 
    169        UBool hasName = true;
    170        UBool hasEnd = true;
    171        int32_t len;
    172        while (ures_hasNext(eraRuleRes.getAlias())) {
    173            LocalUResourceBundlePointer res(ures_getNextResource(eraRuleRes.getAlias(), nullptr, &status));
    174            if (U_FAILURE(status)) {
    175                return nullptr;
    176            }
    177            const char *key = ures_getKey(res.getAlias());
    178            if (uprv_strcmp(key, "start") == 0) {
    179                const int32_t *fields = ures_getIntVector(res.getAlias(), &len, &status);
    180                if (U_FAILURE(status)) {
    181                    return nullptr;
    182                }
    183                if (len != 3 || !isValidRuleStartDate(fields[0], fields[1], fields[2])) {
    184                    status = U_INVALID_FORMAT_ERROR;
    185                    return nullptr;
    186                }
    187                eraStartDates.setElementAt(encodeDate(fields[0], fields[1], fields[2]), eraIdx);
    188            } else if (uprv_strcmp(key, "named") == 0) {
    189                const char16_t *val = ures_getString(res.getAlias(), &len, &status);
    190                if (u_strncmp(val, VAL_FALSE, VAL_FALSE_LEN) == 0) {
    191                    hasName = false;
    192                }
    193            } else if (uprv_strcmp(key, "end") == 0) {
    194                hasEnd = true;
    195            }
    196        }
    197 
    198        if (isSet(eraStartDates.elementAti(eraIdx))) {
    199            if (hasEnd) {
    200                // This implementation assumes either start or end is available, not both.
    201                // For now, just ignore the end rule.
    202            }
    203        } else {
    204            if (hasEnd) {
    205                // The islamic calendars now have an end-only rule for the
    206                // second (and final) entry; basically they are in reverse order.
    207                eraStartDates.setElementAt(MIN_ENCODED_START, eraIdx);
    208            } else {
    209                status = U_INVALID_FORMAT_ERROR;
    210                return nullptr;
    211            }
    212        }
    213 
    214        if (hasName) {
    215            if (eraIdx >= firstTentativeIdx) {
    216                status = U_INVALID_FORMAT_ERROR;
    217                return nullptr;
    218            }
    219        } else {
    220            if (eraIdx < firstTentativeIdx) {
    221                firstTentativeIdx = eraIdx;
    222            }
    223        }
    224    }
    225 
    226    // Remove from eraStartDates any tentative eras if they should not be included
    227    // (these would be the last entries). Also reduce numEras appropriately.
    228    if (!includeTentativeEra) {
    229        while (firstTentativeIdx < eraStartDates.size()) {
    230            int32_t lastEraIdx = eraStartDates.size() - 1;
    231            if (isSet(eraStartDates.elementAti(lastEraIdx))) { // If there are multiple tentativeEras, some may be unset
    232                numEras--;
    233            }
    234            eraStartDates.removeElementAt(lastEraIdx);
    235        }
    236        // Remove any remaining trailing unSet entries
    237        // (can only have these if tentativeEras have been removed)
    238        while (eraStartDates.size() > 0 && !isSet(eraStartDates.elementAti(eraStartDates.size() - 1))) {
    239            eraStartDates.removeElementAt(eraStartDates.size() - 1);
    240        }
    241    }
    242    // Remove from eraStartDates any initial 0 entries, keeping the original index (eraCode)
    243    // of the first non-zero entry as minEra; then we can add that back to the offset in the
    244    // compressed array to get the correct eraCode.
    245    int32_t minEra = 0;
    246    while (eraStartDates.size() > 0 && !isSet(eraStartDates.elementAti(0))) {
    247        eraStartDates.removeElementAt(0);
    248        minEra++;
    249    }
    250    // Convert eraStartDates to int32_t array startDates and pass to EraRules constructor,
    251    // along with startDatesLength, minEra and numEras (which may be different from startDatesLength)
    252    LocalMemory<int32_t> startDates(static_cast<int32_t *>(uprv_malloc(eraStartDates.size() * sizeof(int32_t))));
    253    if (startDates.isNull()) {
    254        status = U_MEMORY_ALLOCATION_ERROR;
    255        return nullptr;
    256    }
    257    for (int32_t eraIdx = 0; eraIdx < eraStartDates.size(); eraIdx++) {
    258        startDates[eraIdx] = eraStartDates.elementAti(eraIdx);
    259    }
    260    EraRules *result = new EraRules(startDates, eraStartDates.size(), minEra, numEras);
    261    if (result == nullptr) {
    262        status = U_MEMORY_ALLOCATION_ERROR;
    263    }
    264    return result;
    265 }
    266 
    267 void EraRules::getStartDate(int32_t eraCode, int32_t (&fields)[3], UErrorCode& status) const {
    268    if(U_FAILURE(status)) {
    269        return;
    270    }
    271    int32_t startDate = 0;
    272    if (eraCode >= minEra) {
    273        int32_t startIdx = eraCode - minEra;
    274        if (startIdx < startDatesLength) {
    275            startDate = startDates[startIdx];
    276        }
    277    }
    278    if (isSet(startDate)) {
    279        decodeDate(startDate, fields);
    280        return;
    281    }
    282    // We did not find the requested eraCode in our data
    283    status = U_ILLEGAL_ARGUMENT_ERROR;
    284    return;
    285 }
    286 
    287 int32_t EraRules::getStartYear(int32_t eraCode, UErrorCode& status) const {
    288    int year = MAX_INT32;   // bogus value
    289    if(U_FAILURE(status)) {
    290        return year;
    291    }
    292    int32_t startDate = 0;
    293    if (eraCode >= minEra) {
    294        int32_t startIdx = eraCode - minEra;
    295        if (startIdx < startDatesLength) {
    296            startDate = startDates[startIdx];
    297        }
    298    }
    299    if (isSet(startDate)) {
    300        int fields[3];
    301        decodeDate(startDate, fields);
    302        year = fields[0];
    303        return year;
    304    }
    305    // We did not find the requested eraCode in our data
    306    status = U_ILLEGAL_ARGUMENT_ERROR;
    307    return year;
    308 }
    309 
    310 int32_t EraRules::getEraCode(int32_t year, int32_t month, int32_t day, UErrorCode& status) const {
    311    if(U_FAILURE(status)) {
    312        return -1;
    313    }
    314 
    315    if (month < 1 || month > 12 || day < 1 || day > 31) {
    316        status = U_ILLEGAL_ARGUMENT_ERROR;
    317        return -1;
    318    }
    319    if (numEras > 1 && startDates[startDatesLength-1] == MIN_ENCODED_START) {
    320        // Multiple eras in reverse order, linear search from beginning.
    321        // Currently only for islamic.
    322        for (int startIdx = 0; startIdx < startDatesLength; startIdx++) {
    323            if (!isSet(startDates[startIdx])) {
    324                continue;
    325            }
    326            if (compareEncodedDateWithYMD(startDates[startIdx], year, month, day) <= 0) {
    327                return minEra + startIdx;
    328            }
    329        }
    330    }
    331    // Linear search from the end, which should hit the most likely eras first.
    332    // Also this is the most efficient for any era if we have < 8 or so eras, so only less
    333    // efficient for early eras in Japanese calendar (while we still have them). Formerly
    334    // this used binary search which would only be better for those early Japanese eras,
    335    // but now that is much more difficult since there may be holes in the sorted list.
    336    // Note with this change, this no longer uses or depends on currentEra.
    337    for (int startIdx = startDatesLength; startIdx > 0;) {
    338        if (!isSet(startDates[--startIdx])) {
    339            continue;
    340        }
    341        if (compareEncodedDateWithYMD(startDates[startIdx], year, month, day) <= 0) {
    342            return minEra + startIdx;
    343        }
    344    }
    345    return minEra;
    346 }
    347 
    348 void EraRules::initCurrentEra() {
    349    // Compute local wall time in millis using ICU's default time zone.
    350    UErrorCode ec = U_ZERO_ERROR;
    351    UDate localMillis = ucal_getNow();
    352 
    353    int32_t rawOffset, dstOffset;
    354    TimeZone* zone = TimeZone::createDefault();
    355    // If we failed to create the default time zone, we are in a bad state and don't
    356    // really have many options. Carry on using UTC millis as a fallback.
    357    if (zone != nullptr) {
    358        zone->getOffset(localMillis, false, rawOffset, dstOffset, ec);
    359        delete zone;
    360        localMillis += (rawOffset + dstOffset);
    361    }
    362 
    363    int32_t year, mid;
    364    int8_t  month0, dom;
    365    Grego::timeToFields(localMillis, year, month0, dom, mid, ec);
    366    currentEra = minEra;
    367    if (U_FAILURE(ec)) { return; }
    368    // Now that getEraCode no longer depends on currentEra, we can just do this:
    369    currentEra = getEraCode(year, month0 + 1 /* changes to 1-base */, dom, ec);
    370    if (U_FAILURE(ec)) {
    371        currentEra = minEra;
    372    }
    373 }
    374 
    375 U_NAMESPACE_END
    376 #endif /* #if !UCONFIG_NO_FORMATTING */