erarules.cpp (13546B)
1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include <utility> 5 6 #include "unicode/utypes.h" 7 8 #if !UCONFIG_NO_FORMATTING 9 10 #include <stdlib.h> 11 #include "unicode/ucal.h" 12 #include "unicode/ures.h" 13 #include "unicode/ustring.h" 14 #include "unicode/timezone.h" 15 #include "cmemory.h" 16 #include "cstring.h" 17 #include "erarules.h" 18 #include "gregoimp.h" 19 #include "uassert.h" 20 #include "uvectr32.h" 21 22 U_NAMESPACE_BEGIN 23 24 static const int32_t MAX_ENCODED_START_YEAR = 32767; 25 static const int32_t MIN_ENCODED_START_YEAR = -32768; 26 static const int32_t MIN_ENCODED_START = -2147483391; // encodeDate(MIN_ENCODED_START_YEAR, 1, 1, ...); 27 28 static const int32_t YEAR_MASK = 0xFFFF0000; 29 static const int32_t MONTH_MASK = 0x0000FF00; 30 static const int32_t DAY_MASK = 0x000000FF; 31 32 static const int32_t MAX_INT32 = 0x7FFFFFFF; 33 static const int32_t MIN_INT32 = 0xFFFFFFFF; 34 35 static const char16_t VAL_FALSE[] = {0x66, 0x61, 0x6c, 0x73, 0x65}; // "false" 36 static const char16_t VAL_FALSE_LEN = 5; 37 38 static UBool isSet(int startDate) { 39 return startDate != 0; 40 } 41 42 static UBool isValidRuleStartDate(int32_t year, int32_t month, int32_t day) { 43 return year >= MIN_ENCODED_START_YEAR && year <= MAX_ENCODED_START_YEAR 44 && month >= 1 && month <= 12 && day >=1 && day <= 31; 45 } 46 47 /** 48 * Encode year/month/date to a single integer. 49 * year is high 16 bits (-32768 to 32767), month is 50 * next 8 bits and day of month is last 8 bits. 51 * 52 * @param year year 53 * @param month month (1-base) 54 * @param day day of month 55 * @return an encoded date. 56 */ 57 static int32_t encodeDate(int32_t year, int32_t month, int32_t day) { 58 return static_cast<int32_t>(static_cast<uint32_t>(year) << 16) | month << 8 | day; 59 } 60 61 static void decodeDate(int32_t encodedDate, int32_t (&fields)[3]) { 62 if (encodedDate == MIN_ENCODED_START) { 63 fields[0] = MIN_INT32; 64 fields[1] = 1; 65 fields[2] = 1; 66 } else { 67 fields[0] = (encodedDate & YEAR_MASK) >> 16; 68 fields[1] = (encodedDate & MONTH_MASK) >> 8; 69 fields[2] = encodedDate & DAY_MASK; 70 } 71 } 72 73 /** 74 * Compare an encoded date with another date specified by year/month/day. 75 * @param encoded An encoded date 76 * @param year Year of another date 77 * @param month Month of another date 78 * @param day Day of another date 79 * @return -1 when encoded date is earlier, 0 when two dates are same, 80 * and 1 when encoded date is later. 81 */ 82 static int32_t compareEncodedDateWithYMD(int encoded, int year, int month, int day) { 83 if (year < MIN_ENCODED_START_YEAR) { 84 if (encoded == MIN_ENCODED_START) { 85 if (year > MIN_INT32 || month > 1 || day > 1) { 86 return -1; 87 } 88 return 0; 89 } else { 90 return 1; 91 } 92 } else if (year > MAX_ENCODED_START_YEAR) { 93 return -1; 94 } else { 95 int tmp = encodeDate(year, month, day); 96 if (encoded < tmp) { 97 return -1; 98 } else if (encoded == tmp) { 99 return 0; 100 } else { 101 return 1; 102 } 103 } 104 } 105 106 EraRules::EraRules(LocalMemory<int32_t>& startDatesIn, int32_t startDatesLengthIn, int32_t minEraIn, int32_t numErasIn) 107 : startDatesLength(startDatesLengthIn), minEra(minEraIn), numEras(numErasIn) { 108 startDates = std::move(startDatesIn); 109 initCurrentEra(); 110 } 111 112 EraRules::~EraRules() { 113 } 114 115 EraRules* EraRules::createInstance(const char *calType, UBool includeTentativeEra, UErrorCode& status) { 116 if(U_FAILURE(status)) { 117 return nullptr; 118 } 119 LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "supplementalData", &status)); 120 ures_getByKey(rb.getAlias(), "calendarData", rb.getAlias(), &status); 121 ures_getByKey(rb.getAlias(), calType, rb.getAlias(), &status); 122 ures_getByKey(rb.getAlias(), "eras", rb.getAlias(), &status); 123 124 if (U_FAILURE(status)) { 125 return nullptr; 126 } 127 128 int32_t numEras = ures_getSize(rb.getAlias()); 129 int32_t firstTentativeIdx = MAX_INT32; 130 131 UVector32 eraStartDates(numEras, status); 132 if (U_FAILURE(status)) { 133 return nullptr; 134 } 135 136 while (ures_hasNext(rb.getAlias())) { 137 LocalUResourceBundlePointer eraRuleRes(ures_getNextResource(rb.getAlias(), nullptr, &status)); 138 if (U_FAILURE(status)) { 139 return nullptr; 140 } 141 const char *eraIdxStr = ures_getKey(eraRuleRes.getAlias()); 142 char *endp; 143 int32_t eraIdx = static_cast<int32_t>(uprv_strtol(eraIdxStr, &endp, 10)); 144 if (static_cast<size_t>(endp - eraIdxStr) != uprv_strlen(eraIdxStr)) { 145 status = U_INVALID_FORMAT_ERROR; 146 return nullptr; 147 } 148 if (eraIdx < 0) { 149 status = U_INVALID_FORMAT_ERROR; 150 return nullptr; 151 } 152 if (eraIdx + 1 > eraStartDates.size()) { 153 eraStartDates.ensureCapacity(eraIdx + 1, status); // needed only to minimize expansions 154 // Fill in 0 for all added slots (else they are undefined) 155 while (eraStartDates.size() < eraIdx + 1) { 156 eraStartDates.addElement(0, status); 157 } 158 if (U_FAILURE(status)) { 159 return nullptr; 160 } 161 } 162 // Now set the startDate that we just read 163 if (isSet(eraStartDates.elementAti(eraIdx))) { 164 // start date of the index was already set 165 status = U_INVALID_FORMAT_ERROR; 166 return nullptr; 167 } 168 169 UBool hasName = true; 170 UBool hasEnd = true; 171 int32_t len; 172 while (ures_hasNext(eraRuleRes.getAlias())) { 173 LocalUResourceBundlePointer res(ures_getNextResource(eraRuleRes.getAlias(), nullptr, &status)); 174 if (U_FAILURE(status)) { 175 return nullptr; 176 } 177 const char *key = ures_getKey(res.getAlias()); 178 if (uprv_strcmp(key, "start") == 0) { 179 const int32_t *fields = ures_getIntVector(res.getAlias(), &len, &status); 180 if (U_FAILURE(status)) { 181 return nullptr; 182 } 183 if (len != 3 || !isValidRuleStartDate(fields[0], fields[1], fields[2])) { 184 status = U_INVALID_FORMAT_ERROR; 185 return nullptr; 186 } 187 eraStartDates.setElementAt(encodeDate(fields[0], fields[1], fields[2]), eraIdx); 188 } else if (uprv_strcmp(key, "named") == 0) { 189 const char16_t *val = ures_getString(res.getAlias(), &len, &status); 190 if (u_strncmp(val, VAL_FALSE, VAL_FALSE_LEN) == 0) { 191 hasName = false; 192 } 193 } else if (uprv_strcmp(key, "end") == 0) { 194 hasEnd = true; 195 } 196 } 197 198 if (isSet(eraStartDates.elementAti(eraIdx))) { 199 if (hasEnd) { 200 // This implementation assumes either start or end is available, not both. 201 // For now, just ignore the end rule. 202 } 203 } else { 204 if (hasEnd) { 205 // The islamic calendars now have an end-only rule for the 206 // second (and final) entry; basically they are in reverse order. 207 eraStartDates.setElementAt(MIN_ENCODED_START, eraIdx); 208 } else { 209 status = U_INVALID_FORMAT_ERROR; 210 return nullptr; 211 } 212 } 213 214 if (hasName) { 215 if (eraIdx >= firstTentativeIdx) { 216 status = U_INVALID_FORMAT_ERROR; 217 return nullptr; 218 } 219 } else { 220 if (eraIdx < firstTentativeIdx) { 221 firstTentativeIdx = eraIdx; 222 } 223 } 224 } 225 226 // Remove from eraStartDates any tentative eras if they should not be included 227 // (these would be the last entries). Also reduce numEras appropriately. 228 if (!includeTentativeEra) { 229 while (firstTentativeIdx < eraStartDates.size()) { 230 int32_t lastEraIdx = eraStartDates.size() - 1; 231 if (isSet(eraStartDates.elementAti(lastEraIdx))) { // If there are multiple tentativeEras, some may be unset 232 numEras--; 233 } 234 eraStartDates.removeElementAt(lastEraIdx); 235 } 236 // Remove any remaining trailing unSet entries 237 // (can only have these if tentativeEras have been removed) 238 while (eraStartDates.size() > 0 && !isSet(eraStartDates.elementAti(eraStartDates.size() - 1))) { 239 eraStartDates.removeElementAt(eraStartDates.size() - 1); 240 } 241 } 242 // Remove from eraStartDates any initial 0 entries, keeping the original index (eraCode) 243 // of the first non-zero entry as minEra; then we can add that back to the offset in the 244 // compressed array to get the correct eraCode. 245 int32_t minEra = 0; 246 while (eraStartDates.size() > 0 && !isSet(eraStartDates.elementAti(0))) { 247 eraStartDates.removeElementAt(0); 248 minEra++; 249 } 250 // Convert eraStartDates to int32_t array startDates and pass to EraRules constructor, 251 // along with startDatesLength, minEra and numEras (which may be different from startDatesLength) 252 LocalMemory<int32_t> startDates(static_cast<int32_t *>(uprv_malloc(eraStartDates.size() * sizeof(int32_t)))); 253 if (startDates.isNull()) { 254 status = U_MEMORY_ALLOCATION_ERROR; 255 return nullptr; 256 } 257 for (int32_t eraIdx = 0; eraIdx < eraStartDates.size(); eraIdx++) { 258 startDates[eraIdx] = eraStartDates.elementAti(eraIdx); 259 } 260 EraRules *result = new EraRules(startDates, eraStartDates.size(), minEra, numEras); 261 if (result == nullptr) { 262 status = U_MEMORY_ALLOCATION_ERROR; 263 } 264 return result; 265 } 266 267 void EraRules::getStartDate(int32_t eraCode, int32_t (&fields)[3], UErrorCode& status) const { 268 if(U_FAILURE(status)) { 269 return; 270 } 271 int32_t startDate = 0; 272 if (eraCode >= minEra) { 273 int32_t startIdx = eraCode - minEra; 274 if (startIdx < startDatesLength) { 275 startDate = startDates[startIdx]; 276 } 277 } 278 if (isSet(startDate)) { 279 decodeDate(startDate, fields); 280 return; 281 } 282 // We did not find the requested eraCode in our data 283 status = U_ILLEGAL_ARGUMENT_ERROR; 284 return; 285 } 286 287 int32_t EraRules::getStartYear(int32_t eraCode, UErrorCode& status) const { 288 int year = MAX_INT32; // bogus value 289 if(U_FAILURE(status)) { 290 return year; 291 } 292 int32_t startDate = 0; 293 if (eraCode >= minEra) { 294 int32_t startIdx = eraCode - minEra; 295 if (startIdx < startDatesLength) { 296 startDate = startDates[startIdx]; 297 } 298 } 299 if (isSet(startDate)) { 300 int fields[3]; 301 decodeDate(startDate, fields); 302 year = fields[0]; 303 return year; 304 } 305 // We did not find the requested eraCode in our data 306 status = U_ILLEGAL_ARGUMENT_ERROR; 307 return year; 308 } 309 310 int32_t EraRules::getEraCode(int32_t year, int32_t month, int32_t day, UErrorCode& status) const { 311 if(U_FAILURE(status)) { 312 return -1; 313 } 314 315 if (month < 1 || month > 12 || day < 1 || day > 31) { 316 status = U_ILLEGAL_ARGUMENT_ERROR; 317 return -1; 318 } 319 if (numEras > 1 && startDates[startDatesLength-1] == MIN_ENCODED_START) { 320 // Multiple eras in reverse order, linear search from beginning. 321 // Currently only for islamic. 322 for (int startIdx = 0; startIdx < startDatesLength; startIdx++) { 323 if (!isSet(startDates[startIdx])) { 324 continue; 325 } 326 if (compareEncodedDateWithYMD(startDates[startIdx], year, month, day) <= 0) { 327 return minEra + startIdx; 328 } 329 } 330 } 331 // Linear search from the end, which should hit the most likely eras first. 332 // Also this is the most efficient for any era if we have < 8 or so eras, so only less 333 // efficient for early eras in Japanese calendar (while we still have them). Formerly 334 // this used binary search which would only be better for those early Japanese eras, 335 // but now that is much more difficult since there may be holes in the sorted list. 336 // Note with this change, this no longer uses or depends on currentEra. 337 for (int startIdx = startDatesLength; startIdx > 0;) { 338 if (!isSet(startDates[--startIdx])) { 339 continue; 340 } 341 if (compareEncodedDateWithYMD(startDates[startIdx], year, month, day) <= 0) { 342 return minEra + startIdx; 343 } 344 } 345 return minEra; 346 } 347 348 void EraRules::initCurrentEra() { 349 // Compute local wall time in millis using ICU's default time zone. 350 UErrorCode ec = U_ZERO_ERROR; 351 UDate localMillis = ucal_getNow(); 352 353 int32_t rawOffset, dstOffset; 354 TimeZone* zone = TimeZone::createDefault(); 355 // If we failed to create the default time zone, we are in a bad state and don't 356 // really have many options. Carry on using UTC millis as a fallback. 357 if (zone != nullptr) { 358 zone->getOffset(localMillis, false, rawOffset, dstOffset, ec); 359 delete zone; 360 localMillis += (rawOffset + dstOffset); 361 } 362 363 int32_t year, mid; 364 int8_t month0, dom; 365 Grego::timeToFields(localMillis, year, month0, dom, mid, ec); 366 currentEra = minEra; 367 if (U_FAILURE(ec)) { return; } 368 // Now that getEraCode no longer depends on currentEra, we can just do this: 369 currentEra = getEraCode(year, month0 + 1 /* changes to 1-base */, dom, ec); 370 if (U_FAILURE(ec)) { 371 currentEra = minEra; 372 } 373 } 374 375 U_NAMESPACE_END 376 #endif /* #if !UCONFIG_NO_FORMATTING */