dayperiodrules.cpp (18227B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * dayperiodrules.cpp 9 * 10 * created on: 2016-01-20 11 * created by: kazede 12 */ 13 14 #include "dayperiodrules.h" 15 16 #include "unicode/ures.h" 17 #include "charstr.h" 18 #include "cstring.h" 19 #include "ucln_in.h" 20 #include "uhash.h" 21 #include "ulocimp.h" 22 #include "umutex.h" 23 #include "uresimp.h" 24 25 26 U_NAMESPACE_BEGIN 27 28 namespace { 29 30 struct DayPeriodRulesData : public UMemory { 31 DayPeriodRulesData() : localeToRuleSetNumMap(nullptr), rules(nullptr), maxRuleSetNum(0) {} 32 33 UHashtable *localeToRuleSetNumMap; 34 DayPeriodRules *rules; 35 int32_t maxRuleSetNum; 36 } *data = nullptr; 37 38 enum CutoffType { 39 CUTOFF_TYPE_UNKNOWN = -1, 40 CUTOFF_TYPE_BEFORE, 41 CUTOFF_TYPE_AFTER, // TODO: AFTER is deprecated in CLDR 29. Remove. 42 CUTOFF_TYPE_FROM, 43 CUTOFF_TYPE_AT 44 }; 45 46 } // namespace 47 48 struct DayPeriodRulesDataSink : public ResourceSink { 49 DayPeriodRulesDataSink() { 50 for (int32_t i = 0; i < UPRV_LENGTHOF(cutoffs); ++i) { cutoffs[i] = 0; } 51 } 52 virtual ~DayPeriodRulesDataSink(); 53 54 virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) override { 55 ResourceTable dayPeriodData = value.getTable(errorCode); 56 if (U_FAILURE(errorCode)) { return; } 57 58 for (int32_t i = 0; dayPeriodData.getKeyAndValue(i, key, value); ++i) { 59 if (uprv_strcmp(key, "locales") == 0) { 60 ResourceTable locales = value.getTable(errorCode); 61 if (U_FAILURE(errorCode)) { return; } 62 63 for (int32_t j = 0; locales.getKeyAndValue(j, key, value); ++j) { 64 UnicodeString setNum_str = value.getUnicodeString(errorCode); 65 int32_t setNum = parseSetNum(setNum_str, errorCode); 66 uhash_puti(data->localeToRuleSetNumMap, const_cast<char *>(key), setNum, &errorCode); 67 } 68 } else if (uprv_strcmp(key, "rules") == 0) { 69 // Allocate one more than needed to skip [0]. See comment in parseSetNum(). 70 data->rules = new DayPeriodRules[data->maxRuleSetNum + 1]; 71 if (data->rules == nullptr) { 72 errorCode = U_MEMORY_ALLOCATION_ERROR; 73 return; 74 } 75 ResourceTable rules = value.getTable(errorCode); 76 processRules(rules, key, value, errorCode); 77 if (U_FAILURE(errorCode)) { return; } 78 } 79 } 80 } 81 82 void processRules(const ResourceTable &rules, const char *key, 83 ResourceValue &value, UErrorCode &errorCode) { 84 if (U_FAILURE(errorCode)) { return; } 85 86 for (int32_t i = 0; rules.getKeyAndValue(i, key, value); ++i) { 87 ruleSetNum = parseSetNum(key, errorCode); 88 ResourceTable ruleSet = value.getTable(errorCode); 89 if (U_FAILURE(errorCode)) { return; } 90 91 for (int32_t j = 0; ruleSet.getKeyAndValue(j, key, value); ++j) { 92 period = DayPeriodRules::getDayPeriodFromString(key); 93 if (period == DayPeriodRules::DAYPERIOD_UNKNOWN) { 94 errorCode = U_INVALID_FORMAT_ERROR; 95 return; 96 } 97 ResourceTable periodDefinition = value.getTable(errorCode); 98 if (U_FAILURE(errorCode)) { return; } 99 100 for (int32_t k = 0; periodDefinition.getKeyAndValue(k, key, value); ++k) { 101 if (value.getType() == URES_STRING) { 102 // Key-value pairs (e.g. before{6:00}). 103 CutoffType type = getCutoffTypeFromString(key); 104 addCutoff(type, value.getUnicodeString(errorCode), errorCode); 105 if (U_FAILURE(errorCode)) { return; } 106 } else { 107 // Arrays (e.g. before{6:00, 24:00}). 108 cutoffType = getCutoffTypeFromString(key); 109 ResourceArray cutoffArray = value.getArray(errorCode); 110 if (U_FAILURE(errorCode)) { return; } 111 112 int32_t length = cutoffArray.getSize(); 113 for (int32_t l = 0; l < length; ++l) { 114 cutoffArray.getValue(l, value); 115 addCutoff(cutoffType, value.getUnicodeString(errorCode), errorCode); 116 if (U_FAILURE(errorCode)) { return; } 117 } 118 } 119 } 120 setDayPeriodForHoursFromCutoffs(errorCode); 121 for (int32_t k = 0; k < UPRV_LENGTHOF(cutoffs); ++k) { 122 cutoffs[k] = 0; 123 } 124 } 125 126 if (!data->rules[ruleSetNum].allHoursAreSet()) { 127 errorCode = U_INVALID_FORMAT_ERROR; 128 return; 129 } 130 } 131 } 132 133 // Members. 134 int32_t cutoffs[25]; // [0] thru [24]: 24 is allowed in "before 24". 135 136 // "Path" to data. 137 int32_t ruleSetNum; 138 DayPeriodRules::DayPeriod period; 139 CutoffType cutoffType; 140 141 // Helpers. 142 static int32_t parseSetNum(const UnicodeString &setNumStr, UErrorCode &errorCode) { 143 CharString cs; 144 cs.appendInvariantChars(setNumStr, errorCode); 145 return parseSetNum(cs.data(), errorCode); 146 } 147 148 static int32_t parseSetNum(const char *setNumStr, UErrorCode &errorCode) { 149 if (U_FAILURE(errorCode)) { return -1; } 150 151 if (uprv_strncmp(setNumStr, "set", 3) != 0) { 152 errorCode = U_INVALID_FORMAT_ERROR; 153 return -1; 154 } 155 156 int32_t i = 3; 157 int32_t setNum = 0; 158 while (setNumStr[i] != 0) { 159 int32_t digit = setNumStr[i] - '0'; 160 if (digit < 0 || 9 < digit) { 161 errorCode = U_INVALID_FORMAT_ERROR; 162 return -1; 163 } 164 setNum = 10 * setNum + digit; 165 ++i; 166 } 167 168 // Rule set number must not be zero. (0 is used to indicate "not found" by hashmap.) 169 // Currently ICU data conveniently starts numbering rule sets from 1. 170 if (setNum == 0) { 171 errorCode = U_INVALID_FORMAT_ERROR; 172 return -1; 173 } else { 174 return setNum; 175 } 176 } 177 178 void addCutoff(CutoffType type, const UnicodeString &hour_str, UErrorCode &errorCode) { 179 if (U_FAILURE(errorCode)) { return; } 180 181 if (type == CUTOFF_TYPE_UNKNOWN) { 182 errorCode = U_INVALID_FORMAT_ERROR; 183 return; 184 } 185 186 int32_t hour = parseHour(hour_str, errorCode); 187 if (U_FAILURE(errorCode)) { return; } 188 189 cutoffs[hour] |= 1 << type; 190 } 191 192 // Translate the cutoffs[] array to day period rules. 193 void setDayPeriodForHoursFromCutoffs(UErrorCode &errorCode) { 194 DayPeriodRules &rule = data->rules[ruleSetNum]; 195 196 for (int32_t startHour = 0; startHour <= 24; ++startHour) { 197 // AT cutoffs must be either midnight or noon. 198 if (cutoffs[startHour] & (1 << CUTOFF_TYPE_AT)) { 199 if (startHour == 0 && period == DayPeriodRules::DAYPERIOD_MIDNIGHT) { 200 rule.fHasMidnight = true; 201 } else if (startHour == 12 && period == DayPeriodRules::DAYPERIOD_NOON) { 202 rule.fHasNoon = true; 203 } else { 204 errorCode = U_INVALID_FORMAT_ERROR; // Bad data. 205 return; 206 } 207 } 208 209 // FROM/AFTER and BEFORE must come in a pair. 210 if (cutoffs[startHour] & (1 << CUTOFF_TYPE_FROM) || 211 cutoffs[startHour] & (1 << CUTOFF_TYPE_AFTER)) { 212 for (int32_t hour = startHour + 1;; ++hour) { 213 if (hour == startHour) { 214 // We've gone around the array once and can't find a BEFORE. 215 errorCode = U_INVALID_FORMAT_ERROR; 216 return; 217 } 218 if (hour == 25) { hour = 0; } 219 if (cutoffs[hour] & (1 << CUTOFF_TYPE_BEFORE)) { 220 rule.add(startHour, hour, period); 221 break; 222 } 223 } 224 } 225 } 226 } 227 228 // Translate "before" to CUTOFF_TYPE_BEFORE, for example. 229 static CutoffType getCutoffTypeFromString(const char *type_str) { 230 if (uprv_strcmp(type_str, "from") == 0) { 231 return CUTOFF_TYPE_FROM; 232 } else if (uprv_strcmp(type_str, "before") == 0) { 233 return CUTOFF_TYPE_BEFORE; 234 } else if (uprv_strcmp(type_str, "after") == 0) { 235 return CUTOFF_TYPE_AFTER; 236 } else if (uprv_strcmp(type_str, "at") == 0) { 237 return CUTOFF_TYPE_AT; 238 } else { 239 return CUTOFF_TYPE_UNKNOWN; 240 } 241 } 242 243 // Gets the numerical value of the hour from the Unicode string. 244 static int32_t parseHour(const UnicodeString &time, UErrorCode &errorCode) { 245 if (U_FAILURE(errorCode)) { 246 return 0; 247 } 248 249 int32_t hourLimit = time.length() - 3; 250 // `time` must look like "x:00" or "xx:00". 251 // If length is wrong or `time` doesn't end with ":00", error out. 252 if ((hourLimit != 1 && hourLimit != 2) || 253 time[hourLimit] != 0x3A || time[hourLimit + 1] != 0x30 || 254 time[hourLimit + 2] != 0x30) { 255 errorCode = U_INVALID_FORMAT_ERROR; 256 return 0; 257 } 258 259 // If `time` doesn't begin with a number in [0, 24], error out. 260 // Note: "24:00" is possible in "before 24:00". 261 int32_t hour = time[0] - 0x30; 262 if (hour < 0 || 9 < hour) { 263 errorCode = U_INVALID_FORMAT_ERROR; 264 return 0; 265 } 266 if (hourLimit == 2) { 267 int32_t hourDigit2 = time[1] - 0x30; 268 if (hourDigit2 < 0 || 9 < hourDigit2) { 269 errorCode = U_INVALID_FORMAT_ERROR; 270 return 0; 271 } 272 hour = hour * 10 + hourDigit2; 273 if (hour > 24) { 274 errorCode = U_INVALID_FORMAT_ERROR; 275 return 0; 276 } 277 } 278 279 return hour; 280 } 281 }; // struct DayPeriodRulesDataSink 282 283 struct DayPeriodRulesCountSink : public ResourceSink { 284 virtual ~DayPeriodRulesCountSink(); 285 286 virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) override { 287 ResourceTable rules = value.getTable(errorCode); 288 if (U_FAILURE(errorCode)) { return; } 289 290 for (int32_t i = 0; rules.getKeyAndValue(i, key, value); ++i) { 291 int32_t setNum = DayPeriodRulesDataSink::parseSetNum(key, errorCode); 292 if (setNum > data->maxRuleSetNum) { 293 data->maxRuleSetNum = setNum; 294 } 295 } 296 } 297 }; 298 299 // Out-of-line virtual destructors. 300 DayPeriodRulesDataSink::~DayPeriodRulesDataSink() {} 301 DayPeriodRulesCountSink::~DayPeriodRulesCountSink() {} 302 303 namespace { 304 305 UInitOnce initOnce {}; 306 307 U_CFUNC UBool U_CALLCONV dayPeriodRulesCleanup() { 308 delete[] data->rules; 309 uhash_close(data->localeToRuleSetNumMap); 310 delete data; 311 data = nullptr; 312 return true; 313 } 314 315 } // namespace 316 317 void U_CALLCONV DayPeriodRules::load(UErrorCode &errorCode) { 318 if (U_FAILURE(errorCode)) { 319 return; 320 } 321 322 data = new DayPeriodRulesData(); 323 data->localeToRuleSetNumMap = uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode); 324 LocalUResourceBundlePointer rb_dayPeriods(ures_openDirect(nullptr, "dayPeriods", &errorCode)); 325 326 // Get the largest rule set number (so we allocate enough objects). 327 DayPeriodRulesCountSink countSink; 328 ures_getAllItemsWithFallback(rb_dayPeriods.getAlias(), "rules", countSink, errorCode); 329 330 // Populate rules. 331 DayPeriodRulesDataSink sink; 332 ures_getAllItemsWithFallback(rb_dayPeriods.getAlias(), "", sink, errorCode); 333 334 ucln_i18n_registerCleanup(UCLN_I18N_DAYPERIODRULES, dayPeriodRulesCleanup); 335 } 336 337 const DayPeriodRules *DayPeriodRules::getInstance(const Locale &locale, UErrorCode &errorCode) { 338 umtx_initOnce(initOnce, DayPeriodRules::load, errorCode); 339 340 // If the entire day period rules data doesn't conform to spec (even if the part we want 341 // does), return nullptr. 342 if(U_FAILURE(errorCode)) { return nullptr; } 343 344 const char *localeCode = locale.getBaseName(); 345 char name[ULOC_FULLNAME_CAPACITY]; 346 347 if (uprv_strlen(localeCode) < ULOC_FULLNAME_CAPACITY) { 348 uprv_strcpy(name, localeCode); 349 350 // Treat empty string as root. 351 if (*name == '\0') { 352 uprv_strcpy(name, "root"); 353 } 354 } else { 355 errorCode = U_BUFFER_OVERFLOW_ERROR; 356 return nullptr; 357 } 358 359 int32_t ruleSetNum = 0; // NB there is no rule set 0 and 0 is returned upon lookup failure. 360 while (*name != '\0') { 361 ruleSetNum = uhash_geti(data->localeToRuleSetNumMap, name); 362 if (ruleSetNum == 0) { 363 CharString parent = ulocimp_getParent(name, errorCode); 364 if (parent.isEmpty()) { 365 // Saves a lookup in the hash table. 366 break; 367 } 368 parent.extract(name, UPRV_LENGTHOF(name), errorCode); 369 } else { 370 break; 371 } 372 } 373 374 if (ruleSetNum <= 0 || data->rules[ruleSetNum].getDayPeriodForHour(0) == DAYPERIOD_UNKNOWN) { 375 // If day period for hour 0 is UNKNOWN then day period for all hours are UNKNOWN. 376 // Data doesn't exist even with fallback. 377 return nullptr; 378 } else { 379 return &data->rules[ruleSetNum]; 380 } 381 } 382 383 DayPeriodRules::DayPeriodRules() : fHasMidnight(false), fHasNoon(false) { 384 for (int32_t i = 0; i < 24; ++i) { 385 fDayPeriodForHour[i] = DayPeriodRules::DAYPERIOD_UNKNOWN; 386 } 387 } 388 389 double DayPeriodRules::getMidPointForDayPeriod( 390 DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const { 391 if (U_FAILURE(errorCode)) { return -1; } 392 393 int32_t startHour = getStartHourForDayPeriod(dayPeriod, errorCode); 394 int32_t endHour = getEndHourForDayPeriod(dayPeriod, errorCode); 395 // Can't obtain startHour or endHour; bail out. 396 if (U_FAILURE(errorCode)) { return -1; } 397 398 double midPoint = (startHour + endHour) / 2.0; 399 400 if (startHour > endHour) { 401 // dayPeriod wraps around midnight. Shift midPoint by 12 hours, in the direction that 402 // lands it in [0, 24). 403 midPoint += 12; 404 if (midPoint >= 24) { 405 midPoint -= 24; 406 } 407 } 408 409 return midPoint; 410 } 411 412 int32_t DayPeriodRules::getStartHourForDayPeriod( 413 DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const { 414 if (U_FAILURE(errorCode)) { return -1; } 415 416 if (dayPeriod == DAYPERIOD_MIDNIGHT) { return 0; } 417 if (dayPeriod == DAYPERIOD_NOON) { return 12; } 418 419 if (fDayPeriodForHour[0] == dayPeriod && fDayPeriodForHour[23] == dayPeriod) { 420 // dayPeriod wraps around midnight. Start hour is later than end hour. 421 for (int32_t i = 22; i >= 1; --i) { 422 if (fDayPeriodForHour[i] != dayPeriod) { 423 return (i + 1); 424 } 425 } 426 } else { 427 for (int32_t i = 0; i <= 23; ++i) { 428 if (fDayPeriodForHour[i] == dayPeriod) { 429 return i; 430 } 431 } 432 } 433 434 // dayPeriod doesn't exist in rule set; set error and exit. 435 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 436 return -1; 437 } 438 439 int32_t DayPeriodRules::getEndHourForDayPeriod( 440 DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const { 441 if (U_FAILURE(errorCode)) { return -1; } 442 443 if (dayPeriod == DAYPERIOD_MIDNIGHT) { return 0; } 444 if (dayPeriod == DAYPERIOD_NOON) { return 12; } 445 446 if (fDayPeriodForHour[0] == dayPeriod && fDayPeriodForHour[23] == dayPeriod) { 447 // dayPeriod wraps around midnight. End hour is before start hour. 448 for (int32_t i = 1; i <= 22; ++i) { 449 if (fDayPeriodForHour[i] != dayPeriod) { 450 // i o'clock is when a new period starts, therefore when the old period ends. 451 return i; 452 } 453 } 454 } else { 455 for (int32_t i = 23; i >= 0; --i) { 456 if (fDayPeriodForHour[i] == dayPeriod) { 457 return (i + 1); 458 } 459 } 460 } 461 462 // dayPeriod doesn't exist in rule set; set error and exit. 463 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 464 return -1; 465 } 466 467 DayPeriodRules::DayPeriod DayPeriodRules::getDayPeriodFromString(const char *type_str) { 468 if (uprv_strcmp(type_str, "midnight") == 0) { 469 return DAYPERIOD_MIDNIGHT; 470 } else if (uprv_strcmp(type_str, "noon") == 0) { 471 return DAYPERIOD_NOON; 472 } else if (uprv_strcmp(type_str, "morning1") == 0) { 473 return DAYPERIOD_MORNING1; 474 } else if (uprv_strcmp(type_str, "afternoon1") == 0) { 475 return DAYPERIOD_AFTERNOON1; 476 } else if (uprv_strcmp(type_str, "evening1") == 0) { 477 return DAYPERIOD_EVENING1; 478 } else if (uprv_strcmp(type_str, "night1") == 0) { 479 return DAYPERIOD_NIGHT1; 480 } else if (uprv_strcmp(type_str, "morning2") == 0) { 481 return DAYPERIOD_MORNING2; 482 } else if (uprv_strcmp(type_str, "afternoon2") == 0) { 483 return DAYPERIOD_AFTERNOON2; 484 } else if (uprv_strcmp(type_str, "evening2") == 0) { 485 return DAYPERIOD_EVENING2; 486 } else if (uprv_strcmp(type_str, "night2") == 0) { 487 return DAYPERIOD_NIGHT2; 488 } else if (uprv_strcmp(type_str, "am") == 0) { 489 return DAYPERIOD_AM; 490 } else if (uprv_strcmp(type_str, "pm") == 0) { 491 return DAYPERIOD_PM; 492 } else { 493 return DAYPERIOD_UNKNOWN; 494 } 495 } 496 497 void DayPeriodRules::add(int32_t startHour, int32_t limitHour, DayPeriod period) { 498 for (int32_t i = startHour; i != limitHour; ++i) { 499 if (i == 24) { i = 0; } 500 fDayPeriodForHour[i] = period; 501 } 502 } 503 504 UBool DayPeriodRules::allHoursAreSet() { 505 for (int32_t i = 0; i < 24; ++i) { 506 if (fDayPeriodForHour[i] == DAYPERIOD_UNKNOWN) { return false; } 507 } 508 509 return true; 510 } 511 512 513 514 U_NAMESPACE_END