tz2icu.cpp (63906B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2003-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Author: Alan Liu 9 * Created: July 10 2003 10 * Since: ICU 2.8 11 ********************************************************************** 12 */ 13 #include "tzfile.h" // from Olson tzcode archive, copied to this dir 14 15 #ifdef WIN32 16 17 #include <windows.h> 18 #undef min // windows.h/STL conflict 19 #undef max // windows.h/STL conflict 20 // "identifier was truncated to 'number' characters" warning 21 #pragma warning(disable: 4786) 22 23 #else 24 25 #include <unistd.h> 26 #include <stdio.h> 27 #include <dirent.h> 28 #include <string.h> 29 #include <sys/stat.h> 30 31 #endif 32 33 #include <algorithm> 34 #include <cassert> 35 #include <ctime> 36 #include <fstream> 37 #include <iomanip> 38 #include <iostream> 39 #include <iterator> 40 #include <limits> 41 #include <map> 42 #include <set> 43 #include <sstream> 44 #include <sstream> 45 #include <stdexcept> 46 #include <string> 47 #include <vector> 48 49 #include "tz2icu.h" 50 #include "unicode/uversion.h" 51 52 using namespace std; 53 54 bool ICU44PLUS = true; 55 string TZ_RESOURCE_NAME = ICU_TZ_RESOURCE; 56 57 //-------------------------------------------------------------------- 58 // Time utilities 59 //-------------------------------------------------------------------- 60 61 const int64_t SECS_PER_YEAR = 31536000; // 365 days 62 const int64_t SECS_PER_LEAP_YEAR = 31622400; // 366 days 63 const int64_t LOWEST_TIME32 = static_cast<int64_t>(static_cast<int32_t>(0x80000000)); 64 const int64_t HIGHEST_TIME32 = static_cast<int64_t>(static_cast<int32_t>(0x7fffffff)); 65 66 bool isLeap(int32_t y) { 67 return (y%4 == 0) && ((y%100 != 0) || (y%400 == 0)); // Gregorian 68 } 69 70 int64_t secsPerYear(int32_t y) { 71 return isLeap(y) ? SECS_PER_LEAP_YEAR : SECS_PER_YEAR; 72 } 73 74 /** 75 * Given a calendar year, return the GMT epoch seconds for midnight 76 * GMT of January 1 of that year. yearToSeconds(1970) == 0. 77 */ 78 int64_t yearToSeconds(int32_t year) { 79 // inefficient but foolproof 80 int64_t s = 0; 81 int32_t y = 1970; 82 while (y < year) { 83 s += secsPerYear(y++); 84 } 85 while (y > year) { 86 s -= secsPerYear(--y); 87 } 88 return s; 89 } 90 91 /** 92 * Given 1970 GMT epoch seconds, return the calendar year containing 93 * that time. secondsToYear(0) == 1970. 94 */ 95 int32_t secondsToYear(int64_t seconds) { 96 // inefficient but foolproof 97 int32_t y = 1970; 98 int64_t s = 0; 99 if (seconds >= 0) { 100 for (;;) { 101 s += secsPerYear(y++); 102 if (s > seconds) break; 103 } 104 --y; 105 } else { 106 for (;;) { 107 s -= secsPerYear(--y); 108 if (s <= seconds) break; 109 } 110 } 111 return y; 112 } 113 114 //-------------------------------------------------------------------- 115 // Types 116 //-------------------------------------------------------------------- 117 118 struct FinalZone; 119 struct FinalRule; 120 struct SimplifiedZoneType; 121 122 // A transition from one ZoneType to another 123 // Minimal size = 5 bytes (4+1) 124 struct Transition { 125 int64_t time; // seconds, 1970 epoch 126 int32_t type; // index into 'ZoneInfo.types' 0..255 127 Transition(int64_t _time, int32_t _type) { 128 time = _time; 129 type = _type; 130 } 131 }; 132 133 // A behavior mode (what zic calls a 'type') of a time zone. 134 // Minimal size = 6 bytes (4+1+3bits) 135 // SEE: SimplifiedZoneType 136 struct ZoneType { 137 int64_t rawoffset; // raw seconds offset from GMT 138 int64_t dstoffset; // dst seconds offset from GMT 139 140 // We don't really need any of the following, but they are 141 // retained for possible future use. See SimplifiedZoneType. 142 int32_t abbr; // index into ZoneInfo.abbrs 0..n-1 143 bool isdst; 144 bool isstd; 145 bool isgmt; 146 147 ZoneType(const SimplifiedZoneType&); // used by optimizeTypeList 148 149 ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1) {} 150 151 // A restricted equality, of just the raw and dst offset 152 bool matches(const ZoneType& other) { 153 return rawoffset == other.rawoffset && 154 dstoffset == other.dstoffset; 155 } 156 }; 157 158 // A collection of transitions from one ZoneType to another, together 159 // with a list of the ZoneTypes. A ZoneInfo object may have a long 160 // list of transitions between a smaller list of ZoneTypes. 161 // 162 // This object represents the contents of a single zic-created 163 // zoneinfo file. 164 struct ZoneInfo { 165 vector<Transition> transitions; 166 vector<ZoneType> types; 167 vector<string> abbrs; 168 169 string finalRuleID; 170 int32_t finalOffset; 171 int32_t finalYear; // -1 if none 172 173 // If this is an alias, then all other fields are meaningless, and 174 // this field will point to the "real" zone 0..n-1. 175 int32_t aliasTo; // -1 if this is a "real" zone 176 177 // If there are aliases TO this zone, then the following set will 178 // contain their index numbers (each index >= 0). 179 set<int32_t> aliases; 180 181 ZoneInfo() : finalYear(-1), aliasTo(-1) {} 182 183 void mergeFinalData(const FinalZone& fz); 184 185 void optimizeTypeList(); 186 187 // Set this zone to be an alias TO another zone. 188 void setAliasTo(int32_t index); 189 190 // Clear the list of aliases OF this zone. 191 void clearAliases(); 192 193 // Add an alias to the list of aliases OF this zone. 194 void addAlias(int32_t index); 195 196 // Is this an alias to another zone? 197 bool isAlias() const { 198 return aliasTo >= 0; 199 } 200 201 // Retrieve alias list 202 const set<int32_t>& getAliases() const { 203 return aliases; 204 } 205 206 void print(ostream& os, const string& id) const; 207 }; 208 209 void ZoneInfo::clearAliases() { 210 assert(aliasTo < 0); 211 aliases.clear(); 212 } 213 214 void ZoneInfo::addAlias(int32_t index) { 215 assert(aliasTo < 0 && index >= 0 && aliases.find(index) == aliases.end()); 216 aliases.insert(index); 217 } 218 219 void ZoneInfo::setAliasTo(int32_t index) { 220 assert(index >= 0); 221 assert(aliases.empty()); 222 aliasTo = index; 223 } 224 225 typedef map<string, ZoneInfo> ZoneMap; 226 227 typedef ZoneMap::const_iterator ZoneMapIter; 228 229 //-------------------------------------------------------------------- 230 // ZONEINFO 231 //-------------------------------------------------------------------- 232 233 // Global map holding all our ZoneInfo objects, indexed by id. 234 ZoneMap ZONEINFO; 235 236 //-------------------------------------------------------------------- 237 // zoneinfo file parsing 238 //-------------------------------------------------------------------- 239 240 // Read zic-coded 32-bit integer from file 241 int64_t readcoded(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(), 242 int64_t maxv=numeric_limits<int64_t>::max()) { 243 unsigned char buf[4]; // must be UNSIGNED 244 int64_t val=0; 245 file.read(reinterpret_cast<char*>(buf), 4); 246 for(int32_t i=0,shift=24;i<4;++i,shift-=8) { 247 val |= buf[i] << shift; 248 } 249 if (val < minv || val > maxv) { 250 ostringstream os; 251 os << "coded value out-of-range: " << val << ", expected [" 252 << minv << ", " << maxv << "]"; 253 throw out_of_range(os.str()); 254 } 255 return val; 256 } 257 258 // Read zic-coded 64-bit integer from file 259 int64_t readcoded64(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(), 260 int64_t maxv=numeric_limits<int64_t>::max()) { 261 unsigned char buf[8]; // must be UNSIGNED 262 int64_t val=0; 263 file.read(reinterpret_cast<char*>(buf), 8); 264 for(int32_t i=0,shift=56;i<8;++i,shift-=8) { 265 val |= static_cast<int64_t>(buf[i]) << shift; 266 } 267 if (val < minv || val > maxv) { 268 ostringstream os; 269 os << "coded value out-of-range: " << val << ", expected [" 270 << minv << ", " << maxv << "]"; 271 throw out_of_range(os.str()); 272 } 273 return val; 274 } 275 276 // Read a boolean value 277 bool readbool(ifstream& file) { 278 char c; 279 file.read(&c, 1); 280 if (c!=0 && c!=1) { 281 ostringstream os; 282 os << "boolean value out-of-range: " << static_cast<int32_t>(c); 283 throw out_of_range(os.str()); 284 } 285 return (c!=0); 286 } 287 288 /** 289 * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo 290 * @param file an already-open file stream 291 */ 292 void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData) { 293 int32_t i; 294 295 // Check for TZ_ICU_MAGIC signature at file start. If we get a 296 // signature mismatch, it means we're trying to read a file which 297 // isn't a ICU-modified-zic-created zoneinfo file. Typically this 298 // means the user is passing in a "normal" zoneinfo directory, or 299 // a zoneinfo directory that is polluted with other files, or that 300 // the user passed in the wrong directory. 301 char buf[32]; 302 file.read(buf, 4); 303 if (strncmp(buf, TZ_ICU_MAGIC, 4) != 0) { 304 throw invalid_argument("TZ_ICU_MAGIC signature missing"); 305 } 306 // skip additional Olson byte version 307 file.read(buf, 1); 308 // if '\0', we have just one copy of data, if '2' or '3', there is additional 309 // 64 bit version at the end. 310 if(buf[0]!=0 && buf[0]!='2' && buf[0]!='3') { 311 throw invalid_argument("Bad Olson version info"); 312 } 313 314 // Read reserved bytes. The first of these will be a version byte. 315 file.read(buf, 15); 316 if (*reinterpret_cast<ICUZoneinfoVersion*>(&buf) != TZ_ICU_VERSION) { 317 throw invalid_argument("File version mismatch"); 318 } 319 320 // Read array sizes 321 int64_t isgmtcnt = readcoded(file, 0); 322 int64_t isdstcnt = readcoded(file, 0); 323 int64_t leapcnt = readcoded(file, 0); 324 int64_t timecnt = readcoded(file, 0); 325 int64_t typecnt = readcoded(file, 0); 326 int64_t charcnt = readcoded(file, 0); 327 328 // Confirm sizes that we assume to be equal. These assumptions 329 // are drawn from a reading of the zic source (2003a), so they 330 // should hold unless the zic source changes. 331 if (isgmtcnt != typecnt || isdstcnt != typecnt) { 332 throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisdstcnt, tth_typecnt"); 333 } 334 335 // Used temporarily to store transition times and types. We need 336 // to do this because the times and types are stored in two 337 // separate arrays. 338 vector<int64_t> transitionTimes(timecnt, -1); // temporary 339 vector<int32_t> transitionTypes(timecnt, -1); // temporary 340 341 // Read transition times 342 for (i=0; i<timecnt; ++i) { 343 if (is64bitData) { 344 transitionTimes[i] = readcoded64(file); 345 } else { 346 transitionTimes[i] = readcoded(file); 347 } 348 } 349 350 // Read transition types 351 for (i=0; i<timecnt; ++i) { 352 unsigned char c; 353 file.read(reinterpret_cast<char*>(&c), 1); 354 int32_t t = static_cast<int32_t>(c); 355 if (t < 0 || t >= typecnt) { 356 ostringstream os; 357 os << "illegal type: " << t << ", expected [0, " << (typecnt-1) << "]"; 358 throw out_of_range(os.str()); 359 } 360 transitionTypes[i] = t; 361 } 362 363 // Build transitions vector out of corresponding times and types. 364 bool insertInitial = false; 365 if (is64bitData && !ICU44PLUS) { 366 if (timecnt > 0) { 367 int32_t minidx = -1; 368 for (i=0; i<timecnt; ++i) { 369 if (transitionTimes[i] < LOWEST_TIME32) { 370 if (minidx == -1 || transitionTimes[i] > transitionTimes[minidx]) { 371 // Preserve the latest transition before the 32bit minimum time 372 minidx = i; 373 } 374 } else if (transitionTimes[i] > HIGHEST_TIME32) { 375 // Skipping the rest of the transition data. We cannot put such 376 // transitions into zoneinfo.res, because data is limited to signed 377 // 32bit int by the ICU resource bundle. 378 break; 379 } else { 380 info.transitions.emplace_back(transitionTimes[i], transitionTypes[i]); 381 } 382 } 383 384 if (minidx != -1) { 385 // If there are any transitions before the 32bit minimum time, 386 // put the type information with the 32bit minimum time 387 vector<Transition>::iterator itr = info.transitions.begin(); 388 info.transitions.insert(itr, Transition(LOWEST_TIME32, transitionTypes[minidx])); 389 } else { 390 // Otherwise, we need insert the initial type later 391 insertInitial = true; 392 } 393 } 394 } else { 395 for (i=0; i<timecnt; ++i) { 396 info.transitions.emplace_back(transitionTimes[i], transitionTypes[i]); 397 } 398 } 399 400 // Read types (except for the isdst and isgmt flags, which come later (why??)) 401 for (i=0; i<typecnt; ++i) { 402 ZoneType type; 403 404 type.rawoffset = readcoded(file); 405 type.dstoffset = readcoded(file); 406 type.isdst = readbool(file); 407 408 unsigned char c; 409 file.read(reinterpret_cast<char*>(&c), 1); 410 type.abbr = static_cast<int32_t>(c); 411 412 if (type.isdst != (type.dstoffset != 0)) { 413 throw invalid_argument("isdst does not reflect dstoffset"); 414 } 415 416 info.types.push_back(type); 417 } 418 419 assert(info.types.size() == (unsigned) typecnt); 420 421 if (insertInitial) { 422 assert(timecnt > 0); 423 assert(typecnt > 0); 424 425 int32_t initialTypeIdx = -1; 426 427 // Check if the first type is not dst 428 if (info.types.at(0).dstoffset != 0) { 429 // Initial type's rawoffset is same with the rawoffset after the 430 // first transition, but no DST is observed. 431 int64_t rawoffset0 = (info.types.at(info.transitions.at(0).type)).rawoffset; 432 // Look for matching type 433 for (i = 0; i < static_cast<int32_t>(info.types.size()); ++i) { 434 if (info.types.at(i).rawoffset == rawoffset0 435 && info.types.at(i).dstoffset == 0) { 436 initialTypeIdx = i; 437 break; 438 } 439 } 440 } else { 441 initialTypeIdx = 0; 442 } 443 assert(initialTypeIdx >= 0); 444 // Add the initial type associated with the lowest int32 time 445 vector<Transition>::iterator itr = info.transitions.begin(); 446 info.transitions.insert(itr, Transition(LOWEST_TIME32, initialTypeIdx)); 447 } 448 449 450 // Read the abbreviation string 451 if (charcnt) { 452 // All abbreviations are concatenated together, with a 0 at 453 // the end of each abbr. 454 char* str = new char[charcnt + 8]; 455 file.read(str, charcnt); 456 457 // Split abbreviations apart into individual strings. Record 458 // offset of each abbr in a vector. 459 vector<int32_t> abbroffset; 460 char *limit=str+charcnt; 461 for (char* p=str; p<limit; ++p) { 462 char* start = p; 463 while (*p != 0) ++p; 464 info.abbrs.emplace_back(start, p - start); 465 abbroffset.push_back(start-str); 466 } 467 468 // Remap all the abbrs. Old value is offset into concatenated 469 // raw abbr strings. New value is index into vector of 470 // strings. E.g., 0,5,10,14 => 0,1,2,3. 471 472 // Keep track of which abbreviations get used. 473 vector<bool> abbrseen(abbroffset.size(), false); 474 475 for (vector<ZoneType>::iterator it=info.types.begin(); 476 it!=info.types.end(); 477 ++it) { 478 vector<int32_t>::const_iterator x= 479 find(abbroffset.begin(), abbroffset.end(), it->abbr); 480 if (x==abbroffset.end()) { 481 // TODO: Modify code to add a new string to the end of 482 // the abbr list when a middle offset is given, e.g., 483 // "abc*def*" where * == '\0', take offset of 1 and 484 // make the array "abc", "def", "bc", and translate 1 485 // => 2. NOT CRITICAL since we don't even use the 486 // abbr at this time. 487 #if 0 488 // TODO: Re-enable this warning if we start using 489 // the Olson abbr data, or if the above TODO is completed. 490 ostringstream os; 491 os << "Warning: unusual abbr offset " << it->abbr 492 << ", expected one of"; 493 for (vector<int32_t>::const_iterator y=abbroffset.begin(); 494 y!=abbroffset.end(); ++y) { 495 os << ' ' << *y; 496 } 497 cerr << os.str() << "; using 0" << endl; 498 #endif 499 it->abbr = 0; 500 } else { 501 int32_t index = x - abbroffset.begin(); 502 it->abbr = index; 503 abbrseen[index] = true; 504 } 505 } 506 507 for (int32_t ii = 0; ii < static_cast<int32_t>(abbrseen.size()); ++ii) { 508 if (!abbrseen[ii]) { 509 cerr << "Warning: unused abbreviation: " << ii << endl; 510 } 511 } 512 } 513 514 // Read leap second info, if any. 515 // *** We discard leap second data. *** 516 for (i=0; i<leapcnt; ++i) { 517 readcoded(file); // transition time 518 readcoded(file); // total correction after above 519 } 520 521 // Read isstd flags 522 for (i=0; i<typecnt; ++i) info.types[i].isstd = readbool(file); 523 524 // Read isgmt flags 525 for (i=0; i<typecnt; ++i) info.types[i].isgmt = readbool(file); 526 } 527 528 //-------------------------------------------------------------------- 529 // Directory and file reading 530 //-------------------------------------------------------------------- 531 532 /** 533 * Process a single zoneinfo file, adding the data to ZONEINFO 534 * @param path the full path to the file, e.g., ".\zoneinfo\America\Los_Angeles" 535 * @param id the zone ID, e.g., "America/Los_Angeles" 536 */ 537 void handleFile(string path, string id) { 538 // Check for duplicate id 539 if (ZONEINFO.find(id) != ZONEINFO.end()) { 540 ostringstream os; 541 os << "duplicate zone ID: " << id; 542 throw invalid_argument(os.str()); 543 } 544 545 ifstream file(path.c_str(), ios::in | ios::binary); 546 if (!file) { 547 throw invalid_argument("can't open file"); 548 } 549 550 // eat 32bit data part 551 ZoneInfo info; 552 readzoneinfo(file, info, false); 553 554 // Check for errors 555 if (!file) { 556 throw invalid_argument("read error"); 557 } 558 559 // we only use 64bit part 560 ZoneInfo info64; 561 readzoneinfo(file, info64, true); 562 563 bool alldone = false; 564 int64_t eofPos = (int64_t) file.tellg(); 565 566 // '\n' + <envvar string> + '\n' after the 64bit version data 567 char ch = file.get(); 568 if (ch == 0x0a) { 569 bool invalidchar = false; 570 while (file.get(ch)) { 571 if (ch == 0x0a) { 572 break; 573 } 574 if (ch < 0x20) { 575 // must be printable ascii 576 invalidchar = true; 577 break; 578 } 579 } 580 if (!invalidchar) { 581 eofPos = (int64_t) file.tellg(); 582 file.seekg(0, ios::end); 583 eofPos = eofPos - (int64_t) file.tellg(); 584 if (eofPos == 0) { 585 alldone = true; 586 } 587 } 588 } 589 if (!alldone) { 590 ostringstream os; 591 os << (-eofPos) << " unprocessed bytes at end"; 592 throw invalid_argument(os.str()); 593 } 594 595 ZONEINFO[id] = info64; 596 } 597 598 /** 599 * Recursively scan the given directory, calling handleFile() for each 600 * file in the tree. The user should call with the root directory and 601 * a prefix of "". The function will call itself with non-empty 602 * prefix values. 603 */ 604 #ifdef WIN32 605 606 void scandir(string dirname, string prefix="") { 607 HANDLE hList; 608 WIN32_FIND_DATA FileData; 609 610 // Get the first file 611 hList = FindFirstFile((dirname + "\\*").c_str(), &FileData); 612 if (hList == INVALID_HANDLE_VALUE) { 613 cerr << "Error: Invalid directory: " << dirname << endl; 614 exit(1); 615 } 616 for (;;) { 617 string name(FileData.cFileName); 618 string path(dirname + "\\" + name); 619 if (FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { 620 if (name != "." && name != "..") { 621 scandir(path, prefix + name + "/"); 622 } 623 } else { 624 try { 625 string id = prefix + name; 626 handleFile(path, id); 627 } catch (const exception& e) { 628 cerr << "Error: While processing \"" << path << "\", " 629 << e.what() << endl; 630 exit(1); 631 } 632 } 633 634 if (!FindNextFile(hList, &FileData)) { 635 if (GetLastError() == ERROR_NO_MORE_FILES) { 636 break; 637 } // else...? 638 } 639 } 640 FindClose(hList); 641 } 642 643 #else 644 645 void scandir(string dir, string prefix="") { 646 DIR *dp; 647 struct dirent *dir_entry; 648 struct stat stat_info; 649 char pwd[512]; 650 vector<string> subdirs; 651 vector<string> subfiles; 652 653 if ((dp = opendir(dir.c_str())) == nullptr) { 654 cerr << "Error: Invalid directory: " << dir << endl; 655 exit(1); 656 } 657 if (!getcwd(pwd, sizeof(pwd))) { 658 cerr << "Error: Directory name too long" << endl; 659 exit(1); 660 } 661 chdir(dir.c_str()); 662 while ((dir_entry = readdir(dp)) != nullptr) { 663 string name = dir_entry->d_name; 664 string path = dir + "/" + name; 665 lstat(dir_entry->d_name,&stat_info); 666 if (S_ISDIR(stat_info.st_mode)) { 667 if (name != "." && name != "..") { 668 subdirs.push_back(path); 669 subdirs.push_back(prefix + name + "/"); 670 // scandir(path, prefix + name + "/"); 671 } 672 } else { 673 try { 674 string id = prefix + name; 675 subfiles.push_back(path); 676 subfiles.push_back(id); 677 // handleFile(path, id); 678 } catch (const exception& e) { 679 cerr << "Error: While processing \"" << path << "\", " 680 << e.what() << endl; 681 exit(1); 682 } 683 } 684 } 685 closedir(dp); 686 chdir(pwd); 687 688 for (int32_t i = 0; i < static_cast<int32_t>(subfiles.size()); i += 2) { 689 try { 690 handleFile(subfiles[i], subfiles[i+1]); 691 } catch (const exception& e) { 692 cerr << "Error: While processing \"" << subfiles[i] << "\", " 693 << e.what() << endl; 694 exit(1); 695 } 696 } 697 for (int32_t i = 0; i < static_cast<int32_t>(subdirs.size()); i += 2) { 698 scandir(subdirs[i], subdirs[i+1]); 699 } 700 } 701 702 #endif 703 704 //-------------------------------------------------------------------- 705 // Final zone and rule info 706 //-------------------------------------------------------------------- 707 708 /** 709 * Read and discard the current line. 710 */ 711 void consumeLine(istream& in) { 712 int32_t c; 713 do { 714 c = in.get(); 715 } while (c != EOF && c != '\n'); 716 } 717 718 enum { 719 DOM = 0, 720 DOWGEQ = 1, 721 DOWLEQ = 2 722 }; 723 724 const char* TIME_MODE[] = {"w", "s", "u"}; 725 726 // Allow 29 days in February because zic outputs February 29 727 // for rules like "last Sunday in February". 728 const int32_t MONTH_LEN[] = {31,29,31,30,31,30,31,31,30,31,30,31}; 729 730 const int32_t HOUR = 3600; 731 732 struct FinalZone { 733 int32_t offset; // raw offset 734 int32_t year; // takes effect for y >= year 735 string ruleid; 736 set<string> aliases; 737 FinalZone(int32_t _offset, int32_t _year, const string& _ruleid) : 738 offset(_offset), year(_year), ruleid(_ruleid) { 739 if (offset <= -16*HOUR || offset >= 16*HOUR) { 740 ostringstream os; 741 os << "Invalid input offset " << offset 742 << " for year " << year 743 << " and rule ID " << ruleid; 744 throw invalid_argument(os.str()); 745 } 746 if (year < 1900) { 747 ostringstream os; 748 os << "Invalid input year " << year 749 << " with offset " << offset 750 << " and rule ID " << ruleid; 751 throw invalid_argument(os.str()); 752 } 753 } 754 FinalZone() : offset(-1), year(-1) {} 755 void addLink(const string& alias) { 756 if (aliases.find(alias) != aliases.end()) { 757 ostringstream os; 758 os << "Duplicate alias " << alias; 759 throw invalid_argument(os.str()); 760 } 761 aliases.insert(alias); 762 } 763 }; 764 765 struct FinalRulePart { 766 int32_t mode; 767 int32_t month; 768 int32_t dom; 769 int32_t dow; 770 int32_t time; 771 int32_t offset; // dst offset, usually either 0 or 1:00 772 773 // Isstd and isgmt only have 3 valid states, corresponding to local 774 // wall time, local standard time, and GMT standard time. 775 // Here is how the isstd & isgmt flags are set by zic: 776 //| case 's': /* Standard */ 777 //| rp->r_todisstd = true; 778 //| rp->r_todisgmt = false; 779 //| case 'w': /* Wall */ 780 //| rp->r_todisstd = false; 781 //| rp->r_todisgmt = false; 782 //| case 'g': /* Greenwich */ 783 //| case 'u': /* Universal */ 784 //| case 'z': /* Zulu */ 785 //| rp->r_todisstd = true; 786 //| rp->r_todisgmt = true; 787 bool isstd; 788 bool isgmt; 789 790 bool isset; // used during building; later ignored 791 792 FinalRulePart() : isset(false) {} 793 void set(const string& id, 794 const string& _mode, 795 int32_t _month, 796 int32_t _dom, 797 int32_t _dow, 798 int32_t _time, 799 bool _isstd, 800 bool _isgmt, 801 int32_t _offset) { 802 if (isset) { 803 throw invalid_argument("FinalRulePart set twice"); 804 } 805 isset = true; 806 if (_mode == "DOWLEQ") { 807 mode = DOWLEQ; 808 } else if (_mode == "DOWGEQ") { 809 mode = DOWGEQ; 810 } else if (_mode == "DOM") { 811 mode = DOM; 812 } else { 813 throw invalid_argument("Unrecognized FinalRulePart mode"); 814 } 815 month = _month; 816 dom = _dom; 817 dow = _dow; 818 time = _time; 819 isstd = _isstd; 820 isgmt = _isgmt; 821 offset = _offset; 822 823 ostringstream os; 824 if (month < 0 || month >= 12) { 825 os << "Invalid input month " << month; 826 } 827 if (dom < 1 || dom > MONTH_LEN[month]) { 828 os << "Invalid input day of month " << dom; 829 } 830 if (mode != DOM && (dow < 0 || dow >= 7)) { 831 os << "Invalid input day of week " << dow; 832 } 833 if (offset < (-1 * HOUR) || offset > (2 * HOUR)) { 834 os << "Invalid input offset " << offset; 835 } 836 if (isgmt && !isstd) { 837 os << "Invalid input isgmt && !isstd"; 838 } 839 if (!os.str().empty()) { 840 os << " for rule " 841 << id 842 << _mode 843 << month << dom << dow << time 844 << isstd << isgmt 845 << offset; 846 throw invalid_argument(os.str()); 847 } 848 } 849 850 /** 851 * Return the time mode as an ICU SimpleTimeZone int from 0..2; 852 * see simpletz.h. 853 */ 854 int32_t timemode() const { 855 if (isgmt) { 856 assert(isstd); 857 return 2; // gmt standard 858 } 859 if (isstd) { 860 return 1; // local standard 861 } 862 return 0; // local wall 863 } 864 865 // The SimpleTimeZone encoding method for rules is as follows: 866 // stz_dowim stz_dow 867 // DOM: dom 0 868 // DOWGEQ: dom -(dow+1) 869 // DOWLEQ: -dom -(dow+1) 870 // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2 871 // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2 872 // to encode 7, use stz_dowim=7, stz_dow=0 873 // Note that for this program and for SimpleTimeZone, 0==Jan, 874 // but for this program 0==Sun while for SimpleTimeZone 1==Sun. 875 876 /** 877 * Return a "dowim" param suitable for SimpleTimeZone. 878 */ 879 int32_t stz_dowim() const { 880 return (mode == DOWLEQ) ? -dom : dom; 881 } 882 883 /** 884 * Return a "dow" param suitable for SimpleTimeZone. 885 */ 886 int32_t stz_dow() const { 887 return (mode == DOM) ? 0 : -(dow+1); 888 } 889 }; 890 891 struct FinalRule { 892 FinalRulePart part[2]; 893 894 bool isset() const { 895 return part[0].isset && part[1].isset; 896 } 897 898 void print(ostream& os) const; 899 }; 900 901 map<string,FinalZone> finalZones; 902 map<string,FinalRule> finalRules; 903 904 map<string, set<string> > links; 905 map<string, string> reverseLinks; 906 907 /** 908 * Predicate used to find FinalRule objects that do not have both 909 * sub-parts set (indicating an error in the input file). 910 */ 911 bool isNotSet(const pair<const string,FinalRule>& p) { 912 return !p.second.isset(); 913 } 914 915 /** 916 * Predicate used to find FinalZone objects that do not map to a known 917 * rule (indicating an error in the input file). 918 */ 919 bool mapsToUnknownRule(const pair<const string,FinalZone>& p) { 920 return finalRules.find(p.second.ruleid) == finalRules.end(); 921 } 922 923 /** 924 * This set is used to make sure each rule in finalRules is used at 925 * least once. First we populate it with all the rules from 926 * finalRules; then we remove all the rules referred to in 927 * finaleZones. 928 */ 929 set<string> ruleIDset; 930 931 void insertRuleID(const pair<string,FinalRule>& p) { 932 ruleIDset.insert(p.first); 933 } 934 935 void eraseRuleID(const pair<string,FinalZone>& p) { 936 ruleIDset.erase(p.second.ruleid); 937 } 938 939 /** 940 * Populate finalZones and finalRules from the given istream. 941 */ 942 void readFinalZonesAndRules(istream& in) { 943 944 for (;;) { 945 string token; 946 in >> token; 947 if (in.eof() || !in) { 948 break; 949 } else if (token == "zone") { 950 // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0) 951 string id, ruleid; 952 int32_t offset, year; 953 in >> id >> offset >> year >> ruleid; 954 consumeLine(in); 955 finalZones[id] = FinalZone(offset, year, ruleid); 956 } else if (token == "rule") { 957 // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600 958 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0 959 string id, mode; 960 int32_t month, dom, dow, time, offset; 961 bool isstd, isgmt; 962 in >> id >> mode >> month >> dom >> dow >> time >> isstd >> isgmt >> offset; 963 consumeLine(in); 964 FinalRule& fr = finalRules[id]; 965 int32_t p = fr.part[0].isset ? 1 : 0; 966 fr.part[p].set(id, mode, month, dom, dow, time, isstd, isgmt, offset); 967 } else if (token == "link") { 968 string fromid, toid; // fromid == "real" zone, toid == alias 969 in >> fromid >> toid; 970 // DO NOT consumeLine(in); 971 if (finalZones.find(toid) != finalZones.end()) { 972 throw invalid_argument("Bad link: `to' id is a \"real\" zone"); 973 } 974 975 links[fromid].insert(toid); 976 reverseLinks[toid] = fromid; 977 } else if (token.length() > 0 && token[0] == '#') { 978 consumeLine(in); 979 } else { 980 throw invalid_argument("Unrecognized keyword"); 981 } 982 } 983 984 if (!in.eof() && !in) { 985 throw invalid_argument("Parse failure"); 986 } 987 988 // Perform validity check: Each rule should have data for 2 parts. 989 if (count_if(finalRules.begin(), finalRules.end(), isNotSet) != 0) { 990 throw invalid_argument("One or more incomplete rule pairs"); 991 } 992 993 // Perform validity check: Each zone should map to a known rule. 994 if (count_if(finalZones.begin(), finalZones.end(), mapsToUnknownRule) != 0) { 995 throw invalid_argument("One or more zones refers to an unknown rule"); 996 } 997 998 // Perform validity check: Each rule should be referred to by a zone. 999 ruleIDset.clear(); 1000 for_each(finalRules.begin(), finalRules.end(), insertRuleID); 1001 for_each(finalZones.begin(), finalZones.end(), eraseRuleID); 1002 if (!ruleIDset.empty()) { 1003 throw invalid_argument("Unused rules"); 1004 } 1005 } 1006 1007 //-------------------------------------------------------------------- 1008 // Resource bundle output 1009 //-------------------------------------------------------------------- 1010 1011 // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT 1012 1013 void ZoneInfo::print(ostream& os, const string& id) const { 1014 // Implement compressed format #2: 1015 os << " /* " << id << " */ "; 1016 1017 if (aliasTo >= 0) { 1018 assert(aliases.empty()); 1019 os << ":int { " << aliasTo << " } "; // No endl - save room for comment. 1020 return; 1021 } 1022 1023 if (ICU44PLUS) { 1024 os << ":table {" << endl; 1025 } else { 1026 os << ":array {" << endl; 1027 } 1028 1029 vector<Transition>::const_iterator trn; 1030 vector<ZoneType>::const_iterator typ; 1031 1032 bool first; 1033 1034 if (ICU44PLUS) { 1035 trn = transitions.begin(); 1036 1037 // pre 32bit transitions 1038 if (trn != transitions.end() && trn->time < LOWEST_TIME32) { 1039 os << " transPre32:intvector { "; 1040 for (first = true; trn != transitions.end() && trn->time < LOWEST_TIME32; ++trn) { 1041 if (!first) { 1042 os<< ", "; 1043 } 1044 first = false; 1045 os << static_cast<int32_t>(trn->time >> 32) << ", " << static_cast<int32_t>(trn->time & 0x00000000ffffffff); 1046 } 1047 os << " }" << endl; 1048 } 1049 1050 // 32bit transitions 1051 if (trn != transitions.end() && trn->time < HIGHEST_TIME32) { 1052 os << " trans:intvector { "; 1053 for (first = true; trn != transitions.end() && trn->time < HIGHEST_TIME32; ++trn) { 1054 if (!first) { 1055 os << ", "; 1056 } 1057 first = false; 1058 os << trn->time; 1059 } 1060 os << " }" << endl; 1061 } 1062 1063 // post 32bit transitions 1064 if (trn != transitions.end()) { 1065 os << " transPost32:intvector { "; 1066 for (first = true; trn != transitions.end(); ++trn) { 1067 if (!first) { 1068 os<< ", "; 1069 } 1070 first = false; 1071 os << static_cast<int32_t>(trn->time >> 32) << ", " << static_cast<int32_t>(trn->time & 0x00000000ffffffff); 1072 } 1073 os << " }" << endl; 1074 } 1075 } else { 1076 os << " :intvector { "; 1077 for (trn = transitions.begin(), first = true; trn != transitions.end(); ++trn) { 1078 if (!first) os << ", "; 1079 first = false; 1080 os << trn->time; 1081 } 1082 os << " }" << endl; 1083 } 1084 1085 1086 first=true; 1087 if (ICU44PLUS) { 1088 os << " typeOffsets:intvector { "; 1089 } else { 1090 os << " :intvector { "; 1091 } 1092 for (typ = types.begin(); typ != types.end(); ++typ) { 1093 if (!first) os << ", "; 1094 first = false; 1095 os << typ->rawoffset << ", " << typ->dstoffset; 1096 } 1097 os << " }" << endl; 1098 1099 if (ICU44PLUS) { 1100 if (!transitions.empty()) { 1101 os << " typeMap:bin { \"" << hex << setfill('0'); 1102 for (trn = transitions.begin(); trn != transitions.end(); ++trn) { 1103 os << setw(2) << trn->type; 1104 } 1105 os << dec << "\" }" << endl; 1106 } 1107 } else { 1108 os << " :bin { \"" << hex << setfill('0'); 1109 for (trn = transitions.begin(); trn != transitions.end(); ++trn) { 1110 os << setw(2) << trn->type; 1111 } 1112 os << dec << "\" }" << endl; 1113 } 1114 1115 // Final zone info, if any 1116 if (finalYear != -1) { 1117 if (ICU44PLUS) { 1118 os << " finalRule { \"" << finalRuleID << "\" }" << endl; 1119 os << " finalRaw:int { " << finalOffset << " }" << endl; 1120 os << " finalYear:int { " << finalYear << " }" << endl; 1121 } else { 1122 os << " \"" << finalRuleID << "\"" << endl; 1123 os << " :intvector { " << finalOffset << ", " 1124 << finalYear << " }" << endl; 1125 } 1126 } 1127 1128 // Alias list, if any 1129 if (!aliases.empty()) { 1130 first = true; 1131 if (ICU44PLUS) { 1132 os << " links:intvector { "; 1133 } else { 1134 os << " :intvector { "; 1135 } 1136 for (set<int32_t>::const_iterator i=aliases.begin(); i!=aliases.end(); ++i) { 1137 if (!first) os << ", "; 1138 first = false; 1139 os << *i; 1140 } 1141 os << " }" << endl; 1142 } 1143 1144 os << " } "; // no trailing 'endl', so comments can be placed. 1145 } 1146 1147 inline ostream& 1148 operator<<(ostream& os, const ZoneMap& zoneinfo) { 1149 int32_t c = 0; 1150 for (ZoneMapIter it = zoneinfo.begin(); 1151 it != zoneinfo.end(); 1152 ++it) { 1153 if(c && !ICU44PLUS) os << ","; 1154 it->second.print(os, it->first); 1155 os << "//Z#" << c++ << endl; 1156 } 1157 return os; 1158 } 1159 1160 // print the string list 1161 ostream& printStringList( ostream& os, const ZoneMap& zoneinfo) { 1162 int32_t n = 0; // count 1163 int32_t col = 0; // column 1164 os << " Names {" << endl 1165 << " "; 1166 for (ZoneMapIter it = zoneinfo.begin(); 1167 it != zoneinfo.end(); 1168 ++it) { 1169 if(n) { 1170 os << ","; 1171 col ++; 1172 } 1173 const string& id = it->first; 1174 os << "\"" << id << "\""; 1175 col += id.length() + 2; 1176 if(col >= 50) { 1177 os << " // " << n << endl 1178 << " "; 1179 col = 0; 1180 } 1181 n++; 1182 } 1183 os << " // " << (n-1) << endl 1184 << " }" << endl; 1185 1186 return os; 1187 } 1188 1189 //-------------------------------------------------------------------- 1190 // main 1191 //-------------------------------------------------------------------- 1192 1193 /** 1194 * A zone type that contains only the raw and dst offset. Used by the 1195 * optimizeTypeList() method. 1196 */ 1197 struct SimplifiedZoneType { 1198 int64_t rawoffset; 1199 int64_t dstoffset; 1200 SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {} 1201 SimplifiedZoneType(const ZoneType& t) : rawoffset(t.rawoffset), 1202 dstoffset(t.dstoffset) {} 1203 bool operator<(const SimplifiedZoneType& t) const { 1204 return rawoffset < t.rawoffset || 1205 (rawoffset == t.rawoffset && 1206 dstoffset < t.dstoffset); 1207 } 1208 }; 1209 1210 /** 1211 * Construct a ZoneType from a SimplifiedZoneType. Note that this 1212 * discards information; the new ZoneType will have meaningless 1213 * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate, 1214 * since ignoring these is how we do optimization (we have no use for 1215 * these in historical transitions). 1216 */ 1217 ZoneType::ZoneType(const SimplifiedZoneType& t) : 1218 rawoffset(t.rawoffset), dstoffset(t.dstoffset), 1219 abbr(-1), isdst(false), isstd(false), isgmt(false) {} 1220 1221 /** 1222 * Optimize the type list to remove excess entries. The type list may 1223 * contain entries that are distinct only in terms of their dst, std, 1224 * or gmt flags. Since we don't care about those flags, we can reduce 1225 * the type list to a set of unique raw/dst offset pairs, and remap 1226 * the type indices in the transition list, which stores, for each 1227 * transition, a transition time and a type index. 1228 */ 1229 void ZoneInfo::optimizeTypeList() { 1230 // Assemble set of unique types; only those in the `transitions' 1231 // list, since there may be unused types in the `types' list 1232 // corresponding to transitions that have been trimmed (during 1233 // merging of final data). 1234 1235 if (aliasTo >= 0) return; // Nothing to do for aliases 1236 1237 if (!ICU44PLUS) { 1238 // This is the old logic which has a bug, which occasionally removes 1239 // the type before the first transition. The problem was fixed 1240 // by inserting the dummy transition indirectly. 1241 1242 // If there are zero transitions and one type, then leave that as-is. 1243 if (transitions.empty()) { 1244 if (types.size() != 1) { 1245 cerr << "Error: transition count = 0, type count = " << types.size() << endl; 1246 } 1247 return; 1248 } 1249 1250 set<SimplifiedZoneType> simpleset; 1251 for (vector<Transition>::const_iterator i=transitions.begin(); 1252 i!=transitions.end(); ++i) { 1253 assert(i->type < (int32_t)types.size()); 1254 simpleset.insert(types[i->type]); 1255 } 1256 1257 // Map types to integer indices 1258 map<SimplifiedZoneType,int32_t> simplemap; 1259 int32_t n=0; 1260 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); 1261 i!=simpleset.end(); ++i) { 1262 simplemap[*i] = n++; 1263 } 1264 1265 // Remap transitions 1266 for (vector<Transition>::iterator i=transitions.begin(); 1267 i!=transitions.end(); ++i) { 1268 assert(i->type < (int32_t)types.size()); 1269 ZoneType oldtype = types[i->type]; 1270 SimplifiedZoneType newtype(oldtype); 1271 assert(simplemap.find(newtype) != simplemap.end()); 1272 i->type = simplemap[newtype]; 1273 } 1274 1275 // Replace type list 1276 types.clear(); 1277 copy(simpleset.begin(), simpleset.end(), back_inserter(types)); 1278 1279 } else { 1280 if (types.size() > 1) { 1281 // Note: localtime uses the very first non-dst type as initial offsets. 1282 // If all types are DSTs, the very first type is treated as the initial offsets. 1283 1284 // Decide a type used as the initial offsets. ICU put the type at index 0. 1285 ZoneType initialType = types[0]; 1286 for (vector<ZoneType>::const_iterator i=types.begin(); i!=types.end(); ++i) { 1287 if (i->dstoffset == 0) { 1288 initialType = *i; 1289 break; 1290 } 1291 } 1292 1293 SimplifiedZoneType initialSimplifiedType(initialType); 1294 1295 // create a set of unique types, but ignoring fields which we're not interested in 1296 set<SimplifiedZoneType> simpleset; 1297 simpleset.insert(initialSimplifiedType); 1298 for (vector<Transition>::const_iterator i=transitions.begin(); i!=transitions.end(); ++i) { 1299 assert(i->type < (int32_t)types.size()); 1300 simpleset.insert(types[i->type]); 1301 } 1302 1303 // Map types to integer indices, however, keeping the first type at offset 0 1304 map<SimplifiedZoneType,int32_t> simplemap; 1305 simplemap[initialSimplifiedType] = 0; 1306 int32_t n = 1; 1307 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); i!=simpleset.end(); ++i) { 1308 if (*i < initialSimplifiedType || initialSimplifiedType < *i) { 1309 simplemap[*i] = n++; 1310 } 1311 } 1312 1313 // Remap transitions 1314 for (vector<Transition>::iterator i=transitions.begin(); 1315 i!=transitions.end(); ++i) { 1316 assert(i->type < (int32_t)types.size()); 1317 ZoneType oldtype = types[i->type]; 1318 SimplifiedZoneType newtype(oldtype); 1319 assert(simplemap.find(newtype) != simplemap.end()); 1320 i->type = simplemap[newtype]; 1321 } 1322 1323 // Replace type list 1324 types.clear(); 1325 types.emplace_back(initialSimplifiedType); 1326 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); i!=simpleset.end(); ++i) { 1327 if (*i < initialSimplifiedType || initialSimplifiedType < *i) { 1328 types.emplace_back(*i); 1329 } 1330 } 1331 1332 // Reiterating transitions to remove any transitions which 1333 // do not actually change the raw/dst offsets 1334 int32_t prevTypeIdx = 0; 1335 for (vector<Transition>::iterator i=transitions.begin(); i!=transitions.end();) { 1336 if (i->type == prevTypeIdx) { 1337 // this is not a time transition, probably just name change 1338 // e.g. America/Resolute after 2006 in 2010b 1339 transitions.erase(i); 1340 } else { 1341 prevTypeIdx = i->type; 1342 i++; 1343 } 1344 } 1345 } 1346 } 1347 1348 } 1349 1350 /** 1351 * Merge final zone data into this zone. 1352 */ 1353 void ZoneInfo::mergeFinalData(const FinalZone& fz) { 1354 int32_t year = fz.year; 1355 int64_t seconds = yearToSeconds(year); 1356 1357 if (!ICU44PLUS) { 1358 if (seconds > HIGHEST_TIME32) { 1359 // Avoid transitions beyond signed 32bit max second. 1360 // This may result incorrect offset computation around 1361 // HIGHEST_TIME32. This is a limitation of ICU 1362 // before 4.4. 1363 seconds = HIGHEST_TIME32; 1364 } 1365 } 1366 1367 vector<Transition>::iterator it = 1368 find_if(transitions.begin(), transitions.end(), 1369 [seconds](const Transition& t) { return t.time >= seconds; }); 1370 transitions.erase(it, transitions.end()); 1371 1372 if (finalYear != -1) { 1373 throw invalid_argument("Final zone already merged in"); 1374 } 1375 finalYear = fz.year; 1376 finalOffset = fz.offset; 1377 finalRuleID = fz.ruleid; 1378 } 1379 1380 /** 1381 * Merge the data from the given final zone into the core zone data by 1382 * calling the ZoneInfo member function mergeFinalData. 1383 */ 1384 void mergeOne(const string& zoneid, const FinalZone& fz) { 1385 if (ZONEINFO.find(zoneid) == ZONEINFO.end()) { 1386 throw invalid_argument("Unrecognized final zone ID"); 1387 } 1388 ZONEINFO[zoneid].mergeFinalData(fz); 1389 } 1390 1391 /** 1392 * Visitor function that merges the final zone data into the main zone 1393 * data structures. It calls mergeOne for each final zone and its 1394 * list of aliases. 1395 */ 1396 void mergeFinalZone(const pair<string,FinalZone>& p) { 1397 const string& id = p.first; 1398 const FinalZone& fz = p.second; 1399 1400 mergeOne(id, fz); 1401 } 1402 1403 /** 1404 * Print this rule in resource bundle format to os. ID and enclosing 1405 * braces handled elsewhere. 1406 */ 1407 void FinalRule::print(ostream& os) const { 1408 // First print the rule part that enters DST; then the rule part 1409 // that exits it. 1410 int32_t whichpart = (part[0].offset != 0) ? 0 : 1; 1411 assert(part[whichpart].offset != 0); 1412 assert(part[1-whichpart].offset == 0); 1413 1414 os << " "; 1415 for (int32_t i=0; i<2; ++i) { 1416 const FinalRulePart& p = part[whichpart]; 1417 whichpart = 1-whichpart; 1418 os << p.month << ", " << p.stz_dowim() << ", " << p.stz_dow() << ", " 1419 << p.time << ", " << p.timemode() << ", "; 1420 } 1421 os << part[whichpart].offset << endl; 1422 } 1423 1424 #define ICU_ZONE_OVERRIDE_SUFFIX "--ICU" 1425 #define ICU_ZONE_OVERRIDE_SUFFIX_LEN 5 1426 1427 int main(int argc, char *argv[]) { 1428 string rootpath, zonetab, version; 1429 bool validArgs = false; 1430 1431 if (argc == 4 || argc == 5) { 1432 validArgs = true; 1433 rootpath = argv[1]; 1434 zonetab = argv[2]; 1435 version = argv[3]; 1436 if (argc == 5) { 1437 if (strcmp(argv[4], "--old") == 0) { 1438 ICU44PLUS = false; 1439 TZ_RESOURCE_NAME = ICU_TZ_RESOURCE_OLD; 1440 } else { 1441 validArgs = false; 1442 } 1443 } 1444 } 1445 if (!validArgs) { 1446 cout << "Usage: tz2icu <dir> <cmap> <tzver> [--old]" << endl 1447 << " <dir> path to zoneinfo file tree generated by" << endl 1448 << " ICU-patched version of zic" << endl 1449 << " <cmap> country map, from tzdata archive," << endl 1450 << " typically named \"zone.tab\"" << endl 1451 << " <tzver> version string, such as \"2003e\"" << endl 1452 << " --old generating resource format before ICU4.4" << endl; 1453 exit(1); 1454 } 1455 1456 cout << "Olson data version: " << version << endl; 1457 cout << "ICU 4.4+ format: " << (ICU44PLUS ? "Yes" : "No") << endl; 1458 1459 try { 1460 ifstream finals(ICU_ZONE_FILE); 1461 if (finals) { 1462 readFinalZonesAndRules(finals); 1463 1464 cout << "Finished reading " << finalZones.size() 1465 << " final zones and " << finalRules.size() 1466 << " final rules from " ICU_ZONE_FILE << endl; 1467 } else { 1468 cerr << "Error: Unable to open " ICU_ZONE_FILE << endl; 1469 return 1; 1470 } 1471 } catch (const exception& error) { 1472 cerr << "Error: While reading " ICU_ZONE_FILE ": " << error.what() << endl; 1473 return 1; 1474 } 1475 1476 try { 1477 // Recursively scan all files below the given path, accumulating 1478 // their data into ZONEINFO. All files must be TZif files. Any 1479 // failure along the way will result in a call to exit(1). 1480 scandir(rootpath); 1481 } catch (const exception& error) { 1482 cerr << "Error: While scanning " << rootpath << ": " << error.what() << endl; 1483 return 1; 1484 } 1485 1486 cout << "Finished reading " << ZONEINFO.size() << " zoneinfo files [" 1487 << (ZONEINFO.begin())->first << ".." 1488 << (--ZONEINFO.end())->first << "]" << endl; 1489 1490 // Overrides TZ database zones with ICU custom zone definition. 1491 // These ICU zone overrides are defined in icuzones, with suffix --ICU. 1492 // If there is a matching TZ database zone, the zoneinfo is replaced 1493 // with the ICU definition. Then, the zone ID with --ICU suffix 1494 // will be deleted from the final list. 1495 // For example, zoneinfo for Europe/Dublin imported from the TZ database 1496 // will be replaced with the zone definition for Europe/Dublin--ICU 1497 // in icuzones. 1498 1499 // Collect zone IDs to be modified with ICU definition. 1500 vector<string> customZones; 1501 for (ZoneMapIter i = ZONEINFO.begin(); i != ZONEINFO.end(); ++i) { 1502 const string& id = i->first; 1503 size_t idx = id.rfind(ICU_ZONE_OVERRIDE_SUFFIX); 1504 if (idx != string::npos && idx == id.length() - ICU_ZONE_OVERRIDE_SUFFIX_LEN) { 1505 cout << "ICU zone override: " << id << endl; 1506 customZones.push_back(id.substr(0, idx)); 1507 } 1508 } 1509 1510 // 1511 // BEGIN ICU Custom ZoneInfo Override Handling 1512 // 1513 1514 // Replace zoneinfo with ICU definition, then remove ICU zone ID with 1515 // the special suffix. 1516 for (vector<string>::iterator i = customZones.begin(); i != customZones.end(); i++) { 1517 string& origId = *i; 1518 string custId = origId + ICU_ZONE_OVERRIDE_SUFFIX; 1519 1520 map<string,ZoneInfo>::iterator origZi = ZONEINFO.find(origId); 1521 map<string,ZoneInfo>::iterator custZi = ZONEINFO.find(custId); 1522 if (origZi != ZONEINFO.end() && custZi != ZONEINFO.end()) { 1523 // replace original zone info with custom override, 1524 // then delete one custom ID 1525 cout << "Replacing ZoneInfo " << origId << " with " << custId << endl; 1526 origZi->second = custZi->second; 1527 ZONEINFO.erase(custZi); 1528 } 1529 1530 // Also replace final rule 1531 map<string,FinalZone>::iterator origFz = finalZones.find(origId); 1532 map<string,FinalZone>::iterator custFz = finalZones.find(custId); 1533 if (origFz != finalZones.end() && custFz != finalZones.end()) { 1534 // replace original final zone with custom override, 1535 // then delete one for custom ID 1536 cout << "Replacing FinalZone for " << origId << " with " << custId << endl; 1537 origFz->second = custFz->second; 1538 finalZones.erase(custFz); 1539 } 1540 } 1541 1542 // Also remove aliases for ICU custom zoneinfo overrides. 1543 for (map<string,set<string>>::const_iterator i = links.begin(); i != links.end(); ) { 1544 const string& id = i->first; 1545 size_t idx = id.rfind(ICU_ZONE_OVERRIDE_SUFFIX); 1546 if (idx != string::npos && idx == id.length() - ICU_ZONE_OVERRIDE_SUFFIX_LEN) { 1547 const set<string>& aliases = i->second; 1548 // Also remove all revserse links 1549 for (set<string>::const_iterator j = aliases.begin(); j != aliases.end(); j++) { 1550 const string& alias = *j; 1551 cout << "Removing alias " << alias << endl; 1552 reverseLinks.erase(alias); 1553 } 1554 1555 links.erase(i++); 1556 } else { 1557 i++; 1558 } 1559 } 1560 1561 1562 // 1563 // END ICU Custom ZoneInfo Override Handling 1564 // 1565 1566 try { 1567 for_each(finalZones.begin(), finalZones.end(), mergeFinalZone); 1568 } catch (const exception& error) { 1569 cerr << "Error: While merging final zone data: " << error.what() << endl; 1570 return 1; 1571 } 1572 1573 // Process links (including ICU aliases). For each link set we have 1574 // a canonical ID (e.g., America/Los_Angeles) and a set of one or more 1575 // aliases (e.g., PST, PST8PDT, ...). 1576 1577 // 1. Add all aliases as zone objects in ZONEINFO 1578 for (map<string,set<string> >::const_iterator i = links.begin(); 1579 i!=links.end(); ++i) { 1580 const string& olson = i->first; 1581 const set<string>& aliases = i->second; 1582 if (ZONEINFO.find(olson) == ZONEINFO.end()) { 1583 cerr << "Error: Invalid 'Link' to non-existent \"" 1584 << olson << "\"" << endl; 1585 return 1; 1586 } 1587 for (set<string>::const_iterator j=aliases.begin(); 1588 j!=aliases.end(); ++j) { 1589 ZONEINFO[*j] = ZoneInfo(); 1590 } 1591 } 1592 1593 // 2. Create a mapping from zones to index numbers 0..n-1. 1594 map<string,int32_t> zoneIDs; 1595 vector<string> zoneIDlist; 1596 int32_t z=0; 1597 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1598 zoneIDs[i->first] = z++; 1599 zoneIDlist.push_back(i->first); 1600 } 1601 assert(z == (int32_t) ZONEINFO.size()); 1602 1603 // 3. Merge aliases. Sometimes aliases link to other aliases; we 1604 // resolve these into simplest possible sets. 1605 map<string,set<string> > links2; 1606 map<string,string> reverse2; 1607 for (map<string,set<string> >::const_iterator i = links.begin(); 1608 i!=links.end(); ++i) { 1609 string olson = i->first; 1610 while (reverseLinks.find(olson) != reverseLinks.end()) { 1611 olson = reverseLinks[olson]; 1612 } 1613 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) { 1614 links2[olson].insert(*j); 1615 reverse2[*j] = olson; 1616 } 1617 } 1618 links = links2; 1619 reverseLinks = reverse2; 1620 1621 if (false) { // Debugging: Emit link map 1622 for (map<string,set<string> >::const_iterator i = links.begin(); 1623 i!=links.end(); ++i) { 1624 cout << i->first << ": "; 1625 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) { 1626 cout << *j << ", "; 1627 } 1628 cout << endl; 1629 } 1630 } 1631 1632 // 4. Update aliases 1633 for (map<string,set<string> >::const_iterator i = links.begin(); 1634 i!=links.end(); ++i) { 1635 const string& olson = i->first; 1636 const set<string>& aliases = i->second; 1637 ZONEINFO[olson].clearAliases(); 1638 ZONEINFO[olson].addAlias(zoneIDs[olson]); 1639 for (set<string>::const_iterator j=aliases.begin(); 1640 j!=aliases.end(); ++j) { 1641 assert(zoneIDs.find(olson) != zoneIDs.end()); 1642 assert(zoneIDs.find(*j) != zoneIDs.end()); 1643 assert(ZONEINFO.find(*j) != ZONEINFO.end()); 1644 ZONEINFO[*j].setAliasTo(zoneIDs[olson]); 1645 ZONEINFO[olson].addAlias(zoneIDs[*j]); 1646 } 1647 } 1648 1649 // Once merging of final data is complete, we can optimize the type list 1650 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1651 i->second.optimizeTypeList(); 1652 } 1653 1654 // Create the country map 1655 map<string, string> icuRegions; // ICU's custom zone -> country override 1656 map<string, set<string> > countryMap; // country -> set of zones 1657 map<string, string> reverseCountryMap; // zone -> country 1658 1659 try { 1660 // Read icuregions file to collect ICU's own zone-region mapping data. 1661 ifstream frg(ICU_REGIONS); 1662 if (frg) { 1663 string line; 1664 while (getline(frg, line)) { 1665 if (line[0] == '#') continue; 1666 1667 string zone, country; 1668 istringstream is(line); 1669 is >> zone >> country; 1670 if (zone.empty()) continue; 1671 if (country.size() < 2) { 1672 cerr << "Error: Can't parse " << line << " in " << ICU_REGIONS << endl; 1673 return 1; 1674 } 1675 icuRegions[zone] = country; 1676 } 1677 } else { 1678 cout << "No custom region map [icuregions]" << endl; 1679 } 1680 } catch (const exception& error) { 1681 cerr << "Error: While reading " << ICU_REGIONS << ": " << error.what() << endl; 1682 return 1; 1683 } 1684 1685 try { 1686 ifstream f(zonetab.c_str()); 1687 if (!f) { 1688 cerr << "Error: Unable to open " << zonetab << endl; 1689 return 1; 1690 } 1691 int32_t n = 0; 1692 string line; 1693 while (getline(f, line)) { 1694 string::size_type lb = line.find('#'); 1695 if (lb != string::npos) { 1696 line.resize(lb); // trim comments 1697 } 1698 string country, coord, zone; 1699 istringstream is(line); 1700 is >> country >> coord >> zone; 1701 if (country.empty()) continue; 1702 if (country.size() != 2 || zone.empty()) { 1703 cerr << "Error: Can't parse " << line << " in " << zonetab << endl; 1704 return 1; 1705 } 1706 if (ZONEINFO.find(zone) == ZONEINFO.end()) { 1707 cerr << "Error: Country maps to invalid zone " << zone 1708 << " in " << zonetab << endl; 1709 return 1; 1710 } 1711 if (icuRegions.find(zone) != icuRegions.end()) { 1712 // Custom override 1713 string customCountry = icuRegions[zone]; 1714 cout << "Region Mapping: custom override for " << zone 1715 << " " << country << " -> " << customCountry << endl; 1716 country = customCountry; 1717 } 1718 countryMap[country].insert(zone); 1719 reverseCountryMap[zone] = country; 1720 //cerr << (n+1) << ": " << country << " <=> " << zone << endl; 1721 ++n; 1722 } 1723 cout << "Finished reading " << n 1724 << " country entries from " << zonetab << endl; 1725 } catch (const exception& error) { 1726 cerr << "Error: While reading " << zonetab << ": " << error.what() << endl; 1727 return 1; 1728 } 1729 1730 // Merge ICU's own zone-region mapping data 1731 for (map<string,string>::const_iterator i = icuRegions.begin(); 1732 i != icuRegions.end(); ++i) { 1733 const string& zid(i->first); 1734 if (reverseCountryMap.find(zid) != reverseCountryMap.end()) { 1735 continue; 1736 } 1737 cout << "Region Mapping: custom data zone=" << zid 1738 << ", region=" << i->second << endl; 1739 1740 reverseCountryMap[zid] = i->second; 1741 countryMap[i->second].insert(zid); 1742 } 1743 1744 // Merge ICU aliases into country map. Don't merge any alias 1745 // that already has a country map, since that doesn't make sense. 1746 // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we 1747 // should cross-map the countries between these two zones. 1748 for (map<string,set<string> >::const_iterator i = links.begin(); 1749 i!=links.end(); ++i) { 1750 const string& olson(i->first); 1751 if (reverseCountryMap.find(olson) == reverseCountryMap.end()) { 1752 continue; 1753 } 1754 string c = reverseCountryMap[olson]; 1755 const set<string>& aliases(i->second); 1756 for (set<string>::const_iterator j=aliases.begin(); 1757 j != aliases.end(); ++j) { 1758 if (reverseCountryMap.find(*j) == reverseCountryMap.end()) { 1759 countryMap[c].insert(*j); 1760 reverseCountryMap[*j] = c; 1761 //cerr << "Aliased country: " << c << " <=> " << *j << endl; 1762 } 1763 } 1764 } 1765 1766 // Create a pseudo-country containing all zones belonging to no country 1767 set<string> nocountry; 1768 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1769 if (reverseCountryMap.find(i->first) == reverseCountryMap.end()) { 1770 nocountry.insert(i->first); 1771 } 1772 } 1773 countryMap[""] = nocountry; 1774 1775 // Get local time & year for below 1776 time_t sec; 1777 time(&sec); 1778 struct tm* now = localtime(&sec); 1779 1780 string filename = TZ_RESOURCE_NAME + ".txt"; 1781 // Write out a resource-bundle source file containing data for 1782 // all zones. 1783 ofstream file(filename.c_str()); 1784 if (file) { 1785 file << "//---------------------------------------------------------" << endl 1786 << "// Copyright (C) 2016 and later: Unicode, Inc. and others." << endl 1787 << "// License & terms of use: http://www.unicode.org/copyright.html" << endl 1788 << "//---------------------------------------------------------" << endl 1789 << "// Build tool: tz2icu" << endl 1790 << "// Build date: " << asctime(now) /* << endl -- asctime emits CR */ 1791 << "// tz database: ftp://ftp.iana.org/tz/" << endl 1792 << "// tz version: " << version << endl 1793 << "// ICU version: " << U_ICU_VERSION << endl 1794 << "//---------------------------------------------------------" << endl 1795 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl 1796 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl 1797 << "//---------------------------------------------------------" << endl 1798 << endl 1799 << TZ_RESOURCE_NAME << ":table(nofallback) {" << endl 1800 << " TZVersion { \"" << version << "\" }" << endl 1801 << " Zones:array { " << endl 1802 << ZONEINFO // Zones (the actual data) 1803 << " }" << endl; 1804 1805 // Names correspond to the Zones list, used for binary searching. 1806 printStringList ( file, ZONEINFO ); // print the Names list 1807 1808 // Final Rules are used if requested by the zone 1809 file << " Rules { " << endl; 1810 // Emit final rules 1811 int32_t frc = 0; 1812 for(map<string,FinalRule>::iterator i=finalRules.begin(); 1813 i!=finalRules.end(); ++i) { 1814 const string& id = i->first; 1815 const FinalRule& r = i->second; 1816 file << " " << id << ":intvector {" << endl; 1817 r.print(file); 1818 file << " } //_#" << frc++ << endl; 1819 } 1820 file << " }" << endl; 1821 1822 // Emit country (region) map. 1823 if (ICU44PLUS) { 1824 file << " Regions:array {" << endl; 1825 int32_t zn = 0; 1826 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1827 map<string, string>::iterator cit = reverseCountryMap.find(i->first); 1828 if (cit == reverseCountryMap.end()) { 1829 file << " \"001\","; 1830 } else { 1831 file << " \"" << cit->second << "\", "; 1832 } 1833 file << "//Z#" << zn++ << " " << i->first << endl; 1834 } 1835 file << " }" << endl; 1836 } else { 1837 file << " Regions { " << endl; 1838 int32_t rc = 0; 1839 for (map<string, set<string> >::const_iterator i=countryMap.begin(); 1840 i != countryMap.end(); ++i) { 1841 string country = i->first; 1842 const set<string>& zones(i->second); 1843 file << " "; 1844 if(country[0]==0) { 1845 file << "Default"; 1846 } 1847 file << country << ":intvector { "; 1848 bool first = true; 1849 for (set<string>::const_iterator j=zones.begin(); 1850 j != zones.end(); ++j) { 1851 if (!first) file << ", "; 1852 first = false; 1853 if (zoneIDs.find(*j) == zoneIDs.end()) { 1854 cerr << "Error: Nonexistent zone in country map: " << *j << endl; 1855 return 1; 1856 } 1857 file << zoneIDs[*j]; // emit the zone's index number 1858 } 1859 file << " } //R#" << rc++ << endl; 1860 } 1861 file << " }" << endl; 1862 } 1863 1864 file << "}" << endl; 1865 } 1866 1867 file.close(); 1868 1869 if (file) { // recheck error bit 1870 cout << "Finished writing " << TZ_RESOURCE_NAME << ".txt" << endl; 1871 } else { 1872 cerr << "Error: Unable to open/write to " << TZ_RESOURCE_NAME << ".txt" << endl; 1873 return 1; 1874 } 1875 } 1876 //eof