reslist.cpp (62767B)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2000-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File reslist.cpp
*
* Modification History:
*
*   Date        Name        Description
*   02/21/00    weiv        Creation.
*******************************************************************************
*/

// Safer use of UnicodeString.
#ifndef UNISTR_FROM_CHAR_EXPLICIT
#   define UNISTR_FROM_CHAR_EXPLICIT explicit
#endif

// Less important, but still a good idea.
#ifndef UNISTR_FROM_STRING_EXPLICIT
#   define UNISTR_FROM_STRING_EXPLICIT explicit
#endif

#include <assert.h>
#include <iostream>
#include <set>
#include <stdio.h>

#include "unicode/localpointer.h"
#include "reslist.h"
#include "unewdata.h"
#include "unicode/ures.h"
#include "unicode/putil.h"
#include "errmsg.h"
#include "filterrb.h"
#include "toolutil.h"

#include "uarrsort.h"
#include "uelement.h"
#include "uhash.h"
#include "uinvchar.h"
#include "ustr_imp.h"
#include "unicode/utf16.h"
#include "uassert.h"

/*
 * Align binary data at a 16-byte offset from the start of the resource bundle,
 * to be safe for any data type it may contain.
 */
#define BIN_ALIGNMENT 16

// This numeric constant must be at least 1.
// If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
// and it makes no sense to move it to the pool bundle.
// The larger the threshold for fNumUnitsSaved
// the smaller the savings, and the smaller the pool bundle.
// We trade some total size reduction to reduce the pool bundle a bit,
// so that one can reasonably save data size by
// removing bundle files without rebuilding the pool bundle.
// This can also help to keep the pool and total (pool+local) string indexes
// within 16 bits, that is, within range of Table16 and Array16 containers.
#ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
#   define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
#endif

U_NAMESPACE_USE

/* Tool-wide options; each is changed only through the setter functions in this file. */

/* If true, SRBRoot::write() passes U_COPYRIGHT_STRING to udata_create(). */
static UBool gIncludeCopyright = false;
/* If true, SRBRoot::write() marks the bundle as using a pool bundle. */
static UBool gUsePoolBundle = false;
/* Stays true until setFormatVersion() is called. */
static UBool gIsDefaultFormatVersion = true;
/* Major formatVersion to write; used as an index into gFormatVersions[]. */
static int32_t gFormatVersion = 3;

/* How do we store string values? */
enum {
    STRINGS_UTF16_V1,   /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
    STRINGS_UTF16_V2    /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
};

static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40;  /* do not store the length explicitly for such strings */

/* Default value of SRBRoot::fUsePoolBundle when no real pool bundle is in use. */
static const ResFile kNoPoolBundle;

/*
 * res_none() returns the address of kNoResource,
 * for use in non-error cases when no resource is to be added to the bundle.
 * (nullptr is used in error cases.)
 */
static SResource kNoResource;  // TODO: const

/* Data header passed to udata_create(); formatVersion is overwritten in SRBRoot::write(). */
static UDataInfo dataInfo= {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(char16_t),
    0,

    {0x52, 0x65, 0x73, 0x42},     /* dataFormat="ResB" */
    {1, 3, 0, 0},                 /* formatVersion */
    {1, 4, 0, 0}                  /* dataVersion take a look at version inside parsed resb*/
};

static const UVersionInfo gFormatVersions[4] = {  /* indexed by a major-formatVersion integer */
    { 0, 0, 0, 0 },
    { 1, 3, 0, 0 },
    { 2, 0, 0, 0 },
    { 3, 0, 0, 0 }
};
// Remember to update genrb.h GENRB_VERSION when changing the data format.
// (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
118 119 static uint8_t calcPadding(uint32_t size) { 120 /* returns space we need to pad */ 121 return static_cast<uint8_t>(size % sizeof(uint32_t) ? sizeof(uint32_t) - (size % sizeof(uint32_t)) : 0); 122 123 } 124 125 void setIncludeCopyright(UBool val){ 126 gIncludeCopyright=val; 127 } 128 129 UBool getIncludeCopyright(){ 130 return gIncludeCopyright; 131 } 132 133 void setFormatVersion(int32_t formatVersion) { 134 gIsDefaultFormatVersion = false; 135 gFormatVersion = formatVersion; 136 } 137 138 int32_t getFormatVersion() { 139 return gFormatVersion; 140 } 141 142 void setUsePoolBundle(UBool use) { 143 gUsePoolBundle = use; 144 } 145 146 // TODO: return const pointer, or find another way to express "none" 147 struct SResource* res_none() { 148 return &kNoResource; 149 } 150 151 SResource::SResource() 152 : fType(URES_NONE), fWritten(false), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1), 153 line(0), fNext(nullptr) { 154 ustr_init(&fComment); 155 } 156 157 SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment, 158 UErrorCode &errorCode) 159 : fType(type), fWritten(false), fRes(RES_BOGUS), fRes16(-1), 160 fKey(bundle != nullptr ? 
bundle->addTag(tag, errorCode) : -1), fKey16(-1), 161 line(0), fNext(nullptr) { 162 ustr_init(&fComment); 163 if(comment != nullptr) { 164 ustr_cpy(&fComment, comment, &errorCode); 165 } 166 } 167 168 SResource::~SResource() { 169 ustr_deinit(&fComment); 170 } 171 172 ContainerResource::~ContainerResource() { 173 SResource *current = fFirst; 174 while (current != nullptr) { 175 SResource *next = current->fNext; 176 delete current; 177 current = next; 178 } 179 } 180 181 TableResource::~TableResource() {} 182 183 // TODO: clarify that containers adopt new items, even in error cases; use LocalPointer 184 void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) { 185 if (U_FAILURE(errorCode) || res == nullptr || res == &kNoResource) { 186 return; 187 } 188 189 /* remember this linenumber to report to the user if there is a duplicate key */ 190 res->line = linenumber; 191 192 /* here we need to traverse the list */ 193 ++fCount; 194 195 /* is the list still empty? */ 196 if (fFirst == nullptr) { 197 fFirst = res; 198 res->fNext = nullptr; 199 return; 200 } 201 202 const char *resKeyString = fRoot->fKeys + res->fKey; 203 204 SResource *current = fFirst; 205 206 SResource *prev = nullptr; 207 while (current != nullptr) { 208 const char *currentKeyString = fRoot->fKeys + current->fKey; 209 int diff; 210 /* 211 * formatVersion 1: compare key strings in native-charset order 212 * formatVersion 2 and up: compare key strings in ASCII order 213 */ 214 if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) { 215 diff = uprv_strcmp(currentKeyString, resKeyString); 216 } else { 217 diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString); 218 } 219 if (diff < 0) { 220 prev = current; 221 current = current->fNext; 222 } else if (diff > 0) { 223 /* we're either in front of the list, or in the middle */ 224 if (prev == nullptr) { 225 /* front of the list */ 226 fFirst = res; 227 } else { 228 /* middle of the list */ 229 prev->fNext = res; 230 
} 231 232 res->fNext = current; 233 return; 234 } else { 235 /* Key already exists! ERROR! */ 236 error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line); 237 errorCode = U_UNSUPPORTED_ERROR; 238 return; 239 } 240 } 241 242 /* end of list */ 243 prev->fNext = res; 244 res->fNext = nullptr; 245 } 246 247 ArrayResource::~ArrayResource() {} 248 249 void ArrayResource::add(SResource *res) { 250 if (res != nullptr && res != &kNoResource) { 251 if (fFirst == nullptr) { 252 fFirst = res; 253 } else { 254 fLast->fNext = res; 255 } 256 fLast = res; 257 ++fCount; 258 } 259 } 260 261 PseudoListResource::~PseudoListResource() {} 262 263 void PseudoListResource::add(SResource *res) { 264 if (res != nullptr && res != &kNoResource) { 265 res->fNext = fFirst; 266 fFirst = res; 267 ++fCount; 268 } 269 } 270 271 StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type, 272 const char16_t *value, int32_t len, 273 const UString* comment, UErrorCode &errorCode) 274 : SResource(bundle, tag, type, comment, errorCode) { 275 if (len == 0 && gFormatVersion > 1) { 276 fRes = URES_MAKE_EMPTY_RESOURCE(type); 277 fWritten = true; 278 return; 279 } 280 281 fString.setTo(ConstChar16Ptr(value), len); 282 fString.getTerminatedBuffer(); // Some code relies on NUL-termination. 283 if (U_SUCCESS(errorCode) && fString.isBogus()) { 284 errorCode = U_MEMORY_ALLOCATION_ERROR; 285 } 286 } 287 288 StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type, 289 const icu::UnicodeString &value, UErrorCode &errorCode) 290 : SResource(bundle, nullptr, type, nullptr, errorCode), fString(value) { 291 if (value.isEmpty() && gFormatVersion > 1) { 292 fRes = URES_MAKE_EMPTY_RESOURCE(type); 293 fWritten = true; 294 return; 295 } 296 297 fString.getTerminatedBuffer(); // Some code relies on NUL-termination. 
298 if (U_SUCCESS(errorCode) && fString.isBogus()) { 299 errorCode = U_MEMORY_ALLOCATION_ERROR; 300 } 301 } 302 303 // Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty. 304 StringBaseResource::StringBaseResource(int8_t type, const char16_t *value, int32_t len, 305 UErrorCode &errorCode) 306 : SResource(nullptr, nullptr, type, nullptr, errorCode), fString(true, value, len) { 307 assert(len > 0); 308 assert(!fString.isBogus()); 309 } 310 311 StringBaseResource::~StringBaseResource() {} 312 313 static int32_t U_CALLCONV 314 string_hash(const UElement key) { 315 const StringResource *res = static_cast<const StringResource *>(key.pointer); 316 return res->fString.hashCode(); 317 } 318 319 static UBool U_CALLCONV 320 string_comp(const UElement key1, const UElement key2) { 321 const StringResource *res1 = static_cast<const StringResource *>(key1.pointer); 322 const StringResource *res2 = static_cast<const StringResource *>(key2.pointer); 323 return res1->fString == res2->fString; 324 } 325 326 StringResource::~StringResource() {} 327 328 AliasResource::~AliasResource() {} 329 330 IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value, 331 const UString* comment, UErrorCode &errorCode) 332 : SResource(bundle, tag, URES_INT, comment, errorCode) { 333 fValue = value; 334 fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET); 335 fWritten = true; 336 } 337 338 IntResource::~IntResource() {} 339 340 IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag, 341 const UString* comment, UErrorCode &errorCode) 342 : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode), 343 fCount(0), fSize(RESLIST_INT_VECTOR_INIT_SIZE), 344 fArray(new uint32_t[fSize]) { 345 if (fArray == nullptr) { 346 errorCode = U_MEMORY_ALLOCATION_ERROR; 347 return; 348 } 349 } 350 351 IntVectorResource::~IntVectorResource() { 352 delete[] fArray; 353 } 354 355 void IntVectorResource::add(int32_t value, UErrorCode &errorCode) { 356 if 
(fCount == fSize) { 357 uint32_t* tmp = new uint32_t[2 * fSize]; 358 if (tmp == nullptr) { 359 errorCode = U_MEMORY_ALLOCATION_ERROR; 360 return; 361 } 362 uprv_memcpy(tmp, fArray, fSize * sizeof(uint32_t)); 363 delete[] fArray; 364 fArray = tmp; 365 fSize *= 2; 366 } 367 if (U_SUCCESS(errorCode)) { 368 fArray[fCount++] = value; 369 } 370 } 371 372 BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag, 373 uint32_t length, uint8_t *data, const char* fileName, 374 const UString* comment, UErrorCode &errorCode) 375 : SResource(bundle, tag, URES_BINARY, comment, errorCode), 376 fLength(length), fData(nullptr), fFileName(nullptr) { 377 if (U_FAILURE(errorCode)) { 378 return; 379 } 380 if (fileName != nullptr && *fileName != 0){ 381 fFileName = new char[uprv_strlen(fileName)+1]; 382 if (fFileName == nullptr) { 383 errorCode = U_MEMORY_ALLOCATION_ERROR; 384 return; 385 } 386 uprv_strcpy(fFileName, fileName); 387 } 388 if (length > 0) { 389 fData = new uint8_t[length]; 390 if (fData == nullptr) { 391 errorCode = U_MEMORY_ALLOCATION_ERROR; 392 return; 393 } 394 uprv_memcpy(fData, data, length); 395 } else { 396 if (gFormatVersion > 1) { 397 fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY); 398 fWritten = true; 399 } 400 } 401 } 402 403 BinaryResource::~BinaryResource() { 404 delete[] fData; 405 delete[] fFileName; 406 } 407 408 /* Writing Functions */ 409 410 void 411 StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, 412 UErrorCode &errorCode) { 413 assert(fSame == nullptr); 414 fSame = static_cast<StringResource *>(uhash_get(stringSet, this)); 415 if (fSame != nullptr) { 416 // This is a duplicate of a pool bundle string or of an earlier-visited string. 
417 if (++fSame->fNumCopies == 1) { 418 assert(fSame->fWritten); 419 int32_t poolStringIndex = static_cast<int32_t>(RES_GET_OFFSET(fSame->fRes)); 420 if (poolStringIndex >= bundle->fPoolStringIndexLimit) { 421 bundle->fPoolStringIndexLimit = poolStringIndex + 1; 422 } 423 } 424 return; 425 } 426 /* Put this string into the set for finding duplicates. */ 427 fNumCopies = 1; 428 uhash_put(stringSet, this, this, &errorCode); 429 430 if (bundle->fStringsForm != STRINGS_UTF16_V1) { 431 int32_t len = length(); 432 if (len <= MAX_IMPLICIT_STRING_LENGTH && 433 !U16_IS_TRAIL(fString[0]) && fString.indexOf(static_cast<char16_t>(0)) < 0) { 434 /* 435 * This string will be stored without an explicit length. 436 * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen(). 437 */ 438 fNumCharsForLength = 0; 439 } else if (len <= 0x3ee) { 440 fNumCharsForLength = 1; 441 } else if (len <= 0xfffff) { 442 fNumCharsForLength = 2; 443 } else { 444 fNumCharsForLength = 3; 445 } 446 bundle->f16BitStringsLength += fNumCharsForLength + len + 1; /* +1 for the NUL */ 447 } 448 } 449 450 void 451 ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, 452 UErrorCode &errorCode) { 453 for (SResource *current = fFirst; current != nullptr; current = current->fNext) { 454 current->preflightStrings(bundle, stringSet, errorCode); 455 } 456 } 457 458 void 459 SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) { 460 if (U_FAILURE(errorCode)) { 461 return; 462 } 463 if (fRes != RES_BOGUS) { 464 /* 465 * The resource item word was already precomputed, which means 466 * no further data needs to be written. 467 * This might be an integer, or an empty string/binary/etc. 468 */ 469 return; 470 } 471 handlePreflightStrings(bundle, stringSet, errorCode); 472 } 473 474 void 475 SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/, 476 UErrorCode & /*errorCode*/) { 477 /* Neither a string nor a container. 
*/ 478 } 479 480 int32_t 481 SRBRoot::makeRes16(uint32_t resWord) const { 482 if (resWord == 0) { 483 return 0; /* empty string */ 484 } 485 uint32_t type = RES_GET_TYPE(resWord); 486 int32_t offset = static_cast<int32_t>(RES_GET_OFFSET(resWord)); 487 if (type == URES_STRING_V2) { 488 assert(offset > 0); 489 if (offset < fPoolStringIndexLimit) { 490 if (offset < fPoolStringIndex16Limit) { 491 return offset; 492 } 493 } else { 494 offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit; 495 if (offset <= 0xffff) { 496 return offset; 497 } 498 } 499 } 500 return -1; 501 } 502 503 int32_t 504 SRBRoot::mapKey(int32_t oldpos) const { 505 const KeyMapEntry *map = fKeyMap; 506 if (map == nullptr) { 507 return oldpos; 508 } 509 int32_t i, start, limit; 510 511 /* do a binary search for the old, pre-compactKeys() key offset */ 512 start = fUsePoolBundle->fKeysCount; 513 limit = start + fKeysCount; 514 while (start < limit - 1) { 515 i = (start + limit) / 2; 516 if (oldpos < map[i].oldpos) { 517 limit = i; 518 } else { 519 start = i; 520 } 521 } 522 assert(oldpos == map[start].oldpos); 523 return map[start].newpos; 524 } 525 526 /* 527 * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings. 528 * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS 529 * and exits early. 530 */ 531 void 532 StringResource::handleWrite16(SRBRoot * /*bundle*/) { 533 SResource *same; 534 if ((same = fSame) != nullptr) { 535 /* This is a duplicate. 
*/ 536 assert(same->fRes != RES_BOGUS && same->fWritten); 537 fRes = same->fRes; 538 fWritten = same->fWritten; 539 } 540 } 541 542 void 543 ContainerResource::writeAllRes16(SRBRoot *bundle) { 544 for (SResource *current = fFirst; current != nullptr; current = current->fNext) { 545 bundle->f16BitUnits.append(static_cast<char16_t>(current->fRes16)); 546 } 547 fWritten = true; 548 } 549 550 void 551 ArrayResource::handleWrite16(SRBRoot *bundle) { 552 if (fCount == 0 && gFormatVersion > 1) { 553 fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY); 554 fWritten = true; 555 return; 556 } 557 558 int32_t res16 = 0; 559 for (SResource *current = fFirst; current != nullptr; current = current->fNext) { 560 current->write16(bundle); 561 res16 |= current->fRes16; 562 } 563 if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) { 564 fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length()); 565 bundle->f16BitUnits.append(static_cast<char16_t>(fCount)); 566 writeAllRes16(bundle); 567 } 568 } 569 570 void 571 TableResource::handleWrite16(SRBRoot *bundle) { 572 if (fCount == 0 && gFormatVersion > 1) { 573 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE); 574 fWritten = true; 575 return; 576 } 577 /* Find the smallest table type that fits the data. 
*/ 578 int32_t key16 = 0; 579 int32_t res16 = 0; 580 for (SResource *current = fFirst; current != nullptr; current = current->fNext) { 581 current->write16(bundle); 582 key16 |= current->fKey16; 583 res16 |= current->fRes16; 584 } 585 if (fCount > static_cast<uint32_t>(bundle->fMaxTableLength)) { 586 bundle->fMaxTableLength = fCount; 587 } 588 if (fCount <= 0xffff && key16 >= 0) { 589 if (res16 >= 0 && gFormatVersion > 1) { 590 /* 16-bit count, key offsets and values */ 591 fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length()); 592 bundle->f16BitUnits.append(static_cast<char16_t>(fCount)); 593 for (SResource *current = fFirst; current != nullptr; current = current->fNext) { 594 bundle->f16BitUnits.append(static_cast<char16_t>(current->fKey16)); 595 } 596 writeAllRes16(bundle); 597 } else { 598 /* 16-bit count, 16-bit key offsets, 32-bit values */ 599 fTableType = URES_TABLE; 600 } 601 } else { 602 /* 32-bit count, key offsets and values */ 603 fTableType = URES_TABLE32; 604 } 605 } 606 607 void 608 PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) { 609 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE); 610 fWritten = true; 611 } 612 613 void 614 SResource::write16(SRBRoot *bundle) { 615 if (fKey >= 0) { 616 // A tagged resource has a non-negative key index into the parsed key strings. 617 // compactKeys() built a map from parsed key index to the final key index. 618 // After the mapping, negative key indexes are used for shared pool bundle keys. 619 fKey = bundle->mapKey(fKey); 620 // If the key index fits into a Key16 for a Table or Table16, 621 // then set the fKey16 field accordingly. 622 // Otherwise keep it at -1. 
623 if (fKey >= 0) { 624 if (fKey < bundle->fLocalKeyLimit) { 625 fKey16 = fKey; 626 } 627 } else { 628 int32_t poolKeyIndex = fKey & 0x7fffffff; 629 if (poolKeyIndex <= 0xffff) { 630 poolKeyIndex += bundle->fLocalKeyLimit; 631 if (poolKeyIndex <= 0xffff) { 632 fKey16 = poolKeyIndex; 633 } 634 } 635 } 636 } 637 /* 638 * fRes != RES_BOGUS: 639 * The resource item word was already precomputed, which means 640 * no further data needs to be written. 641 * This might be an integer, or an empty or UTF-16 v2 string, 642 * an empty binary, etc. 643 */ 644 if (fRes == RES_BOGUS) { 645 handleWrite16(bundle); 646 } 647 // Compute fRes16 for precomputed as well as just-computed fRes. 648 fRes16 = bundle->makeRes16(fRes); 649 } 650 651 void 652 SResource::handleWrite16(SRBRoot * /*bundle*/) { 653 /* Only a few resource types write 16-bit units. */ 654 } 655 656 /* 657 * Only called for UTF-16 v1 strings, and for aliases. 658 * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS 659 * and exits early. 660 */ 661 void 662 StringBaseResource::handlePreWrite(uint32_t *byteOffset) { 663 /* Write the UTF-16 v1 string. 
*/ 664 fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2); 665 *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR; 666 } 667 668 void 669 IntVectorResource::handlePreWrite(uint32_t *byteOffset) { 670 if (fCount == 0 && gFormatVersion > 1) { 671 fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR); 672 fWritten = true; 673 } else { 674 fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2); 675 *byteOffset += (1 + fCount) * 4; 676 } 677 } 678 679 void 680 BinaryResource::handlePreWrite(uint32_t *byteOffset) { 681 uint32_t pad = 0; 682 uint32_t dataStart = *byteOffset + sizeof(fLength); 683 684 if (dataStart % BIN_ALIGNMENT) { 685 pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT); 686 *byteOffset += pad; /* pad == 4 or 8 or 12 */ 687 } 688 fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2); 689 *byteOffset += 4 + fLength; 690 } 691 692 void 693 ContainerResource::preWriteAllRes(uint32_t *byteOffset) { 694 for (SResource *current = fFirst; current != nullptr; current = current->fNext) { 695 current->preWrite(byteOffset); 696 } 697 } 698 699 void 700 ArrayResource::handlePreWrite(uint32_t *byteOffset) { 701 preWriteAllRes(byteOffset); 702 fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2); 703 *byteOffset += (1 + fCount) * 4; 704 } 705 706 void 707 TableResource::handlePreWrite(uint32_t *byteOffset) { 708 preWriteAllRes(byteOffset); 709 if (fTableType == URES_TABLE) { 710 /* 16-bit count, 16-bit key offsets, 32-bit values */ 711 fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2); 712 *byteOffset += 2 + fCount * 6; 713 } else { 714 /* 32-bit count, key offsets and values */ 715 fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2); 716 *byteOffset += 4 + fCount * 8; 717 } 718 } 719 720 void 721 SResource::preWrite(uint32_t *byteOffset) { 722 if (fRes != RES_BOGUS) { 723 /* 724 * The resource item word was already precomputed, which means 725 * no further data needs to be written. 
726 * This might be an integer, or an empty or UTF-16 v2 string, 727 * an empty binary, etc. 728 */ 729 return; 730 } 731 handlePreWrite(byteOffset); 732 *byteOffset += calcPadding(*byteOffset); 733 } 734 735 void 736 SResource::handlePreWrite(uint32_t * /*byteOffset*/) { 737 assert(false); 738 } 739 740 /* 741 * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings, 742 * write() sees fWritten and exits early. 743 */ 744 void 745 StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 746 /* Write the UTF-16 v1 string. */ 747 int32_t len = length(); 748 udata_write32(mem, len); 749 udata_writeUString(mem, getBuffer(), len + 1); 750 *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR; 751 fWritten = true; 752 } 753 754 void 755 ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) { 756 uint32_t i = 0; 757 for (SResource *current = fFirst; current != nullptr; ++i, current = current->fNext) { 758 current->write(mem, byteOffset); 759 } 760 assert(i == fCount); 761 } 762 763 void 764 ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) { 765 for (SResource *current = fFirst; current != nullptr; current = current->fNext) { 766 udata_write32(mem, current->fRes); 767 } 768 *byteOffset += fCount * 4; 769 } 770 771 void 772 ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 773 writeAllRes(mem, byteOffset); 774 udata_write32(mem, fCount); 775 *byteOffset += 4; 776 writeAllRes32(mem, byteOffset); 777 } 778 779 void 780 IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 781 udata_write32(mem, fCount); 782 for(uint32_t i = 0; i < fCount; ++i) { 783 udata_write32(mem, fArray[i]); 784 } 785 *byteOffset += (1 + fCount) * 4; 786 } 787 788 void 789 BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 790 uint32_t pad = 0; 791 uint32_t dataStart = *byteOffset + sizeof(fLength); 792 793 if (dataStart % BIN_ALIGNMENT) { 794 pad = 
(BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT); 795 udata_writePadding(mem, pad); /* pad == 4 or 8 or 12 */ 796 *byteOffset += pad; 797 } 798 799 udata_write32(mem, fLength); 800 if (fLength > 0) { 801 udata_writeBlock(mem, fData, fLength); 802 } 803 *byteOffset += 4 + fLength; 804 } 805 806 void 807 TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 808 writeAllRes(mem, byteOffset); 809 if(fTableType == URES_TABLE) { 810 udata_write16(mem, static_cast<uint16_t>(fCount)); 811 for (SResource *current = fFirst; current != nullptr; current = current->fNext) { 812 udata_write16(mem, current->fKey16); 813 } 814 *byteOffset += (1 + fCount)* 2; 815 if ((fCount & 1) == 0) { 816 /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */ 817 udata_writePadding(mem, 2); 818 *byteOffset += 2; 819 } 820 } else /* URES_TABLE32 */ { 821 udata_write32(mem, fCount); 822 for (SResource *current = fFirst; current != nullptr; current = current->fNext) { 823 udata_write32(mem, static_cast<uint32_t>(current->fKey)); 824 } 825 *byteOffset += (1 + fCount)* 4; 826 } 827 writeAllRes32(mem, byteOffset); 828 } 829 830 void 831 SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) { 832 if (fWritten) { 833 assert(fRes != RES_BOGUS); 834 return; 835 } 836 handleWrite(mem, byteOffset); 837 uint8_t paddingSize = calcPadding(*byteOffset); 838 if (paddingSize > 0) { 839 udata_writePadding(mem, paddingSize); 840 *byteOffset += paddingSize; 841 } 842 fWritten = true; 843 } 844 845 void 846 SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) { 847 assert(false); 848 } 849 850 void SRBRoot::write(const char *outputDir, const char *outputPkg, 851 char *writtenFilename, int writtenFilenameLen, 852 UErrorCode &errorCode) { 853 UNewDataMemory *mem = nullptr; 854 uint32_t byteOffset = 0; 855 uint32_t top, size; 856 char dataName[1024]; 857 int32_t indexes[URES_INDEX_TOP]; 858 859 compactKeys(errorCode); 860 /* 861 * Add 
padding bytes to fKeys so that fKeysTop is 4-aligned. 862 * Safe because the capacity is a multiple of 4. 863 */ 864 while (fKeysTop & 3) { 865 fKeys[fKeysTop++] = static_cast<char>(0xaa); 866 } 867 /* 868 * In URES_TABLE, use all local key offsets that fit into 16 bits, 869 * and use the remaining 16-bit offsets for pool key offsets 870 * if there are any. 871 * If there are no local keys, then use the whole 16-bit space 872 * for pool key offsets. 873 * Note: This cannot be changed without changing the major formatVersion. 874 */ 875 if (fKeysBottom < fKeysTop) { 876 if (fKeysTop <= 0x10000) { 877 fLocalKeyLimit = fKeysTop; 878 } else { 879 fLocalKeyLimit = 0x10000; 880 } 881 } else { 882 fLocalKeyLimit = 0; 883 } 884 885 UHashtable *stringSet; 886 if (gFormatVersion > 1) { 887 stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode); 888 if (U_SUCCESS(errorCode) && 889 fUsePoolBundle != nullptr && fUsePoolBundle->fStrings != nullptr) { 890 for (SResource *current = fUsePoolBundle->fStrings->fFirst; 891 current != nullptr; 892 current = current->fNext) { 893 StringResource *sr = static_cast<StringResource *>(current); 894 sr->fNumCopies = 0; 895 sr->fNumUnitsSaved = 0; 896 uhash_put(stringSet, sr, sr, &errorCode); 897 } 898 } 899 fRoot->preflightStrings(this, stringSet, errorCode); 900 } else { 901 stringSet = nullptr; 902 } 903 if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) { 904 compactStringsV2(stringSet, errorCode); 905 } 906 uhash_close(stringSet); 907 if (U_FAILURE(errorCode)) { 908 return; 909 } 910 911 int32_t formatVersion = gFormatVersion; 912 if (fPoolStringIndexLimit != 0) { 913 int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit; 914 if ((sum - 1) > RES_MAX_OFFSET) { 915 errorCode = U_BUFFER_OVERFLOW_ERROR; 916 return; 917 } 918 if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) { 919 // 16-bit indexes work for all pool + local strings. 
920 fPoolStringIndex16Limit = fPoolStringIndexLimit; 921 } else { 922 // Set the pool index threshold so that 16-bit indexes work 923 // for some pool strings and some local strings. 924 fPoolStringIndex16Limit = static_cast<int32_t>( 925 (static_cast<int64_t>(fPoolStringIndexLimit) * 0xffff) / sum); 926 } 927 } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) { 928 // If we just default to formatVersion 3 929 // but there are no pool bundle strings to share 930 // and we do not write a pool bundle, 931 // then write formatVersion 2 which is just as good. 932 formatVersion = 2; 933 } 934 935 fRoot->write16(this); 936 if (f16BitUnits.isBogus()) { 937 errorCode = U_MEMORY_ALLOCATION_ERROR; 938 return; 939 } 940 if (f16BitUnits.length() & 1) { 941 f16BitUnits.append(static_cast<char16_t>(0xaaaa)); /* pad to multiple of 4 bytes */ 942 } 943 944 byteOffset = fKeysTop + f16BitUnits.length() * 2; 945 fRoot->preWrite(&byteOffset); 946 947 /* total size including the root item */ 948 top = byteOffset; 949 950 if (writtenFilename && writtenFilenameLen) { 951 *writtenFilename = 0; 952 } 953 954 if (writtenFilename) { 955 int32_t off = 0, len = 0; 956 if (outputDir) { 957 uprv_strncpy(writtenFilename, outputDir, writtenFilenameLen); 958 } 959 if (writtenFilenameLen -= len) { 960 off += len; 961 writtenFilename[off] = U_FILE_SEP_CHAR; 962 if (--writtenFilenameLen) { 963 ++off; 964 if(outputPkg != nullptr) 965 { 966 uprv_strcpy(writtenFilename+off, outputPkg); 967 off += static_cast<int32_t>(uprv_strlen(outputPkg)); 968 writtenFilename[off] = '_'; 969 ++off; 970 } 971 972 len = static_cast<int32_t>(uprv_strlen(fLocale)); 973 if (len > writtenFilenameLen) { 974 len = writtenFilenameLen; 975 } 976 uprv_strncpy(writtenFilename + off, fLocale, writtenFilenameLen - off); 977 if (writtenFilenameLen -= len) { 978 off += len; 979 uprv_strncpy(writtenFilename + off, ".res", writtenFilenameLen - off); 980 } 981 } 982 } 983 } 984 985 if(outputPkg) 986 { 987 
uprv_strcpy(dataName, outputPkg); 988 uprv_strcat(dataName, "_"); 989 uprv_strcat(dataName, fLocale); 990 } 991 else 992 { 993 uprv_strcpy(dataName, fLocale); 994 } 995 996 uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo)); 997 998 mem = udata_create(outputDir, "res", dataName, 999 &dataInfo, (gIncludeCopyright==true)? U_COPYRIGHT_STRING:nullptr, &errorCode); 1000 if(U_FAILURE(errorCode)){ 1001 return; 1002 } 1003 1004 /* write the root item */ 1005 udata_write32(mem, fRoot->fRes); 1006 1007 /* 1008 * formatVersion 1.1 (ICU 2.8): 1009 * write int32_t indexes[] after root and before the key strings 1010 * to make it easier to parse resource bundles in icuswap or from Java etc. 1011 */ 1012 uprv_memset(indexes, 0, sizeof(indexes)); 1013 indexes[URES_INDEX_LENGTH]= fIndexLength; 1014 indexes[URES_INDEX_KEYS_TOP]= fKeysTop>>2; 1015 indexes[URES_INDEX_RESOURCES_TOP] = static_cast<int32_t>(top >> 2); 1016 indexes[URES_INDEX_BUNDLE_TOP]= indexes[URES_INDEX_RESOURCES_TOP]; 1017 indexes[URES_INDEX_MAX_TABLE_LENGTH]= fMaxTableLength; 1018 1019 /* 1020 * formatVersion 1.2 (ICU 3.6): 1021 * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set 1022 * the memset() above initialized all indexes[] to 0 1023 */ 1024 if (fNoFallback) { 1025 indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK; 1026 } 1027 /* 1028 * formatVersion 2.0 (ICU 4.4): 1029 * more compact string value storage, optional pool bundle 1030 */ 1031 if (URES_INDEX_16BIT_TOP < fIndexLength) { 1032 indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1); 1033 } 1034 if (URES_INDEX_POOL_CHECKSUM < fIndexLength) { 1035 if (fIsPoolBundle) { 1036 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK; 1037 uint32_t checksum = computeCRC(static_cast<const char*>(fKeys + fKeysBottom), 1038 static_cast<uint32_t>(fKeysTop - fKeysBottom), 0); 1039 if (f16BitUnits.length() <= 1) { 1040 // no pool strings to checksum 
1041 } else if (U_IS_BIG_ENDIAN) { 1042 checksum = computeCRC(reinterpret_cast<const char *>(f16BitUnits.getBuffer()), 1043 static_cast<uint32_t>(f16BitUnits.length()) * 2, checksum); 1044 } else { 1045 // Swap to big-endian so we get the same checksum on all platforms 1046 // (except for charset family, due to the key strings). 1047 UnicodeString s(f16BitUnits); 1048 assert(!s.isBogus()); 1049 // .getBuffer(capacity) returns a mutable buffer 1050 char16_t* p = s.getBuffer(f16BitUnits.length()); 1051 for (int32_t count = f16BitUnits.length(); count > 0; --count) { 1052 uint16_t x = *p; 1053 *p++ = static_cast<uint16_t>((x << 8) | (x >> 8)); 1054 } 1055 s.releaseBuffer(f16BitUnits.length()); 1056 checksum = computeCRC(reinterpret_cast<const char*>(s.getBuffer()), 1057 static_cast<uint32_t>(f16BitUnits.length()) * 2, checksum); 1058 } 1059 indexes[URES_INDEX_POOL_CHECKSUM] = static_cast<int32_t>(checksum); 1060 } else if (gUsePoolBundle) { 1061 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE; 1062 indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum; 1063 } 1064 } 1065 // formatVersion 3 (ICU 56): 1066 // share string values via pool bundle strings 1067 indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8; // bits 23..0 -> 31..8 1068 indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000; // bits 27..24 -> 15..12 1069 indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16; 1070 1071 /* write the indexes[] */ 1072 udata_writeBlock(mem, indexes, fIndexLength*4); 1073 1074 /* write the table key strings */ 1075 udata_writeBlock(mem, fKeys+fKeysBottom, 1076 fKeysTop-fKeysBottom); 1077 1078 /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */ 1079 udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2); 1080 1081 /* write all of the bundle contents: the root item and its children */ 1082 byteOffset = fKeysTop + f16BitUnits.length() * 2; 1083 fRoot->write(mem, &byteOffset); 1084 
assert(byteOffset == top); 1085 1086 size = udata_finish(mem, &errorCode); 1087 if(top != size) { 1088 fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n", 1089 static_cast<int>(size), static_cast<int>(top)); 1090 errorCode = U_INTERNAL_PROGRAM_ERROR; 1091 } 1092 } 1093 1094 /* Opening Functions */ 1095 1096 TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { 1097 LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status); 1098 return U_SUCCESS(*status) ? res.orphan() : nullptr; 1099 } 1100 1101 ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { 1102 LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status); 1103 return U_SUCCESS(*status) ? res.orphan() : nullptr; 1104 } 1105 1106 struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const char16_t *value, int32_t len, const struct UString* comment, UErrorCode *status) { 1107 LocalPointer<SResource> res( 1108 new StringResource(bundle, tag, value, len, comment, *status), *status); 1109 return U_SUCCESS(*status) ? res.orphan() : nullptr; 1110 } 1111 1112 struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, char16_t *value, int32_t len, const struct UString* comment, UErrorCode *status) { 1113 LocalPointer<SResource> res( 1114 new AliasResource(bundle, tag, value, len, comment, *status), *status); 1115 return U_SUCCESS(*status) ? res.orphan() : nullptr; 1116 } 1117 1118 IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { 1119 LocalPointer<IntVectorResource> res( 1120 new IntVectorResource(bundle, tag, comment, *status), *status); 1121 return U_SUCCESS(*status) ? 
res.orphan() : nullptr; 1122 } 1123 1124 struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) { 1125 LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status); 1126 return U_SUCCESS(*status) ? res.orphan() : nullptr; 1127 } 1128 1129 struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) { 1130 LocalPointer<SResource> res( 1131 new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status); 1132 return U_SUCCESS(*status) ? res.orphan() : nullptr; 1133 } 1134 1135 SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode) 1136 : fRoot(nullptr), fLocale(nullptr), fIndexLength(0), fMaxTableLength(0), fNoFallback(false), 1137 fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle), 1138 fKeys(nullptr), fKeyMap(nullptr), 1139 fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), 1140 fKeysCount(0), fLocalKeyLimit(0), 1141 f16BitUnits(), f16BitStringsLength(0), 1142 fUsePoolBundle(&kNoPoolBundle), 1143 fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0), 1144 fWritePoolBundle(nullptr) { 1145 if (U_FAILURE(errorCode)) { 1146 return; 1147 } 1148 1149 if (gFormatVersion > 1) { 1150 // f16BitUnits must start with a zero for empty resources. 1151 // We might be able to omit it if there are no empty 16-bit resources. 
1152 f16BitUnits.append(static_cast<char16_t>(0)); 1153 } 1154 1155 fKeys = static_cast<char*>(uprv_malloc(sizeof(char) * KEY_SPACE_SIZE)); 1156 if (isPoolBundle) { 1157 fRoot = new PseudoListResource(this, errorCode); 1158 } else { 1159 fRoot = new TableResource(this, nullptr, comment, errorCode); 1160 } 1161 if (fKeys == nullptr || fRoot == nullptr || U_FAILURE(errorCode)) { 1162 if (U_SUCCESS(errorCode)) { 1163 errorCode = U_MEMORY_ALLOCATION_ERROR; 1164 } 1165 return; 1166 } 1167 1168 fKeysCapacity = KEY_SPACE_SIZE; 1169 /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */ 1170 if (gUsePoolBundle || isPoolBundle) { 1171 fIndexLength = URES_INDEX_POOL_CHECKSUM + 1; 1172 } else if (gFormatVersion >= 2) { 1173 fIndexLength = URES_INDEX_16BIT_TOP + 1; 1174 } else /* formatVersion 1 */ { 1175 fIndexLength = URES_INDEX_ATTRIBUTES + 1; 1176 } 1177 fKeysBottom = (1 /* root */ + fIndexLength) * 4; 1178 uprv_memset(fKeys, 0, fKeysBottom); 1179 fKeysTop = fKeysBottom; 1180 1181 if (gFormatVersion == 1) { 1182 fStringsForm = STRINGS_UTF16_V1; 1183 } else { 1184 fStringsForm = STRINGS_UTF16_V2; 1185 } 1186 } 1187 1188 /* Closing Functions */ 1189 1190 void res_close(struct SResource *res) { 1191 delete res; 1192 } 1193 1194 SRBRoot::~SRBRoot() { 1195 delete fRoot; 1196 uprv_free(fLocale); 1197 uprv_free(fKeys); 1198 uprv_free(fKeyMap); 1199 } 1200 1201 /* Misc Functions */ 1202 1203 void SRBRoot::setLocale(char16_t *locale, UErrorCode &errorCode) { 1204 if(U_FAILURE(errorCode)) { 1205 return; 1206 } 1207 1208 uprv_free(fLocale); 1209 fLocale = static_cast<char*>(uprv_malloc(sizeof(char) * (u_strlen(locale) + 1))); 1210 if(fLocale == nullptr) { 1211 errorCode = U_MEMORY_ALLOCATION_ERROR; 1212 return; 1213 } 1214 1215 u_UCharsToChars(locale, fLocale, u_strlen(locale)+1); 1216 } 1217 1218 const char * 1219 SRBRoot::getKeyString(int32_t key) const { 1220 if (key < 0) { 1221 return fUsePoolBundle->fKeys + (key & 0x7fffffff); 1222 } else { 1223 return 
fKeys + key; 1224 } 1225 } 1226 1227 const char * 1228 SResource::getKeyString(const SRBRoot *bundle) const { 1229 if (fKey == -1) { 1230 return nullptr; 1231 } 1232 return bundle->getKeyString(fKey); 1233 } 1234 1235 const char * 1236 SRBRoot::getKeyBytes(int32_t *pLength) const { 1237 *pLength = fKeysTop - fKeysBottom; 1238 return fKeys + fKeysBottom; 1239 } 1240 1241 int32_t 1242 SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) { 1243 int32_t keypos; 1244 1245 // It is not legal to add new key bytes after compactKeys is run! 1246 U_ASSERT(fKeyMap == nullptr); 1247 1248 if (U_FAILURE(errorCode)) { 1249 return -1; 1250 } 1251 if (length < 0 || (keyBytes == nullptr && length != 0)) { 1252 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 1253 return -1; 1254 } 1255 if (length == 0) { 1256 return fKeysTop; 1257 } 1258 1259 keypos = fKeysTop; 1260 fKeysTop += length; 1261 if (fKeysTop >= fKeysCapacity) { 1262 /* overflow - resize the keys buffer */ 1263 fKeysCapacity += KEY_SPACE_SIZE; 1264 fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity)); 1265 if(fKeys == nullptr) { 1266 errorCode = U_MEMORY_ALLOCATION_ERROR; 1267 return -1; 1268 } 1269 } 1270 1271 uprv_memcpy(fKeys + keypos, keyBytes, length); 1272 1273 return keypos; 1274 } 1275 1276 int32_t 1277 SRBRoot::addTag(const char *tag, UErrorCode &errorCode) { 1278 int32_t keypos; 1279 1280 if (U_FAILURE(errorCode)) { 1281 return -1; 1282 } 1283 1284 if (tag == nullptr) { 1285 /* no error: the root table and array items have no keys */ 1286 return -1; 1287 } 1288 1289 keypos = addKeyBytes(tag, static_cast<int32_t>(uprv_strlen(tag) + 1), errorCode); 1290 if (U_SUCCESS(errorCode)) { 1291 ++fKeysCount; 1292 } 1293 return keypos; 1294 } 1295 1296 static int32_t 1297 compareInt32(int32_t lPos, int32_t rPos) { 1298 /* 1299 * Compare possibly-negative key offsets. Don't just return lPos - rPos 1300 * because that is prone to negative-integer underflows. 
1301 */ 1302 if (lPos < rPos) { 1303 return -1; 1304 } else if (lPos > rPos) { 1305 return 1; 1306 } else { 1307 return 0; 1308 } 1309 } 1310 1311 static int32_t U_CALLCONV 1312 compareKeySuffixes(const void *context, const void *l, const void *r) { 1313 const struct SRBRoot* bundle = static_cast<const struct SRBRoot*>(context); 1314 int32_t lPos = static_cast<const KeyMapEntry*>(l)->oldpos; 1315 int32_t rPos = static_cast<const KeyMapEntry*>(r)->oldpos; 1316 const char *lStart = bundle->getKeyString(lPos); 1317 const char *lLimit = lStart; 1318 const char *rStart = bundle->getKeyString(rPos); 1319 const char *rLimit = rStart; 1320 int32_t diff; 1321 while (*lLimit != 0) { ++lLimit; } 1322 while (*rLimit != 0) { ++rLimit; } 1323 /* compare keys in reverse character order */ 1324 while (lStart < lLimit && rStart < rLimit) { 1325 diff = static_cast<int32_t>(static_cast<uint8_t>(*--lLimit)) - static_cast<int32_t>(static_cast<uint8_t>(*--rLimit)); 1326 if (diff != 0) { 1327 return diff; 1328 } 1329 } 1330 /* sort equal suffixes by descending key length */ 1331 diff = static_cast<int32_t>(rLimit - rStart) - static_cast<int32_t>(lLimit - lStart); 1332 if (diff != 0) { 1333 return diff; 1334 } 1335 /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. 
*/ 1336 return compareInt32(lPos, rPos); 1337 } 1338 1339 static int32_t U_CALLCONV 1340 compareKeyNewpos(const void * /*context*/, const void *l, const void *r) { 1341 return compareInt32(static_cast<const KeyMapEntry*>(l)->newpos, 1342 static_cast<const KeyMapEntry*>(r)->newpos); 1343 } 1344 1345 static int32_t U_CALLCONV 1346 compareKeyOldpos(const void * /*context*/, const void *l, const void *r) { 1347 return compareInt32(static_cast<const KeyMapEntry*>(l)->oldpos, 1348 static_cast<const KeyMapEntry*>(r)->oldpos); 1349 } 1350 1351 void SResource::collectKeys(std::function<void(int32_t)> collector) const { 1352 collector(fKey); 1353 } 1354 1355 void ContainerResource::collectKeys(std::function<void(int32_t)> collector) const { 1356 collector(fKey); 1357 for (SResource* curr = fFirst; curr != nullptr; curr = curr->fNext) { 1358 curr->collectKeys(collector); 1359 } 1360 } 1361 1362 void 1363 SRBRoot::compactKeys(UErrorCode &errorCode) { 1364 KeyMapEntry *map; 1365 char *keys; 1366 int32_t i; 1367 1368 // Except for pool bundles, keys might not be used. 1369 // Do not add unused keys to the final bundle. 
1370 std::set<int32_t> keysInUse; 1371 if (!fIsPoolBundle) { 1372 fRoot->collectKeys([&keysInUse](int32_t key) { 1373 if (key >= 0) { 1374 keysInUse.insert(key); 1375 } 1376 }); 1377 fKeysCount = static_cast<int32_t>(keysInUse.size()); 1378 } 1379 1380 int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount; 1381 if (U_FAILURE(errorCode) || fKeyMap != nullptr) { 1382 return; 1383 } 1384 map = static_cast<KeyMapEntry*>(uprv_malloc(keysCount * sizeof(KeyMapEntry))); 1385 if (map == nullptr) { 1386 errorCode = U_MEMORY_ALLOCATION_ERROR; 1387 return; 1388 } 1389 keys = const_cast<char*>(fUsePoolBundle->fKeys); 1390 for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) { 1391 map[i].oldpos = 1392 static_cast<int32_t>(keys - fUsePoolBundle->fKeys) | 0x80000000; /* negative oldpos */ 1393 map[i].newpos = 0; 1394 while (*keys != 0) { ++keys; } /* skip the key */ 1395 ++keys; /* skip the NUL */ 1396 } 1397 keys = fKeys + fKeysBottom; 1398 while (i < keysCount) { 1399 int32_t keyOffset = static_cast<int32_t>(keys - fKeys); 1400 if (!fIsPoolBundle && keysInUse.count(keyOffset) == 0) { 1401 // Mark the unused key as deleted 1402 while (*keys != 0) { *keys++ = 1; } 1403 *keys++ = 1; 1404 } else { 1405 map[i].oldpos = keyOffset; 1406 map[i].newpos = 0; 1407 while (*keys != 0) { ++keys; } /* skip the key */ 1408 ++keys; /* skip the NUL */ 1409 i++; 1410 } 1411 } 1412 if (keys != fKeys + fKeysTop) { 1413 // Throw away any unused keys from the end 1414 fKeysTop = static_cast<int32_t>(keys - fKeys); 1415 } 1416 /* Sort the keys so that each one is immediately followed by all of its suffixes. */ 1417 uprv_sortArray(map, keysCount, static_cast<int32_t>(sizeof(KeyMapEntry)), 1418 compareKeySuffixes, this, false, &errorCode); 1419 /* 1420 * Make suffixes point into earlier, longer strings that contain them 1421 * and mark the old, now unused suffix bytes as deleted. 
1422 */ 1423 if (U_SUCCESS(errorCode)) { 1424 keys = fKeys; 1425 for (i = 0; i < keysCount;) { 1426 /* 1427 * This key is not a suffix of the previous one; 1428 * keep this one and delete the following ones that are 1429 * suffixes of this one. 1430 */ 1431 const char *key; 1432 const char *keyLimit; 1433 int32_t j = i + 1; 1434 map[i].newpos = map[i].oldpos; 1435 if (j < keysCount && map[j].oldpos < 0) { 1436 /* Key string from the pool bundle, do not delete. */ 1437 i = j; 1438 continue; 1439 } 1440 key = getKeyString(map[i].oldpos); 1441 for (keyLimit = key; *keyLimit != 0; ++keyLimit) {} 1442 for (; j < keysCount && map[j].oldpos >= 0; ++j) { 1443 const char *k; 1444 char *suffix; 1445 const char *suffixLimit; 1446 int32_t offset; 1447 suffix = keys + map[j].oldpos; 1448 for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {} 1449 offset = static_cast<int32_t>((keyLimit - key) - (suffixLimit - suffix)); 1450 if (offset < 0) { 1451 break; /* suffix cannot be longer than the original */ 1452 } 1453 /* Is it a suffix of the earlier, longer key? */ 1454 for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {} 1455 if (suffix == suffixLimit && *k == *suffixLimit) { 1456 map[j].newpos = map[i].oldpos + offset; /* yes, point to the earlier key */ 1457 // Mark the suffix as deleted 1458 while (*suffix != 0) { *suffix++ = 1; } 1459 *suffix = 1; 1460 } else { 1461 break; /* not a suffix, restart from here */ 1462 } 1463 } 1464 i = j; 1465 } 1466 /* 1467 * Re-sort by newpos, then modify the key characters array in-place 1468 * to squeeze out unused bytes, and readjust the newpos offsets. 
1469 */ 1470 uprv_sortArray(map, keysCount, static_cast<int32_t>(sizeof(KeyMapEntry)), 1471 compareKeyNewpos, nullptr, false, &errorCode); 1472 if (U_SUCCESS(errorCode)) { 1473 int32_t oldpos, newpos, limit; 1474 oldpos = newpos = fKeysBottom; 1475 limit = fKeysTop; 1476 /* skip key offsets that point into the pool bundle rather than this new bundle */ 1477 for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {} 1478 if (i < keysCount) { 1479 while (oldpos < limit) { 1480 if (keys[oldpos] == 1) { 1481 ++oldpos; /* skip unused bytes */ 1482 } else { 1483 /* adjust the new offsets for keys starting here */ 1484 while (i < keysCount && map[i].newpos == oldpos) { 1485 map[i++].newpos = newpos; 1486 } 1487 /* move the key characters to their new position */ 1488 keys[newpos++] = keys[oldpos++]; 1489 } 1490 } 1491 U_ASSERT(i == keysCount); 1492 } 1493 fKeysTop = newpos; 1494 /* Re-sort once more, by old offsets for binary searching. */ 1495 uprv_sortArray(map, keysCount, static_cast<int32_t>(sizeof(KeyMapEntry)), 1496 compareKeyOldpos, nullptr, false, &errorCode); 1497 if (U_SUCCESS(errorCode)) { 1498 /* key size reduction by limit - newpos */ 1499 fKeyMap = map; 1500 map = nullptr; 1501 } 1502 } 1503 } 1504 uprv_free(map); 1505 } 1506 1507 static int32_t U_CALLCONV 1508 compareStringSuffixes(const void * /*context*/, const void *l, const void *r) { 1509 const StringResource *left = *static_cast<const StringResource* const*>(l); 1510 const StringResource *right = *static_cast<const StringResource* const*>(r); 1511 const char16_t *lStart = left->getBuffer(); 1512 const char16_t *lLimit = lStart + left->length(); 1513 const char16_t *rStart = right->getBuffer(); 1514 const char16_t *rLimit = rStart + right->length(); 1515 int32_t diff; 1516 /* compare keys in reverse character order */ 1517 while (lStart < lLimit && rStart < rLimit) { 1518 diff = static_cast<int32_t>(*--lLimit) - static_cast<int32_t>(*--rLimit); 1519 if (diff != 0) { 1520 return diff; 1521 } 1522 } 1523 /* 
sort equal suffixes by descending string length */ 1524 return right->length() - left->length(); 1525 } 1526 1527 static int32_t U_CALLCONV 1528 compareStringLengths(const void * /*context*/, const void *l, const void *r) { 1529 const StringResource *left = *static_cast<const StringResource* const*>(l); 1530 const StringResource *right = *static_cast<const StringResource* const*>(r); 1531 int32_t diff; 1532 /* Make "is suffix of another string" compare greater than a non-suffix. */ 1533 diff = static_cast<int>(left->fSame != nullptr) - static_cast<int>(right->fSame != nullptr); 1534 if (diff != 0) { 1535 return diff; 1536 } 1537 /* sort by ascending string length */ 1538 diff = left->length() - right->length(); 1539 if (diff != 0) { 1540 return diff; 1541 } 1542 // sort by descending size reduction 1543 diff = right->fNumUnitsSaved - left->fNumUnitsSaved; 1544 if (diff != 0) { 1545 return diff; 1546 } 1547 // sort lexically 1548 return left->fString.compare(right->fString); 1549 } 1550 1551 void 1552 StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) { 1553 int32_t len = length(); 1554 fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length()); 1555 fWritten = true; 1556 switch(fNumCharsForLength) { 1557 case 0: 1558 break; 1559 case 1: 1560 dest.append(static_cast<char16_t>(0xdc00 + len)); 1561 break; 1562 case 2: 1563 dest.append(static_cast<char16_t>(0xdfef + (len >> 16))); 1564 dest.append(static_cast<char16_t>(len)); 1565 break; 1566 case 3: 1567 dest.append(static_cast<char16_t>(0xdfff)); 1568 dest.append(static_cast<char16_t>(len >> 16)); 1569 dest.append(static_cast<char16_t>(len)); 1570 break; 1571 default: 1572 break; /* will not occur */ 1573 } 1574 dest.append(fString); 1575 dest.append(static_cast<char16_t>(0)); 1576 } 1577 1578 void 1579 SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) { 1580 if (U_FAILURE(errorCode)) { 1581 return; 1582 } 1583 // Store the StringResource pointers in an array for 1584 // easy 
sorting and processing. 1585 // We enumerate a set of strings, so there are no duplicates. 1586 int32_t count = uhash_count(stringSet); 1587 LocalArray<StringResource *> array(new StringResource *[count], errorCode); 1588 if (U_FAILURE(errorCode)) { 1589 return; 1590 } 1591 for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) { 1592 array[i] = static_cast<StringResource*>(uhash_nextElement(stringSet, &pos)->key.pointer); 1593 } 1594 /* Sort the strings so that each one is immediately followed by all of its suffixes. */ 1595 uprv_sortArray(array.getAlias(), count, static_cast<int32_t>(sizeof(struct SResource**)), 1596 compareStringSuffixes, nullptr, false, &errorCode); 1597 if (U_FAILURE(errorCode)) { 1598 return; 1599 } 1600 /* 1601 * Make suffixes point into earlier, longer strings that contain them. 1602 * Temporarily use fSame and fSuffixOffset for suffix strings to 1603 * refer to the remaining ones. 1604 */ 1605 for (int32_t i = 0; i < count;) { 1606 /* 1607 * This string is not a suffix of the previous one; 1608 * write this one and subsume the following ones that are 1609 * suffixes of this one. 1610 */ 1611 StringResource *res = array[i]; 1612 res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength(); 1613 // Whole duplicates of pool strings are already account for in fPoolStringIndexLimit, 1614 // see StringResource::handlePreflightStrings(). 1615 int32_t j; 1616 for (j = i + 1; j < count; ++j) { 1617 StringResource *suffixRes = array[j]; 1618 /* Is it a suffix of the earlier, longer string? */ 1619 if (res->fString.endsWith(suffixRes->fString)) { 1620 assert(res->length() != suffixRes->length()); // Set strings are unique. 1621 if (suffixRes->fWritten) { 1622 // Pool string, skip. 
1623 } else if (suffixRes->fNumCharsForLength == 0) { 1624 /* yes, point to the earlier string */ 1625 suffixRes->fSame = res; 1626 suffixRes->fSuffixOffset = res->length() - suffixRes->length(); 1627 if (res->fWritten) { 1628 // Suffix-share res which is a pool string. 1629 // Compute the resource word and collect the maximum. 1630 suffixRes->fRes = 1631 res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset; 1632 int32_t poolStringIndex = static_cast<int32_t>(RES_GET_OFFSET(suffixRes->fRes)); 1633 if (poolStringIndex >= fPoolStringIndexLimit) { 1634 fPoolStringIndexLimit = poolStringIndex + 1; 1635 } 1636 suffixRes->fWritten = true; 1637 } 1638 res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength(); 1639 } else { 1640 /* write the suffix by itself if we need explicit length */ 1641 } 1642 } else { 1643 break; /* not a suffix, restart from here */ 1644 } 1645 } 1646 i = j; 1647 } 1648 /* 1649 * Re-sort the strings by ascending length (except suffixes last) 1650 * to optimize for URES_TABLE16 and URES_ARRAY16: 1651 * Keep as many as possible within reach of 16-bit offsets. 1652 */ 1653 uprv_sortArray(array.getAlias(), count, static_cast<int32_t>(sizeof(struct SResource**)), 1654 compareStringLengths, nullptr, false, &errorCode); 1655 if (U_FAILURE(errorCode)) { 1656 return; 1657 } 1658 if (fIsPoolBundle) { 1659 // Write strings that are sufficiently shared. 1660 // Avoid writing other strings. 1661 int32_t numStringsWritten = 0; 1662 int32_t numUnitsSaved = 0; 1663 int32_t numUnitsNotSaved = 0; 1664 for (int32_t i = 0; i < count; ++i) { 1665 StringResource *res = array[i]; 1666 // Maximum pool string index when suffix-sharing the last character. 
1667 int32_t maxStringIndex = 1668 f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1; 1669 if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING && 1670 maxStringIndex < RES_MAX_OFFSET) { 1671 res->writeUTF16v2(0, f16BitUnits); 1672 ++numStringsWritten; 1673 numUnitsSaved += res->fNumUnitsSaved; 1674 } else { 1675 numUnitsNotSaved += res->fNumUnitsSaved; 1676 res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING); 1677 res->fWritten = true; 1678 } 1679 } 1680 if (f16BitUnits.isBogus()) { 1681 errorCode = U_MEMORY_ALLOCATION_ERROR; 1682 } 1683 if (getShowWarning()) { // not quiet 1684 printf("number of shared strings: %d\n", static_cast<int>(numStringsWritten)); 1685 printf("16-bit units for strings: %6d = %6d bytes\n", 1686 static_cast<int>(f16BitUnits.length()), static_cast<int>(f16BitUnits.length()) * 2); 1687 printf("16-bit units saved: %6d = %6d bytes\n", 1688 static_cast<int>(numUnitsSaved), static_cast<int>(numUnitsSaved) * 2); 1689 printf("16-bit units not saved: %6d = %6d bytes\n", 1690 static_cast<int>(numUnitsNotSaved), static_cast<int>(numUnitsNotSaved) * 2); 1691 } 1692 } else { 1693 assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit); 1694 /* Write the non-suffix strings. 
*/ 1695 int32_t i; 1696 for (i = 0; i < count && array[i]->fSame == nullptr; ++i) { 1697 StringResource *res = array[i]; 1698 if (!res->fWritten) { 1699 int32_t localStringIndex = f16BitUnits.length(); 1700 if (localStringIndex >= fLocalStringIndexLimit) { 1701 fLocalStringIndexLimit = localStringIndex + 1; 1702 } 1703 res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits); 1704 } 1705 } 1706 if (f16BitUnits.isBogus()) { 1707 errorCode = U_MEMORY_ALLOCATION_ERROR; 1708 return; 1709 } 1710 if (fWritePoolBundle != nullptr && gFormatVersion >= 3) { 1711 PseudoListResource *poolStrings = 1712 static_cast<PseudoListResource *>(fWritePoolBundle->fRoot); 1713 for (i = 0; i < count && array[i]->fSame == nullptr; ++i) { 1714 assert(!array[i]->fString.isEmpty()); 1715 StringResource *poolString = 1716 new StringResource(fWritePoolBundle, array[i]->fString, errorCode); 1717 if (poolString == nullptr) { 1718 errorCode = U_MEMORY_ALLOCATION_ERROR; 1719 break; 1720 } 1721 poolStrings->add(poolString); 1722 } 1723 } 1724 /* Write the suffix strings. Make each point to the real string. */ 1725 for (; i < count; ++i) { 1726 StringResource *res = array[i]; 1727 if (res->fWritten) { 1728 continue; 1729 } 1730 StringResource *same = res->fSame; 1731 assert(res->length() != same->length()); // Set strings are unique. 1732 res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset; 1733 int32_t localStringIndex = static_cast<int32_t>(RES_GET_OFFSET(res->fRes)) - fPoolStringIndexLimit; 1734 // Suffixes of pool strings have been set already. 
1735 assert(localStringIndex >= 0); 1736 if (localStringIndex >= fLocalStringIndexLimit) { 1737 fLocalStringIndexLimit = localStringIndex + 1; 1738 } 1739 res->fWritten = true; 1740 } 1741 } 1742 // +1 to account for the initial zero in f16BitUnits 1743 assert(f16BitUnits.length() <= (f16BitStringsLength + 1)); 1744 } 1745 1746 void SResource::applyFilter( 1747 const PathFilter& /*filter*/, 1748 ResKeyPath& /*path*/, 1749 const SRBRoot* /*bundle*/) { 1750 // Only a few resource types (tables) are capable of being filtered. 1751 } 1752 1753 void TableResource::applyFilter( 1754 const PathFilter& filter, 1755 ResKeyPath& path, 1756 const SRBRoot* bundle) { 1757 SResource* prev = nullptr; 1758 SResource* curr = fFirst; 1759 for (; curr != nullptr;) { 1760 path.push(curr->getKeyString(bundle)); 1761 auto inclusion = filter.match(path); 1762 if (inclusion == PathFilter::EInclusion::INCLUDE) { 1763 // Include whole subtree 1764 // no-op 1765 if (isVerbose()) { 1766 std::cout << "genrb subtree: " << bundle->fLocale << ": INCLUDE: " << path << std::endl; 1767 } 1768 } else if (inclusion == PathFilter::EInclusion::EXCLUDE) { 1769 // Reject the whole subtree 1770 // Remove it from the linked list 1771 if (isVerbose()) { 1772 std::cout << "genrb subtree: " << bundle->fLocale << ": DELETE: " << path << std::endl; 1773 } 1774 if (prev == nullptr) { 1775 fFirst = curr->fNext; 1776 } else { 1777 prev->fNext = curr->fNext; 1778 } 1779 fCount--; 1780 delete curr; 1781 curr = prev; 1782 } else { 1783 U_ASSERT(inclusion == PathFilter::EInclusion::PARTIAL); 1784 // Recurse into the child 1785 curr->applyFilter(filter, path, bundle); 1786 } 1787 path.pop(); 1788 1789 prev = curr; 1790 if (curr == nullptr) { 1791 curr = fFirst; 1792 } else { 1793 curr = curr->fNext; 1794 } 1795 } 1796 }