derb.cpp (22499B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 1999-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: derb.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2000sep6 16 * created by: Vladimir Weinstein as an ICU workshop example 17 * maintained by: Yves Arrouye <yves@realnames.com> 18 */ 19 20 #include "unicode/stringpiece.h" 21 #include "unicode/ucnv.h" 22 #include "unicode/unistr.h" 23 #include "unicode/ustring.h" 24 #include "unicode/putil.h" 25 #include "unicode/ustdio.h" 26 27 #include "charstr.h" 28 #include "uresimp.h" 29 #include "cmemory.h" 30 #include "cstring.h" 31 #include "uoptions.h" 32 #include "toolutil.h" 33 #include "ustrfmt.h" 34 35 #if !UCONFIG_NO_FORMATTING 36 37 #define DERB_VERSION "1.1" 38 39 #define DERB_DEFAULT_TRUNC 80 40 41 static const int32_t indentsize = 4; 42 static int32_t truncsize = DERB_DEFAULT_TRUNC; 43 static UBool opt_truncate = false; 44 45 static const char *getEncodingName(const char *encoding); 46 static void reportError(const char *pname, UErrorCode *status, const char *when); 47 static char16_t *quotedString(const char16_t *string); 48 static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status); 49 static void printString(UFILE *out, const char16_t *str, int32_t len); 50 static void printCString(UFILE *out, const char *str, int32_t len); 51 static void printIndent(UFILE *out, int32_t indent); 52 static void printHex(UFILE *out, uint8_t what); 53 54 static UOption options[]={ 55 UOPTION_HELP_H, 56 UOPTION_HELP_QUESTION_MARK, 57 /* 2 */ UOPTION_ENCODING, 58 /* 3 */ { "to-stdout", nullptr, nullptr, nullptr, 'c', UOPT_NO_ARG, 0 } , 59 /* 4 */ { "truncate", nullptr, nullptr, nullptr, 't', UOPT_OPTIONAL_ARG, 0 }, 60 /* 5 */ UOPTION_VERBOSE, 61 /* 6 */ UOPTION_DESTDIR, 62 /* 7 */ UOPTION_SOURCEDIR, 63 /* 8 */ { "bom", nullptr, nullptr, nullptr, 0, UOPT_NO_ARG, 0 }, 64 /* 9 */ UOPTION_ICUDATADIR, 65 /* 10 */ UOPTION_VERSION, 66 /* 11 */ { "suppressAliases", nullptr, nullptr, nullptr, 'A', UOPT_NO_ARG, 0 }, 67 }; 68 69 static UBool verbose = false; 70 static UBool suppressAliases = false; 71 static UFILE *ustderr = nullptr; 72 73 extern int 74 main(int argc, char* argv[]) { 75 const char *encoding = nullptr; 76 const char *outputDir = nullptr; /* nullptr = no output directory, use current */ 77 const char *inputDir = "."; 78 int tostdout = 0; 79 int prbom = 0; 80 81 const char *pname; 82 83 UResourceBundle *bundle = nullptr; 84 int32_t i = 0; 85 86 const char* arg; 87 88 /* Get the name of tool. */ 89 pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR); 90 #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR 91 if (!pname) { 92 pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR); 93 } 94 #endif 95 if (!pname) { 96 pname = *argv; 97 } else { 98 ++pname; 99 } 100 101 /* error handling, printing usage message */ 102 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); 103 104 /* error handling, printing usage message */ 105 if(argc<0) { 106 fprintf(stderr, 107 "%s: error in command line argument \"%s\"\n", pname, 108 argv[-argc]); 109 } 110 if(argc<0 || options[0].doesOccur || options[1].doesOccur) { 111 fprintf(argc < 0 ? stderr : stdout, 112 "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n" 113 " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n" 114 " [ -t, --truncate [ size ] ]\n" 115 " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n" 116 " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n" 117 " [ -A, --suppressAliases]\n" 118 " bundle ...\n", argc < 0 ? 'u' : 'U', 119 pname); 120 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; 121 } 122 123 if(options[10].doesOccur) { 124 fprintf(stderr, 125 "%s version %s (ICU version %s).\n" 126 "%s\n", 127 pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING); 128 return U_ZERO_ERROR; 129 } 130 if(options[2].doesOccur) { 131 encoding = options[2].value; 132 } 133 134 if (options[3].doesOccur) { 135 if(options[2].doesOccur) { 136 fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname); 137 return 3; 138 } 139 tostdout = 1; 140 } 141 142 if(options[4].doesOccur) { 143 opt_truncate = true; 144 if(options[4].value != nullptr) { 145 truncsize = atoi(options[4].value); /* user defined printable size */ 146 } else { 147 truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */ 148 } 149 } else { 150 opt_truncate = false; 151 } 152 153 if(options[5].doesOccur) { 154 verbose = true; 155 } 156 157 if (options[6].doesOccur) { 158 outputDir = options[6].value; 159 } 160 161 if(options[7].doesOccur) { 162 inputDir = options[7].value; /* we'll use users resources */ 163 } 164 165 if (options[8].doesOccur) { 166 prbom = 1; 167 } 168 169 if (options[9].doesOccur) { 170 u_setDataDirectory(options[9].value); 171 } 172 173 if (options[11].doesOccur) { 174 suppressAliases = true; 175 } 176 177 fflush(stderr); // use ustderr now. 178 ustderr = u_finit(stderr, nullptr, nullptr); 179 180 for (i = 1; i < argc; ++i) { 181 static const char16_t sp[] = { 0x0020 }; /* " " */ 182 183 arg = getLongPathname(argv[i]); 184 185 if (verbose) { 186 u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]); 187 } 188 189 icu::CharString locale; 190 UErrorCode status = U_ZERO_ERROR; 191 { 192 const char *p = findBasename(arg); 193 const char *q = uprv_strrchr(p, '.'); 194 if (q == nullptr) { 195 locale.append(p, status); 196 } else { 197 locale.append(p, static_cast<int32_t>(q - p), status); 198 } 199 } 200 if (U_FAILURE(status)) { 201 return status; 202 } 203 204 icu::CharString infile; 205 const char *thename = nullptr; 206 UBool fromICUData = !uprv_strcmp(inputDir, "-"); 207 if (!fromICUData) { 208 UBool absfilename = *arg == U_FILE_SEP_CHAR; 209 #if U_PLATFORM_HAS_WIN32_API 210 if (!absfilename) { 211 absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0]) 212 && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR); 213 } 214 #endif 215 if (absfilename) { 216 thename = arg; 217 } else { 218 const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR); 219 #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR 220 if (q == nullptr) { 221 q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR); 222 } 223 #endif 224 infile.append(inputDir, status); 225 if(q != nullptr) { 226 infile.appendPathPart(icu::StringPiece(arg, static_cast<int32_t>(q - arg)), status); 227 } 228 if (U_FAILURE(status)) { 229 return status; 230 } 231 thename = infile.data(); 232 } 233 } 234 if (thename) { 235 bundle = ures_openDirect(thename, locale.data(), &status); 236 } else { 237 bundle = ures_open(fromICUData ? nullptr : inputDir, locale.data(), &status); 238 } 239 if (U_SUCCESS(status)) { 240 UFILE *out = nullptr; 241 242 const char* filename = nullptr; 243 const char* ext = nullptr; 244 245 if (locale.isEmpty() || !tostdout) { 246 filename = findBasename(arg); 247 ext = uprv_strrchr(filename, '.'); 248 if (!ext) { 249 ext = uprv_strchr(filename, 0); 250 } 251 } 252 253 if (tostdout) { 254 out = u_get_stdout(); 255 } else { 256 icu::CharString thefile; 257 if (outputDir) { 258 thefile.append(outputDir, status); 259 } 260 thefile.appendPathPart(filename, status); 261 if (*ext) { 262 thefile.truncate(thefile.length() - static_cast<int32_t>(uprv_strlen(ext))); 263 } 264 thefile.append(".txt", status); 265 if (U_FAILURE(status)) { 266 return status; 267 } 268 269 out = u_fopen(thefile.data(), "w", nullptr, encoding); 270 if (!out) { 271 u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data()); 272 u_fclose(ustderr); 273 return 4; 274 } 275 } 276 277 // now, set the callback. 278 ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, nullptr, nullptr, &status); 279 if (U_FAILURE(status)) { 280 u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname); 281 u_fclose(ustderr); 282 if(!tostdout) { 283 u_fclose(out); 284 } 285 return 3; 286 } 287 288 if (prbom) { /* XXX: Should be done only for UTFs */ 289 u_fputc(0xFEFF, out); 290 } 291 u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName())); 292 u_fprintf(out, "// This file was dumped by derb(8) from "); 293 if (thename) { 294 u_fprintf(out, "%s", thename); 295 } else if (fromICUData) { 296 u_fprintf(out, "the ICU internal %s locale", locale.data()); 297 } 298 299 u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n"); 300 301 if (!locale.isEmpty()) { 302 u_fprintf(out, "%s", locale.data()); 303 } else { 304 u_fprintf(out, "%.*s%.*S", static_cast<int32_t>(ext - filename), filename, UPRV_LENGTHOF(sp), sp); 305 } 306 printOutBundle(out, bundle, 0, pname, &status); 307 308 if (!tostdout) { 309 u_fclose(out); 310 } 311 } 312 else { 313 reportError(pname, &status, "opening resource file"); 314 } 315 316 ures_close(bundle); 317 } 318 319 return 0; 320 } 321 322 static char16_t *quotedString(const char16_t *string) { 323 int len = u_strlen(string); 324 int alen = len; 325 const char16_t *sp; 326 char16_t *newstr, *np; 327 328 for (sp = string; *sp; ++sp) { 329 switch (*sp) { 330 case '\n': 331 case 0x0022: 332 ++alen; 333 break; 334 } 335 } 336 337 newstr = static_cast<char16_t*>(uprv_malloc((1 + alen) * U_SIZEOF_UCHAR)); 338 for (sp = string, np = newstr; *sp; ++sp) { 339 switch (*sp) { 340 case '\n': 341 *np++ = 0x005C; 342 *np++ = 0x006E; 343 break; 344 345 case 0x0022: 346 *np++ = 0x005C; 347 U_FALLTHROUGH; 348 default: 349 *np++ = *sp; 350 break; 351 } 352 } 353 *np = 0; 354 355 return newstr; 356 } 357 358 359 static void printString(UFILE *out, const char16_t *str, int32_t len) { 360 u_file_write(str, len, out); 361 } 362 363 static void printCString(UFILE *out, const char *str, int32_t len) { 364 if(len==-1) { 365 u_fprintf(out, "%s", str); 366 } else { 367 u_fprintf(out, "%.*s", len, str); 368 } 369 } 370 371 static void printIndent(UFILE *out, int32_t indent) { 372 icu::UnicodeString inchar(indent, 0x20, indent); 373 printString(out, inchar.getBuffer(), indent); 374 } 375 376 static void printHex(UFILE *out, uint8_t what) { 377 static const char map[] = "0123456789ABCDEF"; 378 char16_t hex[2]; 379 380 hex[0] = map[what >> 4]; 381 hex[1] = map[what & 0xf]; 382 383 printString(out, hex, 2); 384 } 385 386 static void printOutAlias(UFILE *out, UResourceBundle *parent, Resource r, const char *key, int32_t indent, const char *pname, UErrorCode *status) { 387 static const char16_t cr[] = { 0xA }; // LF 388 int32_t len = 0; 389 const char16_t* thestr = res_getAlias(&(parent->getResData()), r, &len); 390 char16_t *string = quotedString(thestr); 391 if(opt_truncate && len > truncsize) { 392 char msg[128]; 393 printIndent(out, indent); 394 snprintf(msg, sizeof(msg), "// WARNING: this resource, size %li is truncated to %li\n", 395 static_cast<long>(len), static_cast<long>(truncsize) / 2); 396 printCString(out, msg, -1); 397 len = truncsize; 398 } 399 if(U_SUCCESS(*status)) { 400 static const char16_t openStr[] = { 0x003A, 0x0061, 0x006C, 0x0069, 0x0061, 0x0073, 0x0020, 0x007B, 0x0020, 0x0022 }; /* ":alias { \"" */ 401 static const char16_t closeStr[] = { 0x0022, 0x0020, 0x007D, 0x0020 }; /* "\" } " */ 402 printIndent(out, indent); 403 if(key != nullptr) { 404 printCString(out, key, -1); 405 } 406 printString(out, openStr, UPRV_LENGTHOF(openStr)); 407 printString(out, string, len); 408 printString(out, closeStr, UPRV_LENGTHOF(closeStr)); 409 if(verbose) { 410 printCString(out, " // ALIAS", -1); 411 } 412 printString(out, cr, UPRV_LENGTHOF(cr)); 413 } else { 414 reportError(pname, status, "getting binary value"); 415 } 416 uprv_free(string); 417 } 418 419 static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status) 420 { 421 static const char16_t cr[] = { 0xA }; // LF 422 423 /* int32_t noOfElements = ures_getSize(resource);*/ 424 int32_t i = 0; 425 const char *key = ures_getKey(resource); 426 427 switch(ures_getType(resource)) { 428 case URES_STRING : 429 { 430 int32_t len=0; 431 const char16_t* thestr = ures_getString(resource, &len, status); 432 char16_t *string = quotedString(thestr); 433 434 /* TODO: String truncation */ 435 if(opt_truncate && len > truncsize) { 436 char msg[128]; 437 printIndent(out, indent); 438 snprintf(msg, sizeof(msg), "// WARNING: this resource, size %li is truncated to %li\n", 439 static_cast<long>(len), static_cast<long>(truncsize / 2)); 440 printCString(out, msg, -1); 441 len = truncsize/2; 442 } 443 printIndent(out, indent); 444 if(key != nullptr) { 445 static const char16_t openStr[] = { 0x0020, 0x007B, 0x0020, 0x0022 }; /* " { \"" */ 446 static const char16_t closeStr[] = { 0x0022, 0x0020, 0x007D }; /* "\" }" */ 447 printCString(out, key, static_cast<int32_t>(uprv_strlen(key))); 448 printString(out, openStr, UPRV_LENGTHOF(openStr)); 449 printString(out, string, len); 450 printString(out, closeStr, UPRV_LENGTHOF(closeStr)); 451 } else { 452 static const char16_t openStr[] = { 0x0022 }; /* "\"" */ 453 static const char16_t closeStr[] = { 0x0022, 0x002C }; /* "\"," */ 454 455 printString(out, openStr, UPRV_LENGTHOF(openStr)); 456 printString(out, string, u_strlen(string)); 457 printString(out, closeStr, UPRV_LENGTHOF(closeStr)); 458 } 459 460 if(verbose) { 461 printCString(out, "// STRING", -1); 462 } 463 printString(out, cr, UPRV_LENGTHOF(cr)); 464 465 uprv_free(string); 466 } 467 break; 468 469 case URES_INT : 470 { 471 static const char16_t openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0020, 0x007B, 0x0020 }; /* ":int { " */ 472 static const char16_t closeStr[] = { 0x0020, 0x007D }; /* " }" */ 473 char16_t num[20]; 474 475 printIndent(out, indent); 476 if(key != nullptr) { 477 printCString(out, key, -1); 478 } 479 printString(out, openStr, UPRV_LENGTHOF(openStr)); 480 uprv_itou(num, 20, ures_getInt(resource, status), 10, 0); 481 printString(out, num, u_strlen(num)); 482 printString(out, closeStr, UPRV_LENGTHOF(closeStr)); 483 484 if(verbose) { 485 printCString(out, "// INT", -1); 486 } 487 printString(out, cr, UPRV_LENGTHOF(cr)); 488 break; 489 } 490 case URES_BINARY : 491 { 492 int32_t len = 0; 493 const int8_t* data = reinterpret_cast<const int8_t*>(ures_getBinary(resource, &len, status)); 494 if(opt_truncate && len > truncsize) { 495 char msg[128]; 496 printIndent(out, indent); 497 snprintf(msg, sizeof(msg), "// WARNING: this resource, size %li is truncated to %li\n", 498 static_cast<long>(len), static_cast<long>(truncsize / 2)); 499 printCString(out, msg, -1); 500 len = truncsize; 501 } 502 if(U_SUCCESS(*status)) { 503 static const char16_t openStr[] = { 0x003A, 0x0062, 0x0069, 0x006E, 0x0061, 0x0072, 0x0079, 0x0020, 0x007B, 0x0020 }; /* ":binary { " */ 504 static const char16_t closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */ 505 printIndent(out, indent); 506 if(key != nullptr) { 507 printCString(out, key, -1); 508 } 509 printString(out, openStr, UPRV_LENGTHOF(openStr)); 510 for(i = 0; i<len; i++) { 511 printHex(out, *data++); 512 } 513 printString(out, closeStr, UPRV_LENGTHOF(closeStr)); 514 if(verbose) { 515 printCString(out, " // BINARY", -1); 516 } 517 printString(out, cr, UPRV_LENGTHOF(cr)); 518 } else { 519 reportError(pname, status, "getting binary value"); 520 } 521 } 522 break; 523 case URES_INT_VECTOR : 524 { 525 int32_t len = 0; 526 const int32_t *data = ures_getIntVector(resource, &len, status); 527 if(U_SUCCESS(*status)) { 528 static const char16_t openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0076, 0x0065, 0x0063, 0x0074, 0x006F, 0x0072, 0x0020, 0x007B, 0x0020 }; /* ":intvector { " */ 529 static const char16_t closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */ 530 char16_t num[20]; 531 532 printIndent(out, indent); 533 if(key != nullptr) { 534 printCString(out, key, -1); 535 } 536 printString(out, openStr, UPRV_LENGTHOF(openStr)); 537 for(i = 0; i < len - 1; i++) { 538 int32_t numLen = uprv_itou(num, 20, data[i], 10, 0); 539 num[numLen++] = 0x002C; /* ',' */ 540 num[numLen++] = 0x0020; /* ' ' */ 541 num[numLen] = 0; 542 printString(out, num, u_strlen(num)); 543 } 544 if(len > 0) { 545 uprv_itou(num, 20, data[len - 1], 10, 0); 546 printString(out, num, u_strlen(num)); 547 } 548 printString(out, closeStr, UPRV_LENGTHOF(closeStr)); 549 if(verbose) { 550 printCString(out, "// INTVECTOR", -1); 551 } 552 printString(out, cr, UPRV_LENGTHOF(cr)); 553 } else { 554 reportError(pname, status, "getting int vector"); 555 } 556 } 557 break; 558 case URES_TABLE : 559 case URES_ARRAY : 560 { 561 static const char16_t openStr[] = { 0x007B }; /* "{" */ 562 static const char16_t closeStr[] = { 0x007D, '\n' }; /* "}\n" */ 563 564 UResourceBundle *t = nullptr; 565 ures_resetIterator(resource); 566 printIndent(out, indent); 567 if(key != nullptr) { 568 printCString(out, key, -1); 569 } 570 printString(out, openStr, UPRV_LENGTHOF(openStr)); 571 if(verbose) { 572 if(ures_getType(resource) == URES_TABLE) { 573 printCString(out, "// TABLE", -1); 574 } else { 575 printCString(out, "// ARRAY", -1); 576 } 577 } 578 printString(out, cr, UPRV_LENGTHOF(cr)); 579 580 if(suppressAliases == false) { 581 while(U_SUCCESS(*status) && ures_hasNext(resource)) { 582 t = ures_getNextResource(resource, t, status); 583 if(U_SUCCESS(*status)) { 584 printOutBundle(out, t, indent+indentsize, pname, status); 585 } else { 586 reportError(pname, status, "While processing table"); 587 *status = U_ZERO_ERROR; 588 } 589 } 590 } else { /* we have to use low level access to do this */ 591 Resource r; 592 int32_t resSize = ures_getSize(resource); 593 UBool isTable = static_cast<UBool>(ures_getType(resource) == URES_TABLE); 594 for(i = 0; i < resSize; i++) { 595 /* need to know if it's an alias */ 596 if(isTable) { 597 r = res_getTableItemByIndex(&resource->getResData(), resource->fRes, i, &key); 598 } else { 599 r = res_getArrayItem(&resource->getResData(), resource->fRes, i); 600 } 601 if(U_SUCCESS(*status)) { 602 if(res_getPublicType(r) == URES_ALIAS) { 603 printOutAlias(out, resource, r, key, indent+indentsize, pname, status); 604 } else { 605 t = ures_getByIndex(resource, i, t, status); 606 printOutBundle(out, t, indent+indentsize, pname, status); 607 } 608 } else { 609 reportError(pname, status, "While processing table"); 610 *status = U_ZERO_ERROR; 611 } 612 } 613 } 614 615 printIndent(out, indent); 616 printString(out, closeStr, UPRV_LENGTHOF(closeStr)); 617 ures_close(t); 618 } 619 break; 620 default: 621 break; 622 } 623 624 } 625 626 static const char *getEncodingName(const char *encoding) { 627 UErrorCode err; 628 const char *enc; 629 630 err = U_ZERO_ERROR; 631 if (!(enc = ucnv_getStandardName(encoding, "MIME", &err))) { 632 err = U_ZERO_ERROR; 633 if (!(enc = ucnv_getStandardName(encoding, "IANA", &err))) { 634 // do nothing 635 } 636 } 637 638 return enc; 639 } 640 641 static void reportError(const char *pname, UErrorCode *status, const char *when) { 642 u_fprintf(ustderr, "%s: error %d while %s: %s\n", pname, *status, when, u_errorName(*status)); 643 } 644 645 #else 646 extern int 647 main(int argc, char* argv[]) { 648 /* Changing stdio.h ustdio.h requires that formatting not be disabled. */ 649 return 3; 650 } 651 #endif /* !UCONFIG_NO_FORMATTING */ 652 653 /* 654 * Local Variables: 655 * indent-tabs-mode: nil 656 * End: 657 */