writesrc.cpp (16312B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2005-2012, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: writesrc.c 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2005apr23 16 * created by: Markus W. Scherer 17 * 18 * Helper functions for writing source code for data. 19 */ 20 21 #include <stdio.h> 22 #include <time.h> 23 24 // The C99 standard suggested that C++ implementations not define PRId64 etc. constants 25 // unless this macro is defined. 26 // See the Notes at https://en.cppreference.com/w/cpp/types/integer . 27 // Similar to defining __STDC_LIMIT_MACROS in unicode/ptypes.h . 28 #ifndef __STDC_FORMAT_MACROS 29 # define __STDC_FORMAT_MACROS 30 #endif 31 #include <cinttypes> 32 33 #include "unicode/utypes.h" 34 #include "unicode/putil.h" 35 #include "unicode/ucptrie.h" 36 #include "unicode/errorcode.h" 37 #include "unicode/uniset.h" 38 #include "unicode/usetiter.h" 39 #include "unicode/utf16.h" 40 #include "utrie2.h" 41 #include "cstring.h" 42 #include "writesrc.h" 43 #include "util.h" 44 45 U_NAMESPACE_BEGIN 46 47 ValueNameGetter::~ValueNameGetter() {} 48 49 U_NAMESPACE_END 50 51 U_NAMESPACE_USE 52 53 static FILE * 54 usrc_createWithoutHeader(const char *path, const char *filename) { 55 char buffer[1024]; 56 const char *p; 57 char *q; 58 FILE *f; 59 char c; 60 61 if(path==nullptr) { 62 p=filename; 63 } else { 64 /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */ 65 uprv_strcpy(buffer, path); 66 q=buffer+uprv_strlen(buffer); 67 if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { 68 *q++=U_FILE_SEP_CHAR; 69 } 70 uprv_strcpy(q, filename); 71 p=buffer; 72 } 73 74 f=fopen(p, "w"); 75 if (f==nullptr) { 76 fprintf( 77 stderr, 78 "usrc_create(%s, %s): unable to create file\n", 79 path!=nullptr ? path : "", filename); 80 } 81 return f; 82 } 83 84 U_CAPI FILE * U_EXPORT2 85 usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) { 86 FILE *f = usrc_createWithoutHeader(path, filename); 87 if (f == nullptr) { 88 return f; 89 } 90 usrc_writeCopyrightHeader(f, "//", copyrightYear); 91 usrc_writeFileNameGeneratedBy(f, "//", filename, generator); 92 return f; 93 } 94 95 U_CAPI FILE * U_EXPORT2 96 usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) { 97 FILE *f = usrc_createWithoutHeader(path, filename); 98 if (f == nullptr) { 99 return f; 100 } 101 usrc_writeCopyrightHeader(f, "#", copyrightYear); 102 usrc_writeFileNameGeneratedBy(f, "#", filename, generator); 103 return f; 104 } 105 106 U_CAPI void U_EXPORT2 107 usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) { 108 fprintf(f, 109 "%s Copyright (C) %d and later: Unicode, Inc. and others.\n" 110 "%s License & terms of use: http://www.unicode.org/copyright.html\n", 111 prefix, copyrightYear, prefix); 112 if (copyrightYear <= 2016) { 113 fprintf(f, 114 "%s Copyright (C) 1999-2016, International Business Machines\n" 115 "%s Corporation and others. All Rights Reserved.\n", 116 prefix, prefix); 117 } 118 } 119 120 U_CAPI void U_EXPORT2 121 usrc_writeFileNameGeneratedBy( 122 FILE *f, 123 const char *prefix, 124 const char *filename, 125 const char *generator) { 126 char buffer[1024]; 127 const struct tm *lt; 128 time_t t; 129 130 const char *pattern = 131 "%s\n" 132 "%s file name: %s\n" 133 "%s\n" 134 "%s machine-generated by: %s\n" 135 "\n"; 136 137 time(&t); 138 lt=localtime(&t); 139 if(generator==nullptr && lt!=nullptr) { 140 strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt); 141 fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer); 142 } else { 143 fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator); 144 } 145 } 146 147 U_CAPI void U_EXPORT2 148 usrc_writeArray(FILE *f, 149 const char *prefix, 150 const void *p, int32_t width, int32_t length, 151 const char *indent, 152 const char *postfix) { 153 const uint8_t *p8; 154 const uint16_t *p16; 155 const uint32_t *p32; 156 const int64_t *p64; // Signed due to TOML! 157 int64_t value; // Signed due to TOML! 158 int32_t i, col; 159 160 p8=nullptr; 161 p16=nullptr; 162 p32=nullptr; 163 p64=nullptr; 164 switch(width) { 165 case 1: 166 case 8: 167 p8=(const uint8_t *)p; 168 break; 169 case 16: 170 p16=(const uint16_t *)p; 171 break; 172 case 32: 173 p32=(const uint32_t *)p; 174 break; 175 case 64: 176 p64=(const int64_t *)p; 177 break; 178 default: 179 fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width); 180 return; 181 } 182 if(prefix!=nullptr) { 183 fprintf(f, prefix, (long)length); 184 } 185 for(i=col=0; i<length; ++i, ++col) { 186 if(i>0) { 187 if(col<16) { 188 fputc(',', f); 189 } else { 190 fputs(",\n", f); 191 fputs(indent, f); 192 col=0; 193 } 194 } 195 switch(width) { 196 case 1: 197 case 8: 198 value=p8[i]; 199 break; 200 case 16: 201 value=p16[i]; 202 break; 203 case 32: 204 value=p32[i]; 205 break; 206 case 64: 207 value=p64[i]; 208 break; 209 default: 210 value=0; /* unreachable */ 211 break; 212 } 213 if (width == 1) { 214 fprintf(f, value ? "true" : "false"); 215 } else { 216 fprintf(f, value<=9 ? "%" PRId64 : "0x%" PRIx64, value); 217 } 218 } 219 if(postfix!=nullptr) { 220 fputs(postfix, f); 221 } 222 } 223 224 U_CAPI void U_EXPORT2 225 usrc_writeUTrie2Arrays(FILE *f, 226 const char *indexPrefix, const char *data32Prefix, 227 const UTrie2 *pTrie, 228 const char *postfix) { 229 if(pTrie->data32==nullptr) { 230 /* 16-bit trie */ 231 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix); 232 } else { 233 /* 32-bit trie */ 234 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix); 235 usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix); 236 } 237 } 238 239 U_CAPI void U_EXPORT2 240 usrc_writeUTrie2Struct(FILE *f, 241 const char *prefix, 242 const UTrie2 *pTrie, 243 const char *indexName, const char *data32Name, 244 const char *postfix) { 245 if(prefix!=nullptr) { 246 fputs(prefix, f); 247 } 248 if(pTrie->data32==nullptr) { 249 /* 16-bit trie */ 250 fprintf( 251 f, 252 " %s,\n" /* index */ 253 " %s+%ld,\n" /* data16 */ 254 " nullptr,\n", /* data32 */ 255 indexName, 256 indexName, 257 (long)pTrie->indexLength); 258 } else { 259 /* 32-bit trie */ 260 fprintf( 261 f, 262 " %s,\n" /* index */ 263 " nullptr,\n" /* data16 */ 264 " %s,\n", /* data32 */ 265 indexName, 266 data32Name); 267 } 268 fprintf( 269 f, 270 " %ld,\n" /* indexLength */ 271 " %ld,\n" /* dataLength */ 272 " 0x%hx,\n" /* index2NullOffset */ 273 " 0x%hx,\n" /* dataNullOffset */ 274 " 0x%lx,\n" /* initialValue */ 275 " 0x%lx,\n" /* errorValue */ 276 " 0x%lx,\n" /* highStart */ 277 " 0x%lx,\n" /* highValueIndex */ 278 " nullptr, 0, false, false, 0, nullptr\n", 279 (long)pTrie->indexLength, (long)pTrie->dataLength, 280 (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset, 281 (long)pTrie->initialValue, (long)pTrie->errorValue, 282 (long)pTrie->highStart, (long)pTrie->highValueIndex); 283 if(postfix!=nullptr) { 284 fputs(postfix, f); 285 } 286 } 287 288 U_CAPI void U_EXPORT2 289 usrc_writeUCPTrieArrays(FILE *f, 290 const char *indexPrefix, const char *dataPrefix, 291 const UCPTrie *pTrie, 292 const char *postfix, 293 UTargetSyntax syntax) { 294 const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? " " : ""; 295 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix); 296 int32_t width= 297 pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : 298 pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : 299 pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; 300 usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix); 301 } 302 303 U_CAPI void U_EXPORT2 304 usrc_writeUCPTrieStruct(FILE *f, 305 const char *prefix, 306 const UCPTrie *pTrie, 307 const char *indexName, const char *dataName, 308 const char *postfix, 309 UTargetSyntax syntax) { 310 if(prefix!=nullptr) { 311 fputs(prefix, f); 312 } 313 if (syntax == UPRV_TARGET_SYNTAX_CCODE) { 314 fprintf( 315 f, 316 " %s,\n" // index 317 " { %s },\n", // data (union) 318 indexName, 319 dataName); 320 } 321 const char* pattern = 322 (syntax == UPRV_TARGET_SYNTAX_CCODE) ? 323 " %ld, %ld,\n" // indexLength, dataLength 324 " 0x%lx, 0x%x,\n" // highStart, shifted12HighStart 325 " %d, %d,\n" // type, valueWidth 326 " 0, 0,\n" // reserved32, reserved16 327 " 0x%x, 0x%lx,\n" // index3NullOffset, dataNullOffset 328 " 0x%lx,\n" // nullValue 329 : 330 "indexLength = %ld\n" 331 "dataLength = %ld\n" 332 "highStart = 0x%lx\n" 333 "shifted12HighStart = 0x%x\n" 334 "type = %d\n" 335 "valueWidth = %d\n" 336 "index3NullOffset = 0x%x\n" 337 "dataNullOffset = 0x%lx\n" 338 "nullValue = 0x%lx\n" 339 ; 340 fprintf( 341 f, 342 pattern, 343 (long)pTrie->indexLength, (long)pTrie->dataLength, 344 (long)pTrie->highStart, pTrie->shifted12HighStart, 345 pTrie->type, pTrie->valueWidth, 346 pTrie->index3NullOffset, (long)pTrie->dataNullOffset, 347 (long)pTrie->nullValue); 348 if(postfix!=nullptr) { 349 fputs(postfix, f); 350 } 351 } 352 353 U_CAPI void U_EXPORT2 354 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) { 355 int32_t width= 356 pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : 357 pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : 358 pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; 359 char line[100], line2[100], line3[100], line4[100]; 360 361 switch (syntax) { 362 case UPRV_TARGET_SYNTAX_CCODE: 363 snprintf(line, sizeof(line), "static const uint16_t %s_trieIndex[%%ld]={\n", name); 364 snprintf(line2, sizeof(line2), "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name); 365 snprintf(line3, sizeof(line3), "\n};\n\n"); 366 break; 367 case UPRV_TARGET_SYNTAX_TOML: 368 snprintf(line, sizeof(line), "index = [\n "); 369 snprintf(line2, sizeof(line2), "data_%d = [\n ", (int)width); 370 snprintf(line3, sizeof(line3), "\n]\n"); 371 break; 372 default: 373 UPRV_UNREACHABLE_EXIT; 374 } 375 usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax); 376 377 switch (syntax) { 378 case UPRV_TARGET_SYNTAX_CCODE: 379 snprintf(line, sizeof(line), "static const UCPTrie %s_trie={\n", name); 380 snprintf(line2, sizeof(line2), "%s_trieIndex", name); 381 snprintf(line3, sizeof(line3), "%s_trieData", name); 382 snprintf(line4, sizeof(line4), "};\n\n"); 383 break; 384 case UPRV_TARGET_SYNTAX_TOML: 385 line[0] = 0; 386 line2[0] = 0; 387 line3[0] = 0; 388 line4[0] = 0; 389 break; 390 default: 391 UPRV_UNREACHABLE_EXIT; 392 } 393 usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax); 394 } 395 396 U_CAPI void U_EXPORT2 397 usrc_writeUnicodeSet( 398 FILE *f, 399 const USet *pSet, 400 UTargetSyntax syntax) { 401 // ccode is not yet supported 402 U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML); 403 404 // Write out a list of ranges 405 const UnicodeSet* set = UnicodeSet::fromUSet(pSet); 406 UnicodeSetIterator it(*set); 407 fprintf(f, "# Inclusive ranges of the code points in the set.\n"); 408 fprintf(f, "ranges = [\n"); 409 bool seenFirstString = false; 410 while (it.nextRange()) { 411 if (it.isString()) { 412 if (!seenFirstString) { 413 seenFirstString = true; 414 fprintf(f, "]\nstrings = [\n"); 415 } 416 const UnicodeString& str = it.getString(); 417 fprintf(f, " "); 418 usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax); 419 fprintf(f, ",\n"); 420 } else { 421 U_ASSERT(!seenFirstString); 422 UChar32 start = it.getCodepoint(); 423 UChar32 end = it.getCodepointEnd(); 424 fprintf(f, " [0x%x, 0x%x],\n", start, end); 425 } 426 } 427 fprintf(f, "]\n"); 428 } 429 430 U_CAPI void U_EXPORT2 431 usrc_writeUCPMap( 432 FILE *f, 433 const UCPMap *pMap, 434 icu::ValueNameGetter *valueNameGetter, 435 UTargetSyntax syntax) { 436 // ccode is not yet supported 437 U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML); 438 (void) syntax; // silence unused variable errors 439 440 // Print out list of ranges 441 UChar32 start = 0, end; 442 uint32_t value; 443 fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n"); 444 fprintf(f, "ranges = [\n"); 445 while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) { 446 if (valueNameGetter != nullptr) { 447 const char *name = valueNameGetter->getName(value); 448 fprintf(f, " {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name); 449 } else { 450 fprintf(f, " {a=0x%x, b=0x%x, v=%u},\n", start, end, value); 451 } 452 start = end + 1; 453 } 454 fprintf(f, "]\n"); 455 } 456 457 U_CAPI void U_EXPORT2 458 usrc_writeArrayOfMostlyInvChars(FILE *f, 459 const char *prefix, 460 const char *p, int32_t length, 461 const char *postfix) { 462 int32_t i, col; 463 int prev2, prev, c; 464 465 if(prefix!=nullptr) { 466 fprintf(f, prefix, (long)length); 467 } 468 prev2=prev=-1; 469 for(i=col=0; i<length; ++i, ++col) { 470 c=(uint8_t)p[i]; 471 if(i>0) { 472 /* Break long lines. Try to break at interesting places, to minimize revision diffs. */ 473 if( 474 /* Very long line. */ 475 col>=32 || 476 /* Long line, break after terminating NUL. */ 477 (col>=24 && prev2>=0x20 && prev==0) || 478 /* Medium-long line, break before non-NUL, non-character byte. */ 479 (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20) 480 ) { 481 fputs(",\n", f); 482 col=0; 483 } else { 484 fputc(',', f); 485 } 486 } 487 fprintf(f, c<0x20 ? "%u" : "'%c'", c); 488 prev2=prev; 489 prev=c; 490 } 491 if(postfix!=nullptr) { 492 fputs(postfix, f); 493 } 494 } 495 496 U_CAPI void U_EXPORT2 497 usrc_writeStringAsASCII(FILE *f, 498 const char16_t* ptr, int32_t length, 499 UTargetSyntax) { 500 // For now, assume all UTargetSyntax values are valid here. 501 fprintf(f, "\""); 502 int32_t i = 0; 503 UChar32 cp; 504 while (i < length) { 505 U16_NEXT(ptr, i, length, cp); 506 if (cp == u'"') { 507 fprintf(f, "\\\""); 508 } else if (ICU_Utility::isUnprintable(cp)) { 509 UnicodeString u16result; 510 ICU_Utility::escapeUnprintable(u16result, cp); 511 std::string u8result; 512 u16result.toUTF8String(u8result); 513 fprintf(f, "%s", u8result.data()); 514 } else { 515 U_ASSERT(cp < 0x80); 516 char s[2] = {static_cast<char>(cp), 0}; 517 fprintf(f, "%s", s); 518 } 519 } 520 fprintf(f, "\""); 521 }