toolutil.cpp (12570B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 1999-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: toolutil.c 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 1999nov19 16 * created by: Markus W. Scherer 17 * 18 * 6/25/08 - Added Cygwin specific code in uprv_mkdir - Brian Rower 19 * 20 * This file contains utility functions for ICU tools like genccode. 21 */ 22 23 #include "unicode/platform.h" 24 #if U_PLATFORM == U_PF_MINGW 25 // *cough* - for struct stat 26 #ifdef __STRICT_ANSI__ 27 #undef __STRICT_ANSI__ 28 #endif 29 #endif 30 31 #include <stdio.h> 32 #include <sys/stat.h> 33 #include <fstream> 34 #include <time.h> 35 #include "unicode/utypes.h" 36 37 #ifndef U_TOOLUTIL_IMPLEMENTATION 38 #error U_TOOLUTIL_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu 39 #endif 40 41 #if U_PLATFORM_USES_ONLY_WIN32_API 42 # define VC_EXTRALEAN 43 # define WIN32_LEAN_AND_MEAN 44 # define NOUSER 45 # define NOSERVICE 46 # define NOIME 47 # define NOMCX 48 # if U_PLATFORM == U_PF_MINGW 49 # define __NO_MINGW_LFS /* gets around missing 'off64_t' */ 50 # endif 51 # include <windows.h> 52 # include <direct.h> 53 #else 54 # include <sys/stat.h> 55 # include <sys/types.h> 56 #endif 57 58 /* In MinGW environment, io.h needs to be included for _mkdir() */ 59 #if U_PLATFORM == U_PF_MINGW 60 #include <io.h> 61 #endif 62 63 #include <errno.h> 64 65 #include <cstddef> 66 67 #include "unicode/errorcode.h" 68 #include "unicode/putil.h" 69 #include "unicode/uchar.h" 70 #include "unicode/umutablecptrie.h" 71 #include "unicode/ucptrie.h" 72 #include "cmemory.h" 73 #include "cstring.h" 74 #include "toolutil.h" 75 #include "uassert.h" 76 77 U_NAMESPACE_BEGIN 78 79 IcuToolErrorCode::~IcuToolErrorCode() { 80 // Safe because our handleFailure() does not throw exceptions. 81 if(isFailure()) { handleFailure(); } 82 } 83 84 void IcuToolErrorCode::handleFailure() const { 85 fprintf(stderr, "error at %s: %s\n", location, errorName()); 86 exit(errorCode); 87 } 88 89 namespace toolutil { 90 91 void setCPTrieBit(UMutableCPTrie *mutableCPTrie, 92 UChar32 start, UChar32 end, int32_t shift, bool on, UErrorCode &errorCode) { 93 uint32_t mask = U_MASK(shift); 94 uint32_t value = on ? mask : 0; 95 setCPTrieBits(mutableCPTrie, start, end, mask, value, errorCode); 96 } 97 98 void setCPTrieBits(UMutableCPTrie *mutableCPTrie, 99 UChar32 start, UChar32 end, uint32_t mask, uint32_t value, 100 UErrorCode &errorCode) { 101 if (U_FAILURE(errorCode)) { return; } 102 // The value must not have any bits set outside of the mask. 103 if ((value & ~mask) != 0) { 104 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 105 return; 106 } 107 108 if (start == end) { 109 uint32_t oldValue = umutablecptrie_get(mutableCPTrie, start); 110 uint32_t newValue = (oldValue & ~mask) | value; 111 if (newValue != oldValue) { 112 umutablecptrie_set(mutableCPTrie, start, newValue, &errorCode); 113 } 114 return; 115 } 116 while (start <= end && U_SUCCESS(errorCode)) { 117 uint32_t oldValue; 118 UChar32 rangeEnd = umutablecptrie_getRange( 119 mutableCPTrie, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &oldValue); 120 if (rangeEnd > end) { 121 rangeEnd = end; 122 } 123 uint32_t newValue = (oldValue & ~mask) | value; 124 if (newValue != oldValue) { 125 umutablecptrie_setRange(mutableCPTrie, start, rangeEnd, newValue, &errorCode); 126 } 127 start = rangeEnd + 1; 128 } 129 } 130 131 int32_t getCPTrieSize(UMutableCPTrie *mt, UCPTrieType type, UCPTrieValueWidth valueWidth) { 132 UErrorCode errorCode = U_ZERO_ERROR; 133 UCPTrie *cpTrie = umutablecptrie_buildImmutable(mt, type, valueWidth, &errorCode); 134 if (U_FAILURE(errorCode)) { 135 fprintf(stderr, 136 "toolutil/getCPTrieSize error: umutablecptrie_buildImmutable() failed: %s\n", 137 u_errorName(errorCode)); 138 return -1; 139 } 140 uint8_t block[100000]; 141 int32_t size = ucptrie_toBinary(cpTrie, block, sizeof(block), &errorCode); 142 ucptrie_close(cpTrie); 143 if (U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) { 144 fprintf(stderr, 145 "toolutil/getCPTrieSize error: ucptrie_toBinary() failed: %s (length %ld)\n", 146 u_errorName(errorCode), static_cast<long>(size)); 147 return -1; 148 } 149 U_ASSERT((size & 3) == 0); // multiple of 4 bytes 150 return size; 151 } 152 153 } // toolutil 154 155 U_NAMESPACE_END 156 157 static int32_t currentYear = -1; 158 159 U_CAPI int32_t U_EXPORT2 getCurrentYear() { 160 if(currentYear == -1) { 161 time_t now = time(nullptr); 162 tm *fields = gmtime(&now); 163 currentYear = 1900 + fields->tm_year; 164 } 165 return currentYear; 166 } 167 168 169 U_CAPI const char * U_EXPORT2 170 getLongPathname(const char *pathname) { 171 #if U_PLATFORM_USES_ONLY_WIN32_API 172 /* anticipate problems with "short" pathnames */ 173 static WIN32_FIND_DATAA info; 174 HANDLE file=FindFirstFileA(pathname, &info); 175 if(file!=INVALID_HANDLE_VALUE) { 176 if(info.cAlternateFileName[0]!=0) { 177 /* this file has a short name, get and use the long one */ 178 const char *basename=findBasename(pathname); 179 if(basename!=pathname) { 180 /* prepend the long filename with the original path */ 181 uprv_memmove(info.cFileName+(basename-pathname), info.cFileName, uprv_strlen(info.cFileName)+1); 182 uprv_memcpy(info.cFileName, pathname, basename-pathname); 183 } 184 pathname=info.cFileName; 185 } 186 FindClose(file); 187 } 188 #endif 189 return pathname; 190 } 191 192 U_CAPI const char * U_EXPORT2 193 findDirname(const char *path, char *buffer, int32_t bufLen, UErrorCode* status) { 194 if(U_FAILURE(*status)) return nullptr; 195 const char *resultPtr = nullptr; 196 int32_t resultLen = 0; 197 198 const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR); 199 #if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR 200 const char *basenameAlt=uprv_strrchr(path, U_FILE_ALT_SEP_CHAR); 201 if(basenameAlt && (!basename || basename<basenameAlt)) { 202 basename = basenameAlt; 203 } 204 #endif 205 if(!basename) { 206 /* no basename - return ''. */ 207 resultPtr = ""; 208 resultLen = 0; 209 } else { 210 resultPtr = path; 211 resultLen = static_cast<int32_t>(basename - path); 212 if(resultLen<1) { 213 resultLen = 1; /* '/' or '/a' -> '/' */ 214 } 215 } 216 217 if((resultLen+1) <= bufLen) { 218 uprv_strncpy(buffer, resultPtr, resultLen); 219 buffer[resultLen]=0; 220 return buffer; 221 } else { 222 *status = U_BUFFER_OVERFLOW_ERROR; 223 return nullptr; 224 } 225 } 226 227 U_CAPI const char * U_EXPORT2 228 findBasename(const char *filename) { 229 const char *basename=uprv_strrchr(filename, U_FILE_SEP_CHAR); 230 231 #if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR 232 //be lenient about pathname separators on Windows, like official implementation of C++17 std::filesystem in MSVC 233 //would be convenient to merge this loop with the one above, but alas, there is no such solution in the standard library 234 const char *alt_basename=uprv_strrchr(filename, U_FILE_ALT_SEP_CHAR); 235 if(alt_basename>basename) { 236 basename=alt_basename; 237 } 238 #endif 239 240 if(basename!=nullptr) { 241 return basename+1; 242 } else { 243 return filename; 244 } 245 } 246 247 U_CAPI void U_EXPORT2 248 uprv_mkdir(const char *pathname, UErrorCode *status) { 249 250 int retVal = 0; 251 #if U_PLATFORM_USES_ONLY_WIN32_API 252 retVal = _mkdir(pathname); 253 #else 254 retVal = mkdir(pathname, S_IRWXU | (S_IROTH | S_IXOTH) | (S_IROTH | S_IXOTH)); 255 #endif 256 if (retVal && errno != EEXIST) { 257 #if U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN 258 /*if using Cygwin and the mkdir says it failed...check if the directory already exists..*/ 259 /* if it does...don't give the error, if it does not...give the error - Brian Rower - 6/25/08 */ 260 struct stat st; 261 262 if(stat(pathname,&st) != 0) 263 { 264 *status = U_FILE_ACCESS_ERROR; 265 } 266 #else 267 *status = U_FILE_ACCESS_ERROR; 268 #endif 269 } 270 } 271 272 #if !UCONFIG_NO_FILE_IO 273 U_CAPI UBool U_EXPORT2 274 uprv_fileExists(const char *file) { 275 struct stat stat_buf; 276 if (stat(file, &stat_buf) == 0) { 277 return true; 278 } else { 279 return false; 280 } 281 } 282 #endif 283 284 U_CAPI int32_t U_EXPORT2 285 uprv_compareGoldenFiles( 286 const char* buffer, int32_t bufferLen, 287 const char* goldenFilePath, 288 bool overwrite) { 289 290 if (overwrite) { 291 std::ofstream ofs; 292 ofs.open(goldenFilePath); 293 ofs.write(buffer, bufferLen); 294 ofs.close(); 295 return -1; 296 } 297 298 std::ifstream ifs(goldenFilePath, std::ifstream::in); 299 int32_t pos = 0; 300 char c; 301 while (ifs.get(c) && pos < bufferLen) { 302 if (c != buffer[pos]) { 303 // Files differ at this position 304 break; 305 } 306 pos++; 307 } 308 if (pos == bufferLen && ifs.eof()) { 309 // Files are same lengths 310 pos = -1; 311 } 312 ifs.close(); 313 return pos; 314 } 315 316 /*U_CAPI UDate U_EXPORT2 317 uprv_getModificationDate(const char *pathname, UErrorCode *status) 318 { 319 if(U_FAILURE(*status)) { 320 return; 321 } 322 // TODO: handle case where stat is not available 323 struct stat st; 324 325 if(stat(pathname,&st) != 0) 326 { 327 *status = U_FILE_ACCESS_ERROR; 328 } else { 329 return st.st_mtime; 330 } 331 } 332 */ 333 334 /* tool memory helper ------------------------------------------------------- */ 335 336 struct UToolMemory { 337 char name[64]; 338 int32_t capacity, maxCapacity, size, idx; 339 void *array; 340 alignas(std::max_align_t) char staticArray[1]; 341 }; 342 343 U_CAPI UToolMemory * U_EXPORT2 344 utm_open(const char *name, int32_t initialCapacity, int32_t maxCapacity, int32_t size) { 345 UToolMemory *mem; 346 347 if(maxCapacity<initialCapacity) { 348 maxCapacity=initialCapacity; 349 } 350 351 mem=(UToolMemory *)uprv_malloc(sizeof(UToolMemory)+initialCapacity*size); 352 if(mem==nullptr) { 353 fprintf(stderr, "error: %s - out of memory\n", name); 354 exit(U_MEMORY_ALLOCATION_ERROR); 355 } 356 mem->array=mem->staticArray; 357 358 uprv_strcpy(mem->name, name); 359 mem->capacity=initialCapacity; 360 mem->maxCapacity=maxCapacity; 361 mem->size=size; 362 mem->idx=0; 363 return mem; 364 } 365 366 U_CAPI void U_EXPORT2 367 utm_close(UToolMemory *mem) { 368 if(mem!=nullptr) { 369 if(mem->array!=mem->staticArray) { 370 uprv_free(mem->array); 371 } 372 uprv_free(mem); 373 } 374 } 375 376 377 U_CAPI void * U_EXPORT2 378 utm_getStart(UToolMemory *mem) { 379 return (char *)mem->array; 380 } 381 382 U_CAPI int32_t U_EXPORT2 383 utm_countItems(UToolMemory *mem) { 384 return mem->idx; 385 } 386 387 388 static UBool 389 utm_hasCapacity(UToolMemory *mem, int32_t capacity) { 390 if(mem->capacity<capacity) { 391 int32_t newCapacity; 392 393 if(mem->maxCapacity<capacity) { 394 fprintf(stderr, "error: %s - trying to use more than maxCapacity=%ld units\n", 395 mem->name, static_cast<long>(mem->maxCapacity)); 396 exit(U_MEMORY_ALLOCATION_ERROR); 397 } 398 399 /* try to allocate a larger array */ 400 if(capacity>=2*mem->capacity) { 401 newCapacity=capacity; 402 } else if(mem->capacity<=mem->maxCapacity/3) { 403 newCapacity=2*mem->capacity; 404 } else { 405 newCapacity=mem->maxCapacity; 406 } 407 408 if(mem->array==mem->staticArray) { 409 mem->array=uprv_malloc(newCapacity*mem->size); 410 if(mem->array!=nullptr) { 411 uprv_memcpy(mem->array, mem->staticArray, (size_t)mem->idx*mem->size); 412 } 413 } else { 414 mem->array=uprv_realloc(mem->array, newCapacity*mem->size); 415 } 416 417 if(mem->array==nullptr) { 418 fprintf(stderr, "error: %s - out of memory\n", mem->name); 419 exit(U_MEMORY_ALLOCATION_ERROR); 420 } 421 mem->capacity=newCapacity; 422 } 423 424 return true; 425 } 426 427 U_CAPI void * U_EXPORT2 428 utm_alloc(UToolMemory *mem) { 429 char *p=nullptr; 430 int32_t oldIndex=mem->idx; 431 int32_t newIndex=oldIndex+1; 432 if(utm_hasCapacity(mem, newIndex)) { 433 p=(char *)mem->array+oldIndex*mem->size; 434 mem->idx=newIndex; 435 uprv_memset(p, 0, mem->size); 436 } 437 return p; 438 } 439 440 U_CAPI void * U_EXPORT2 441 utm_allocN(UToolMemory *mem, int32_t n) { 442 char *p=nullptr; 443 int32_t oldIndex=mem->idx; 444 int32_t newIndex=oldIndex+n; 445 if(utm_hasCapacity(mem, newIndex)) { 446 p=(char *)mem->array+oldIndex*mem->size; 447 mem->idx=newIndex; 448 uprv_memset(p, 0, n*mem->size); 449 } 450 return p; 451 }