tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

putil.cpp (81984B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1997-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *
     11 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
     12 *
     13 *   Date        Name        Description
     14 *   04/14/97    aliu        Creation.
     15 *   04/24/97    aliu        Added getDefaultDataDirectory() and
     16 *                            getDefaultLocaleID().
     17 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
     18 *                            for assumed case.  Non-UNIX platforms must be
     19 *                            special-cased.  Rewrote numeric methods dealing
     20 *                            with NaN and Infinity to be platform independent
     21 *                             over all IEEE 754 platforms.
     22 *   05/13/97    aliu        Restored sign of timezone
     23 *                            (semantics are hours West of GMT)
     24 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
     25 *                             nextDouble..
     26 *   07/22/98    stephen     Added remainder, max, min, trunc
     27 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
     28 *   08/24/98    stephen     Added longBitsFromDouble
     29 *   09/08/98    stephen     Minor changes for Mac Port
     30 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
     31 *                            Fixed EBCDIC tables
     32 *   04/15/99    stephen     Converted to C.
     33 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
     34 *   08/04/99    jeffrey R.  Added OS/2 changes
     35 *   11/15/99    helena      Integrated S/390 IEEE support.
     36 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
     37 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
     38 *   01/03/08    Steven L.   Fake Time Support
     39 ******************************************************************************
     40 */
     41 
     42 // Defines _XOPEN_SOURCE for access to POSIX functions.
     43 // Must be before any other #includes.
     44 #include "uposixdefs.h"
     45 
     46 // First, the platform type. Need this for U_PLATFORM.
     47 #include "unicode/platform.h"
     48 
     49 /*
     50 * Cygwin with GCC requires inclusion of time.h after the above statement that disables strict ANSI mode.
     51 */
     52 #include <time.h>
     53 
     54 #if !U_PLATFORM_USES_ONLY_WIN32_API
     55 #include <sys/time.h>
     56 #endif
     57 
     58 /* include the rest of the ICU headers */
     59 #include "unicode/putil.h"
     60 #include "unicode/ustring.h"
     61 #include "putilimp.h"
     62 #include "uassert.h"
     63 #include "umutex.h"
     64 #include "cmemory.h"
     65 #include "cstring.h"
     66 #include "locmap.h"
     67 #include "ucln_cmn.h"
     68 #include "charstr.h"
     69 
     70 /* Include standard headers. */
     71 #include <stdio.h>
     72 #include <stdlib.h>
     73 #include <string.h>
     74 #include <math.h>
     75 #include <locale.h>
     76 #include <float.h>
     77 
     78 #ifndef U_COMMON_IMPLEMENTATION
     79 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/icu/howtouseicu.html
     80 #endif
     81 
     82 
     83 /* include system headers */
     84 #if U_PLATFORM_USES_ONLY_WIN32_API
     85    /*
     86     * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
     87     * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
     88     * to use native APIs as much as possible?
     89     */
     90 #ifndef WIN32_LEAN_AND_MEAN
     91 #   define WIN32_LEAN_AND_MEAN
     92 #endif
     93 #   define VC_EXTRALEAN
     94 #   define NOUSER
     95 #   define NOSERVICE
     96 #   define NOIME
     97 #   define NOMCX
     98 #   include <windows.h>
     99 #   include "unicode/uloc.h"
    100 #   include "wintz.h"
    101 #elif U_PLATFORM == U_PF_OS400
    102 #   include <float.h>
    103 #   include <qusec.h>       /* error code structure */
    104 #   include <qusrjobi.h>
    105 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
    106 #   include <mih/testptr.h> /* For uprv_maximumPtr */
    107 #elif U_PLATFORM == U_PF_OS390
    108 #   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
    109 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
    110 #   include <limits.h>
    111 #   include <unistd.h>
    112 #   if U_PLATFORM == U_PF_SOLARIS
    113 #       ifndef _XPG4_2
    114 #           define _XPG4_2
    115 #       endif
    116 #   elif U_PLATFORM == U_PF_ANDROID
    117 #       include <sys/system_properties.h>
    118 #       include <dlfcn.h>
    119 #   endif
    120 #elif U_PLATFORM == U_PF_QNX
    121 #   include <sys/neutrino.h>
    122 #endif
    123 
    124 
    125 /*
    126 * Only include langinfo.h if we have a way to get the codeset. If we later
    127 * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
    128 *
    129 */
    130 
    131 #if U_HAVE_NL_LANGINFO_CODESET
    132 #include <langinfo.h>
    133 #endif
    134 
/**
* Feature switches for optional POSIX facilities used in this file.
* Simple things (presence of functions, etc) should just go in configure.in and be added to
* icucfg.h via autoheader.
*
* After this section HAVE_DLFCN_H, HAVE_DLOPEN and HAVE_GETTIMEOFDAY are each defined:
*  - on POSIX platforms they default to 1 unless a value was already defined;
*  - on OS/400 the two dl* switches are defined as 0;
*  - on non-POSIX platforms all three are defined as 0.
*/
#if U_PLATFORM_IMPLEMENTS_POSIX
#   if U_PLATFORM == U_PF_OS400
    /* OS/400: dlfcn.h / dlopen are switched off. */
#    define HAVE_DLFCN_H 0
#    define HAVE_DLOPEN 0
#   else
    /* Other POSIX platforms: default to "available" unless already defined. */
#   ifndef HAVE_DLFCN_H
#    define HAVE_DLFCN_H 1
#   endif
#   ifndef HAVE_DLOPEN
#    define HAVE_DLOPEN 1
#   endif
#   endif
    /* gettimeofday() is used by uprv_getRawUTCtime() when this is non-zero. */
#   ifndef HAVE_GETTIMEOFDAY
#    define HAVE_GETTIMEOFDAY 1
#   endif
#else
    /* Non-POSIX platforms: none of these facilities are used. */
#   define HAVE_DLFCN_H 0
#   define HAVE_DLOPEN 0
#   define HAVE_GETTIMEOFDAY 0
#endif
    159 
    160 U_NAMESPACE_USE
    161 
/* Define the extension for data files, again... */
#define DATA_TYPE "dat"

/* Leave this copyright notice here! */
/* NOTE(review): this array is not referenced in the visible part of the file;
   presumably it exists so the notice is present in the compiled object - confirm
   before touching it. */
static const char copyright[] = U_COPYRIGHT_STRING;
    167 
    168 /* floating point implementations ------------------------------------------- */
    169 
/* We return QNAN rather than SNAN*/
/* Mask for the top bit of a 32-bit word; the code below applies it to the
   high 32 bits of a double to test or clear the sign. */
#define SIGN 0x80000000U

/* Make it easy to define certain types of constants */
/* Overlays a double with a 64-bit integer so that a floating-point constant
   can be written as a raw bit pattern and read back as a double. */
typedef union {
   int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
   double d64;
} BitPatternConversion;
/* Bit pattern returned by uprv_getNaN() (see the QNAN remark above). */
static const BitPatternConversion gNan = {static_cast<int64_t>(INT64_C(0x7FF8000000000000))};
/* Bit pattern of positive infinity; also the threshold used by uprv_isNaN()
   and uprv_isInfinite(). */
static const BitPatternConversion gInf = {static_cast<int64_t>(INT64_C(0x7FF0000000000000))};
    180 
    181 /*---------------------------------------------------------------------------
    182  Platform utilities
    183  Our general strategy is to assume we're on a POSIX platform.  Platforms which
    184  are non-POSIX must declare themselves so.  The default POSIX implementation
    185  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
    186  functions).
    187  ---------------------------------------------------------------------------*/
    188 
/* U_POSIX_LOCALE is defined (as 1) on every platform except pure Win32 and
   OS/400, where it is explicitly undefined. */
#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
#   undef U_POSIX_LOCALE
#else
#   define U_POSIX_LOCALE    1
#endif
    194 
    195 /*
    196    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
    197    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
    198 */
    199 #if !IEEE_754
    200 static char*
    201 u_topNBytesOfDouble(double* d, int n)
    202 {
    203 #if U_IS_BIG_ENDIAN
    204    return (char*)d;
    205 #else
    206    return (char*)(d + 1) - n;
    207 #endif
    208 }
    209 
    210 static char*
    211 u_bottomNBytesOfDouble(double* d, int n)
    212 {
    213 #if U_IS_BIG_ENDIAN
    214    return (char*)(d + 1) - n;
    215 #else
    216    return (char*)d;
    217 #endif
    218 }
    219 #endif   /* !IEEE_754 */
    220 
    221 #if IEEE_754
    222 static UBool
    223 u_signBit(double d) {
    224    uint8_t hiByte;
    225 #if U_IS_BIG_ENDIAN
    226    hiByte = *(uint8_t *)&d;
    227 #else
    228    hiByte = *(reinterpret_cast<uint8_t*>(&d) + sizeof(double) - 1);
    229 #endif
    230    return (hiByte & 0x80) != 0;
    231 }
    232 #endif
    233 
    234 
    235 
    236 #if defined (U_DEBUG_FAKETIME)
/* Override the clock to test things without having to move the system clock.
* Assumes POSIX gettimeofday() will function.
* These three globals are written once by getUTCtime_fake() on its first call.
*/
UDate fakeClock_t0 = 0; /** Time to start the clock from (parsed from U_FAKETIME_START) **/
UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
UBool fakeClock_set = false; /** True if fake clock has spun up **/
    243 
    244 static UDate getUTCtime_real() {
    245    struct timeval posixTime;
    246    gettimeofday(&posixTime, nullptr);
    247    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
    248 }
    249 
    250 static UDate getUTCtime_fake() {
    251    static UMutex fakeClockMutex;
    252    umtx_lock(&fakeClockMutex);
    253    if(!fakeClock_set) {
    254        UDate real = getUTCtime_real();
    255        const char *fake_start = getenv("U_FAKETIME_START");
    256        if((fake_start!=nullptr) && (fake_start[0]!=0)) {
    257            sscanf(fake_start,"%lf",&fakeClock_t0);
    258            fakeClock_dt = fakeClock_t0 - real;
    259            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
    260                    "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
    261                    fakeClock_t0, fake_start, fakeClock_dt, real);
    262        } else {
    263          fakeClock_dt = 0;
    264            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
    265                    "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
    266        }
    267        fakeClock_set = true;
    268    }
    269    umtx_unlock(&fakeClockMutex);
    270 
    271    return getUTCtime_real() + fakeClock_dt;
    272 }
    273 #endif
    274 
    275 #if U_PLATFORM_USES_ONLY_WIN32_API
/* Overlays a Win32 FILETIME with a 64-bit integer so the value filled in by
   GetSystemTimeAsFileTime() can be used in integer arithmetic. */
typedef union {
   int64_t int64;
   FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND   10000
    284 
    285 #endif
    286 
    287 /*---------------------------------------------------------------------------
    288  Universal Implementations
    289  These are designed to work on all platforms.  Try these, and if they
    290  don't work on your platform, then special case your platform with new
    291  implementations.
    292 ---------------------------------------------------------------------------*/
    293 
    294 U_CAPI UDate U_EXPORT2
    295 uprv_getUTCtime()
    296 {
    297 #if defined(U_DEBUG_FAKETIME)
    298    return getUTCtime_fake(); /* Hook for overriding the clock */
    299 #else
    300    return uprv_getRawUTCtime();
    301 #endif
    302 }
    303 
    304 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
    305 U_CAPI UDate U_EXPORT2
    306 uprv_getRawUTCtime()
    307 {
    308 #if U_PLATFORM_USES_ONLY_WIN32_API
    309 
    310    FileTimeConversion winTime;
    311    GetSystemTimeAsFileTime(&winTime.fileTime);
    312    return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
    313 #else
    314 
    315 #if HAVE_GETTIMEOFDAY
    316    struct timeval posixTime;
    317    gettimeofday(&posixTime, nullptr);
    318    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
    319 #else
    320    time_t epochtime;
    321    time(&epochtime);
    322    return (UDate)epochtime * U_MILLIS_PER_SECOND;
    323 #endif
    324 
    325 #endif
    326 }
    327 
    328 /*-----------------------------------------------------------------------------
    329  IEEE 754
    330  These methods detect and return NaN and infinity values for doubles
    331  conforming to IEEE 754.  Platforms which support this standard include X86,
    332  Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
    333  If this doesn't work on your platform, you have non-IEEE floating-point, and
    334  will need to code your own versions.  A naive implementation is to return 0.0
    335  for getNaN and getInfinity, and false for isNaN and isInfinite.
    336  ---------------------------------------------------------------------------*/
    337 
    338 U_CAPI UBool U_EXPORT2
    339 uprv_isNaN(double number)
    340 {
    341 #if IEEE_754
    342    BitPatternConversion convertedNumber;
    343    convertedNumber.d64 = number;
    344    /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
    345    return (convertedNumber.i64 & U_INT64_MAX) > gInf.i64;
    346 
    347 #elif U_PLATFORM == U_PF_OS390
    348    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
    349                        sizeof(uint32_t));
    350    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
    351                        sizeof(uint32_t));
    352 
    353    return ((highBits & 0x7F080000L) == 0x7F080000L) &&
    354      (lowBits == 0x00000000L);
    355 
    356 #else
    357    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    358    /* you'll need to replace this default implementation with what's correct*/
    359    /* for your platform.*/
    360    return number != number;
    361 #endif
    362 }
    363 
    364 U_CAPI UBool U_EXPORT2
    365 uprv_isInfinite(double number)
    366 {
    367 #if IEEE_754
    368    BitPatternConversion convertedNumber;
    369    convertedNumber.d64 = number;
    370    /* Infinity is exactly 0x7FF0000000000000U. */
    371    return (convertedNumber.i64 & U_INT64_MAX) == gInf.i64;
    372 #elif U_PLATFORM == U_PF_OS390
    373    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
    374                        sizeof(uint32_t));
    375    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
    376                        sizeof(uint32_t));
    377 
    378    return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
    379 
    380 #else
    381    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    382    /* value, you'll need to replace this default implementation with what's*/
    383    /* correct for your platform.*/
    384    return number == (2.0 * number);
    385 #endif
    386 }
    387 
    388 U_CAPI UBool U_EXPORT2
    389 uprv_isPositiveInfinity(double number)
    390 {
    391 #if IEEE_754 || U_PLATFORM == U_PF_OS390
    392    return number > 0 && uprv_isInfinite(number);
    393 #else
    394    return uprv_isInfinite(number);
    395 #endif
    396 }
    397 
    398 U_CAPI UBool U_EXPORT2
    399 uprv_isNegativeInfinity(double number)
    400 {
    401 #if IEEE_754 || U_PLATFORM == U_PF_OS390
    402    return number < 0 && uprv_isInfinite(number);
    403 
    404 #else
    405    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
    406                        sizeof(uint32_t));
    407    return((highBits & SIGN) && uprv_isInfinite(number));
    408 
    409 #endif
    410 }
    411 
    412 U_CAPI double U_EXPORT2
    413 uprv_getNaN()
    414 {
    415 #if IEEE_754 || U_PLATFORM == U_PF_OS390
    416    return gNan.d64;
    417 #else
    418    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    419    /* you'll need to replace this default implementation with what's correct*/
    420    /* for your platform.*/
    421    return 0.0;
    422 #endif
    423 }
    424 
    425 U_CAPI double U_EXPORT2
    426 uprv_getInfinity()
    427 {
    428 #if IEEE_754 || U_PLATFORM == U_PF_OS390
    429    return gInf.d64;
    430 #else
    431    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    432    /* value, you'll need to replace this default implementation with what's*/
    433    /* correct for your platform.*/
    434    return 0.0;
    435 #endif
    436 }
    437 
    438 U_CAPI double U_EXPORT2
    439 uprv_floor(double x)
    440 {
    441    return floor(x);
    442 }
    443 
    444 U_CAPI double U_EXPORT2
    445 uprv_ceil(double x)
    446 {
    447    return ceil(x);
    448 }
    449 
    450 U_CAPI double U_EXPORT2
    451 uprv_round(double x)
    452 {
    453    return uprv_floor(x + 0.5);
    454 }
    455 
    456 U_CAPI double U_EXPORT2
    457 uprv_fabs(double x)
    458 {
    459    return fabs(x);
    460 }
    461 
    462 U_CAPI double U_EXPORT2
    463 uprv_modf(double x, double* y)
    464 {
    465    return modf(x, y);
    466 }
    467 
    468 U_CAPI double U_EXPORT2
    469 uprv_fmod(double x, double y)
    470 {
    471    return fmod(x, y);
    472 }
    473 
    474 U_CAPI double U_EXPORT2
    475 uprv_pow(double x, double y)
    476 {
    477    /* This is declared as "double pow(double x, double y)" */
    478    return pow(x, y);
    479 }
    480 
    481 U_CAPI double U_EXPORT2
    482 uprv_pow10(int32_t x)
    483 {
    484    return pow(10.0, (double)x);
    485 }
    486 
    487 U_CAPI double U_EXPORT2
    488 uprv_fmax(double x, double y)
    489 {
    490 #if IEEE_754
    491    /* first handle NaN*/
    492    if(uprv_isNaN(x) || uprv_isNaN(y))
    493        return uprv_getNaN();
    494 
    495    /* check for -0 and 0*/
    496    if(x == 0.0 && y == 0.0 && u_signBit(x))
    497        return y;
    498 
    499 #endif
    500 
    501    /* this should work for all flt point w/o NaN and Inf special cases */
    502    return (x > y ? x : y);
    503 }
    504 
    505 U_CAPI double U_EXPORT2
    506 uprv_fmin(double x, double y)
    507 {
    508 #if IEEE_754
    509    /* first handle NaN*/
    510    if(uprv_isNaN(x) || uprv_isNaN(y))
    511        return uprv_getNaN();
    512 
    513    /* check for -0 and 0*/
    514    if(x == 0.0 && y == 0.0 && u_signBit(y))
    515        return y;
    516 
    517 #endif
    518 
    519    /* this should work for all flt point w/o NaN and Inf special cases */
    520    return (x > y ? y : x);
    521 }
    522 
    523 U_CAPI UBool U_EXPORT2
    524 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
    525    // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
    526    // This function could be optimized by calling one of those primitives.
    527    auto a64 = static_cast<int64_t>(a);
    528    auto b64 = static_cast<int64_t>(b);
    529    int64_t res64 = a64 + b64;
    530    *res = static_cast<int32_t>(res64);
    531    return res64 != *res;
    532 }
    533 
    534 U_CAPI UBool U_EXPORT2
    535 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
    536    // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
    537    // This function could be optimized by calling one of those primitives.
    538    auto a64 = static_cast<int64_t>(a);
    539    auto b64 = static_cast<int64_t>(b);
    540    int64_t res64 = a64 * b64;
    541    *res = static_cast<int32_t>(res64);
    542    return res64 != *res;
    543 }
    544 
    545 /**
    546 * Truncates the given double.
    547 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
    548 * This is different than calling floor() or ceil():
    549 * floor(3.3) = 3, floor(-3.3) = -4
    550 * ceil(3.3) = 4, ceil(-3.3) = -3
    551 */
    552 U_CAPI double U_EXPORT2
    553 uprv_trunc(double d)
    554 {
    555 #if IEEE_754
    556    /* handle error cases*/
    557    if(uprv_isNaN(d))
    558        return uprv_getNaN();
    559    if(uprv_isInfinite(d))
    560        return uprv_getInfinity();
    561 
    562    if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
    563        return ceil(d);
    564    else
    565        return floor(d);
    566 
    567 #else
    568    return d >= 0 ? floor(d) : ceil(d);
    569 
    570 #endif
    571 }
    572 
    573 /**
    574 * Return the largest positive number that can be represented by an integer
    575 * type of arbitrary bit length.
    576 */
    577 U_CAPI double U_EXPORT2
    578 uprv_maxMantissa()
    579 {
    580    return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
    581 }
    582 
    583 U_CAPI double U_EXPORT2
    584 uprv_log(double d)
    585 {
    586    return log(d);
    587 }
    588 
    589 U_CAPI void * U_EXPORT2
    590 uprv_maximumPtr(void * base)
    591 {
    592 #if U_PLATFORM == U_PF_OS400
    593    /*
    594     * With the provided function we should never be out of range of a given segment
    595     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
    596     * id and 3 bytes for the offset.  The key is that the casting takes care of
    597     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
    598     * seen in a program is x001000 and when casted to an int would be 0.
    599     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
    600     *
    601     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
    602     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
    603     * This function determines the activation based on the pointer that is passed in and
    604     * calculates the appropriate maximum available size for
    605     * each pointer type (TERASPACE and non-TERASPACE)
    606     *
    607     * Unlike other operating systems, the pointer model isn't determined at
    608     * compile time on i5/OS.
    609     */
    610    if ((base != nullptr) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
    611        /* if it is a TERASPACE pointer the max is 2GB - 4k */
    612        return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
    613    }
    614    /* otherwise 16MB since nullptr ptr is not checkable or the ptr is not TERASPACE */
    615    return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
    616 
    617 #else
    618    return U_MAX_PTR(base);
    619 #endif
    620 }
    621 
    622 /*---------------------------------------------------------------------------
    623  Platform-specific Implementations
    624  Try these, and if they don't work on your platform, then special case your
    625  platform with new implementations.
    626  ---------------------------------------------------------------------------*/
    627 
    628 /* Generic time zone layer -------------------------------------------------- */
    629 
    630 /* Time zone utilities */
    631 U_CAPI void U_EXPORT2
    632 uprv_tzset()
    633 {
    634 #if defined(U_TZSET)
    635    U_TZSET();
    636 #else
    637    /* no initialization*/
    638 #endif
    639 }
    640 
    641 U_CAPI int32_t U_EXPORT2
    642 uprv_timezone()
    643 {
    644 #ifdef U_TIMEZONE
    645    return U_TIMEZONE;
    646 #else
    647    time_t t, t1, t2;
    648    struct tm tmrec;
    649    int32_t tdiff = 0;
    650 
    651    time(&t);
    652    uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
    653 #if U_PLATFORM != U_PF_IPHONE
    654    UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
    655 #endif
    656    t1 = mktime(&tmrec);                 /* local time in seconds*/
    657    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
    658    t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
    659    tdiff = t2 - t1;
    660 
    661 #if U_PLATFORM != U_PF_IPHONE
    662    /* imitate NT behaviour, which returns same timezone offset to GMT for
    663       winter and summer.
    664       This does not work on all platforms. For instance, on glibc on Linux
    665       and on Mac OS 10.5, tdiff calculated above remains the same
    666       regardless of whether DST is in effect or not. iOS is another
    667       platform where this does not work. Linux + glibc and Mac OS 10.5
    668       have U_TIMEZONE defined so that this code is not reached.
    669    */
    670    if (dst_checked)
    671        tdiff += 3600;
    672 #endif
    673    return tdiff;
    674 #endif
    675 }
    676 
/* Note that U_TZNAME does *not* have to be tzname, but if it is,
  some platforms need to have it declared here. */

/* Only IRIX and Darwin-based platforms get the explicit declaration below,
   and only when U_TZNAME is configured. */
#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
/* RS6000 and others reject char **tzname, so the array form is used.  */
extern U_IMPORT char *U_TZNAME[];
#endif
    684 
/* Configuration for locating the system time zone through the file system.
   Enabled only when file I/O is allowed and the platform is Darwin-based
   (iOS only when U_TIMEZONE is defined), Linux-based, BSD or Solaris. */
#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
/* These platforms are likely to use Olson timezone IDs. */
/* common targets of the symbolic link at TZDEFAULT are:
* "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
* "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
* "/usr/share/lib/zoneinfo/<olsonID>" Solaris
* "../usr/share/lib/zoneinfo/<olsonID>" Solaris
* "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
* To avoid checking lots of paths, just check that the target path
* before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
*/

#define CHECK_LOCALTIME_LINK 1
#if U_PLATFORM_IS_DARWIN_BASED
/* TZDIR comes from <tzfile.h>; TZDEFAULT is not defined in this branch and is
   presumably supplied by that header as well - confirm. */
#include <tzfile.h>
#define TZZONEINFO      (TZDIR "/")
#elif U_PLATFORM == U_PF_SOLARIS
#define TZDEFAULT       "/etc/localtime"
#define TZZONEINFO      "/usr/share/lib/zoneinfo/"
/* Defined for Solaris only. */
#define TZ_ENV_CHECK    "localtime"
#else
#define TZDEFAULT       "/etc/localtime"
#define TZZONEINFO      "/usr/share/zoneinfo/"
#endif
/* Path component expected directly before the <olsonID> (see the list above). */
#define TZZONEINFOTAIL  "/zoneinfo/"
#if U_HAVE_DIRENT_H
#define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
  symlinked to /etc/localtime, which makes searchForTZFile return
  'localtime' when it's the first match. */
#define TZFILE_SKIP2    "localtime"
/* Marks that directory searching of the zoneinfo tree is available. */
#define SEARCH_TZFILE
#include <dirent.h>  /* Needed to search through system timezone files */
#endif
/* File-scope buffer of PATH_MAX bytes and a pointer that starts out null.
   NOTE(review): their users are outside the visible part of the file;
   presumably they cache the detected zone ID - confirm. */
static char gTimeZoneBuffer[PATH_MAX];
static const char *gTimeZoneBufferPtr = nullptr;
#endif
    722 
    723 #if !U_PLATFORM_USES_ONLY_WIN32_API
    724 #define isNonDigit(ch) (ch < '0' || '9' < ch)
    725 #define isDigit(ch) ('0' <= ch && ch <= '9')
    726 static UBool isValidOlsonID(const char *id) {
    727    int32_t idx = 0;
    728    int32_t idxMax = 0;
    729 
    730    /* Determine if this is something like Iceland (Olson ID)
    731    or AST4ADT (non-Olson ID) */
    732    while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
    733        idx++;
    734    }
    735 
    736    /* Allow at maximum 2 numbers at the end of the id to support zone id's
    737    like GMT+11. */
    738    idxMax = idx + 2;
    739    while (id[idx] && isDigit(id[idx]) && idx < idxMax) {
    740        idx++;
    741    }
    742 
    743    /* If we went through the whole string, then it might be okay.
    744    The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
    745    "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
    746    The rest of the time it could be an Olson ID. George */
    747    return id[idx] == 0
    748        || uprv_strcmp(id, "PST8PDT") == 0
    749        || uprv_strcmp(id, "MST7MDT") == 0
    750        || uprv_strcmp(id, "CST6CDT") == 0
    751        || uprv_strcmp(id, "EST5EDT") == 0;
    752 }
    753 
    754 /* On some Unix-like OS, 'posix' subdirectory in
    755   /usr/share/zoneinfo replicates the top-level contents. 'right'
    756   subdirectory has the same set of files, but individual files
    757   are different from those in the top-level directory or 'posix'
    758   because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
    759   has files for UTC.
    760   When the first match for /etc/localtime is in either of them
    761   (usually in posix because 'right' has different file contents),
    762   or TZ environment variable points to one of them, createTimeZone
    763   fails because, say, 'posix/America/New_York' is not an Olson
    764   timezone id ('America/New_York' is). So, we have to skip
    765   'posix/' and 'right/' at the beginning. */
    766 static void skipZoneIDPrefix(const char** id) {
    767    if (uprv_strncmp(*id, "posix/", 6) == 0
    768        || uprv_strncmp(*id, "right/", 6) == 0)
    769    {
    770        *id += 6;
    771    }
    772 }
    773 #endif
    774 
    775 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
    776 
    777 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the table that maps an offset plus a pair of zone
   abbreviations to an Olson ID. */
typedef struct OffsetZoneMapping {
   int32_t offsetSeconds; /* zone offset in seconds */
   int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=U_DAYLIGHT_JUNE (daylight time in June), 2=U_DAYLIGHT_DECEMBER (daylight time in December) */
   const char *stdID;   /* abbreviation used for standard time, e.g. "NZST" */
   const char *dstID;   /* abbreviation used for daylight time, e.g. "NZDT" */
   const char *olsonID; /* Olson ID this combination maps to, e.g. "Pacific/Auckland" */
} OffsetZoneMapping;

/* Values for OffsetZoneMapping::daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
    787 
    788 /*
    789 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
    790 and maps it to an Olson ID.
    791 Before adding anything to this list, take a look at
    792 icu/source/tools/tzcode/tz.alias
    793 Sometimes no daylight savings (0) is important to define due to aliases.
    794 This list can be tested with icu/source/test/compat/tzone.pl
    795 More values could be added to daylightType to increase precision.
    796 */
    797 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    798    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    799    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    800    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    801    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    802    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    803    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    804    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    805    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    806    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    807    {-34200, 2, "CST", "CST", "Australia/South"},
    808    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    809    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    810    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    811    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    812    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    813    {-28800, 2, "WST", "WST", "Australia/West"},
    814    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    815    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    816    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    817    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    818    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    819    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    820    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    821    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    822    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    823    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    824    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    825    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    826    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    827    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    828    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    829    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    830    {0, 1, "GMT", "IST", "Europe/Dublin"},
    831    {0, 1, "GMT", "BST", "Europe/London"},
    832    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    833    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    834    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    835    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    836    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    837    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    838    {10800, 1, "WGT", "WGST", "America/Godthab"},
    839    {10800, 2, "BRT", "BRST", "Brazil/East"},
    840    {12600, 1, "NST", "NDT", "America/St_Johns"},
    841    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    842    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    843    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    844    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    845    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    846    {18000, 1, "CST", "CDT", "America/Havana"},
    847    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    848    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    849    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    850    {21600, 0, "CST", "CDT", "America/Guatemala"},
    851    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    852    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    853    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    854    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    855    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    856    {36000, 1, "HAST", "HADT", "US/Aleutian"}
    857 };
    858 
    859 /*#define DEBUG_TZNAME*/
    860 
    861 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
    862 {
    863    int32_t idx;
    864 #ifdef DEBUG_TZNAME
    865    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
    866 #endif
    867    for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
    868    {
    869        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
    870            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
    871            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
    872            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
    873        {
    874            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
    875        }
    876    }
    877    return nullptr;
    878 }
    879 #endif
    880 
    881 #ifdef SEARCH_TZFILE
    882 #define MAX_READ_SIZE 512
    883 
    884 typedef struct DefaultTZInfo {
    885    char* defaultTZBuffer;
    886    int64_t defaultTZFileSize;
    887    FILE* defaultTZFilePtr;
    888    UBool defaultTZstatus;
    889    int32_t defaultTZPosition;
    890 } DefaultTZInfo;
    891 
    892 /*
    893 * This method compares the two files given to see if they are a match.
    894 * It is currently use to compare two TZ files.
    895 */
    896 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
    897    FILE* file;
    898    int64_t sizeFile;
    899    int64_t sizeFileLeft;
    900    int32_t sizeFileRead;
    901    int32_t sizeFileToRead;
    902    char bufferFile[MAX_READ_SIZE];
    903    UBool result = true;
    904 
    905    if (tzInfo->defaultTZFilePtr == nullptr) {
    906        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
    907    }
    908    file = fopen(TZFileName, "r");
    909 
    910    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
    911 
    912    if (file != nullptr && tzInfo->defaultTZFilePtr != nullptr) {
    913        /* First check that the file size are equal. */
    914        if (tzInfo->defaultTZFileSize == 0) {
    915            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
    916            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
    917        }
    918        fseek(file, 0, SEEK_END);
    919        sizeFile = ftell(file);
    920        sizeFileLeft = sizeFile;
    921 
    922        if (sizeFile != tzInfo->defaultTZFileSize) {
    923            result = false;
    924        } else {
    925            /* Store the data from the files in separate buffers and
    926             * compare each byte to determine equality.
    927             */
    928            if (tzInfo->defaultTZBuffer == nullptr) {
    929                rewind(tzInfo->defaultTZFilePtr);
    930                tzInfo->defaultTZBuffer = static_cast<char*>(uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize));
    931                sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
    932            }
    933            rewind(file);
    934            while(sizeFileLeft > 0) {
    935                uprv_memset(bufferFile, 0, MAX_READ_SIZE);
    936                sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
    937 
    938                sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
    939                if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
    940                    result = false;
    941                    break;
    942                }
    943                sizeFileLeft -= sizeFileRead;
    944                tzInfo->defaultTZPosition += sizeFileRead;
    945            }
    946        }
    947    } else {
    948        result = false;
    949    }
    950 
    951    if (file != nullptr) {
    952        fclose(file);
    953    }
    954 
    955    return result;
    956 }
    957 
    958 
    959 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
    960 #define SKIP1 "."
    961 #define SKIP2 ".."
    962 static UBool U_CALLCONV putil_cleanup();
    963 static CharString *gSearchTZFileResult = nullptr;
    964 
    965 /*
    966 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
    967 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
    968 */
    969 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
    970    DIR* dirp = nullptr;
    971    struct dirent* dirEntry = nullptr;
    972    char* result = nullptr;
    973    UErrorCode status = U_ZERO_ERROR;
    974 
    975    /* Save the current path */
    976    CharString curpath(path, -1, status);
    977    if (U_FAILURE(status)) {
    978        goto cleanupAndReturn;
    979    }
    980 
    981    dirp = opendir(path);
    982    if (dirp == nullptr) {
    983        goto cleanupAndReturn;
    984    }
    985 
    986    if (gSearchTZFileResult == nullptr) {
    987        gSearchTZFileResult = new CharString;
    988        if (gSearchTZFileResult == nullptr) {
    989            goto cleanupAndReturn;
    990        }
    991        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
    992    }
    993 
    994    /* Check each entry in the directory. */
    995    while((dirEntry = readdir(dirp)) != nullptr) {
    996        const char* dirName = dirEntry->d_name;
    997        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
    998            && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
    999            /* Create a newpath with the new entry to test each entry in the directory. */
   1000            CharString newpath(curpath, status);
   1001            newpath.append(dirName, -1, status);
   1002            if (U_FAILURE(status)) {
   1003                break;
   1004            }
   1005 
   1006            DIR* subDirp = nullptr;
   1007            if ((subDirp = opendir(newpath.data())) != nullptr) {
   1008                /* If this new path is a directory, make a recursive call with the newpath. */
   1009                closedir(subDirp);
   1010                newpath.append('/', status);
   1011                if (U_FAILURE(status)) {
   1012                    break;
   1013                }
   1014                result = searchForTZFile(newpath.data(), tzInfo);
   1015                /*
   1016                 Have to get out here. Otherwise, we'd keep looking
   1017                 and return the first match in the top-level directory
   1018                 if there's a match in the top-level. If not, this function
   1019                 would return nullptr and set gTimeZoneBufferPtr to nullptr in initDefault().
   1020                 It worked without this in most cases because we have a fallback of calling
   1021                 localtime_r to figure out the default timezone.
   1022                */
   1023                if (result != nullptr)
   1024                    break;
   1025            } else {
   1026                if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
   1027                    int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
   1028                    if (amountToSkip > newpath.length()) {
   1029                        amountToSkip = newpath.length();
   1030                    }
   1031                    const char* zoneid = newpath.data() + amountToSkip;
   1032                    skipZoneIDPrefix(&zoneid);
   1033                    gSearchTZFileResult->clear();
   1034                    gSearchTZFileResult->append(zoneid, -1, status);
   1035                    if (U_FAILURE(status)) {
   1036                        break;
   1037                    }
   1038                    result = gSearchTZFileResult->data();
   1039                    /* Get out after the first one found. */
   1040                    break;
   1041                }
   1042            }
   1043        }
   1044    }
   1045 
   1046  cleanupAndReturn:
   1047    if (dirp) {
   1048        closedir(dirp);
   1049    }
   1050    return result;
   1051 }
   1052 #endif
   1053 
   1054 #if U_PLATFORM == U_PF_ANDROID
   1055 typedef int(system_property_read_callback)(const prop_info* info,
   1056                                           void (*callback)(void* cookie,
   1057                                                            const char* name,
   1058                                                            const char* value,
   1059                                                            uint32_t serial),
   1060                                           void* cookie);
   1061 typedef int(system_property_get)(const char*, char*);
   1062 
   1063 static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' };
   1064 
   1065 static void u_property_read(void* cookie, const char* name, const char* value,
   1066                            uint32_t serial) {
   1067    uprv_strcpy((char* )cookie, value);
   1068 }
   1069 #endif
   1070 
   1071 U_CAPI void U_EXPORT2
   1072 uprv_tzname_clear_cache()
   1073 {
   1074 #if U_PLATFORM == U_PF_ANDROID
   1075    /* Android's timezone is stored in system property. */
   1076    gAndroidTimeZone[0] = '\0';
   1077    void* libc = dlopen("libc.so", RTLD_NOLOAD);
   1078    if (libc) {
   1079        /* Android API 26+ has new API to get system property and old API
   1080         * (__system_property_get) is deprecated */
   1081        system_property_read_callback* property_read_callback =
   1082            (system_property_read_callback*)dlsym(
   1083                libc, "__system_property_read_callback");
   1084        if (property_read_callback) {
   1085            const prop_info* info =
   1086                __system_property_find("persist.sys.timezone");
   1087            if (info) {
   1088                property_read_callback(info, &u_property_read, gAndroidTimeZone);
   1089            }
   1090        } else {
   1091            system_property_get* property_get =
   1092                (system_property_get*)dlsym(libc, "__system_property_get");
   1093            if (property_get) {
   1094                property_get("persist.sys.timezone", gAndroidTimeZone);
   1095            }
   1096        }
   1097        dlclose(libc);
   1098    }
   1099 #endif
   1100 
   1101 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
   1102    gTimeZoneBufferPtr = nullptr;
   1103 #endif
   1104 }
   1105 
   1106 U_CAPI const char* U_EXPORT2
   1107 uprv_tzname(int n)
   1108 {
   1109    (void)n; // Avoid unreferenced parameter warning.
   1110    const char *tzid = nullptr;
   1111 #if U_PLATFORM_USES_ONLY_WIN32_API
   1112    tzid = uprv_detectWindowsTimeZone();
   1113 
   1114    if (tzid != nullptr) {
   1115        return tzid;
   1116    }
   1117 
   1118 #ifndef U_TZNAME
   1119    // The return value is free'd in timezone.cpp on Windows because
   1120    // the other code path returns a pointer to a heap location.
   1121    // If we don't have a name already, then tzname wouldn't be any
   1122    // better, so just fall back.
   1123    return uprv_strdup("");
   1124 #endif // !U_TZNAME
   1125 
   1126 #else
   1127 
   1128 /*#if U_PLATFORM_IS_DARWIN_BASED
   1129    int ret;
   1130 
   1131    tzid = getenv("TZFILE");
   1132    if (tzid != nullptr) {
   1133        return tzid;
   1134    }
   1135 #endif*/
   1136 
   1137 /* This code can be temporarily disabled to test tzname resolution later on. */
   1138 #ifndef DEBUG_TZNAME
   1139 #if U_PLATFORM == U_PF_ANDROID
   1140    tzid = gAndroidTimeZone;
   1141 #else
   1142    tzid = getenv("TZ");
   1143 #endif
   1144    if (tzid != nullptr && isValidOlsonID(tzid)
   1145 #if U_PLATFORM == U_PF_SOLARIS
   1146    /* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
   1147        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
   1148 #endif
   1149    ) {
   1150        /* The colon forces tzset() to treat the remainder as zoneinfo path */
   1151        if (tzid[0] == ':') {
   1152            tzid++;
   1153        }
   1154        /* This might be a good Olson ID. */
   1155        skipZoneIDPrefix(&tzid);
   1156        return tzid;
   1157    }
   1158    /* else U_TZNAME will give a better result. */
   1159 #endif
   1160 
   1161 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
   1162    /* Caller must handle threading issues */
   1163    if (gTimeZoneBufferPtr == nullptr) {
   1164        /*
   1165        This is a trick to look at the name of the link to get the Olson ID
   1166        because the tzfile contents is underspecified.
   1167        This isn't guaranteed to work because it may not be a symlink.
   1168        */
   1169        char *ret = realpath(TZDEFAULT, gTimeZoneBuffer);
   1170        if (ret != nullptr && uprv_strcmp(TZDEFAULT, gTimeZoneBuffer) != 0) {
   1171            int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
   1172            const char *tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
   1173            // MacOS14 has the realpath as something like
   1174            // /usr/share/zoneinfo.default/Australia/Melbourne
   1175            // which will not have "/zoneinfo/" in the path.
   1176            // Therefore if we fail, we fall back to read the link which is
   1177            // /var/db/timezone/zoneinfo/Australia/Melbourne
   1178            // We also fall back to reading the link if the realpath leads to something like
   1179            // /usr/share/zoneinfo/posixrules
   1180            if (tzZoneInfoTailPtr == nullptr ||
   1181                    uprv_strcmp(tzZoneInfoTailPtr + tzZoneInfoTailLen, "posixrules") == 0) {
   1182                ssize_t size = readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
   1183                if (size > 0) {
   1184                    gTimeZoneBuffer[size] = 0;
   1185                    tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
   1186                }
   1187            }
   1188            if (tzZoneInfoTailPtr != nullptr) {
   1189                tzZoneInfoTailPtr += tzZoneInfoTailLen;
   1190                skipZoneIDPrefix(&tzZoneInfoTailPtr);
   1191                if (isValidOlsonID(tzZoneInfoTailPtr)) {
   1192                    return (gTimeZoneBufferPtr = tzZoneInfoTailPtr);
   1193                }
   1194            }
   1195        } else {
   1196 #if defined(SEARCH_TZFILE)
   1197            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
   1198            if (tzInfo != nullptr) {
   1199                tzInfo->defaultTZBuffer = nullptr;
   1200                tzInfo->defaultTZFileSize = 0;
   1201                tzInfo->defaultTZFilePtr = nullptr;
   1202                tzInfo->defaultTZstatus = false;
   1203                tzInfo->defaultTZPosition = 0;
   1204 
   1205                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
   1206 
   1207                /* Free previously allocated memory */
   1208                if (tzInfo->defaultTZBuffer != nullptr) {
   1209                    uprv_free(tzInfo->defaultTZBuffer);
   1210                }
   1211                if (tzInfo->defaultTZFilePtr != nullptr) {
   1212                    fclose(tzInfo->defaultTZFilePtr);
   1213                }
   1214                uprv_free(tzInfo);
   1215            }
   1216 
   1217            if (gTimeZoneBufferPtr != nullptr && isValidOlsonID(gTimeZoneBufferPtr)) {
   1218                return gTimeZoneBufferPtr;
   1219            }
   1220 #endif
   1221        }
   1222    }
   1223    else {
   1224        return gTimeZoneBufferPtr;
   1225    }
   1226 #endif
   1227 #endif
   1228 
   1229 #ifdef U_TZNAME
   1230 #if U_PLATFORM_USES_ONLY_WIN32_API
   1231    /* The return value is free'd in timezone.cpp on Windows because
   1232     * the other code path returns a pointer to a heap location. */
   1233    return uprv_strdup(U_TZNAME[n]);
   1234 #else
   1235    /*
   1236    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
   1237    So we remap the abbreviation to an olson ID.
   1238 
   1239    Since Windows exposes a little more timezone information,
   1240    we normally don't use this code on Windows because
   1241    uprv_detectWindowsTimeZone should have already given the correct answer.
   1242    */
   1243    {
   1244        struct tm juneSol, decemberSol;
   1245        int daylightType;
   1246        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
   1247        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
   1248 
   1249        /* This probing will tell us when daylight savings occurs.  */
   1250        localtime_r(&juneSolstice, &juneSol);
   1251        localtime_r(&decemberSolstice, &decemberSol);
   1252        if(decemberSol.tm_isdst > 0) {
   1253          daylightType = U_DAYLIGHT_DECEMBER;
   1254        } else if(juneSol.tm_isdst > 0) {
   1255          daylightType = U_DAYLIGHT_JUNE;
   1256        } else {
   1257          daylightType = U_DAYLIGHT_NONE;
   1258        }
   1259        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
   1260        if (tzid != nullptr) {
   1261            return tzid;
   1262        }
   1263    }
   1264    return U_TZNAME[n];
   1265 #endif
   1266 #else
   1267    return "";
   1268 #endif
   1269 }
   1270 
   1271 /* Get and set the ICU data directory --------------------------------------- */
   1272 
   1273 static icu::UInitOnce gDataDirInitOnce {};
   1274 static char *gDataDirectory = nullptr;
   1275 
   1276 UInitOnce gTimeZoneFilesInitOnce {};
   1277 static CharString *gTimeZoneFilesDirectory = nullptr;
   1278 
   1279 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
   1280 static const char *gCorrectedPOSIXLocale = nullptr; /* Sometimes heap allocated */
   1281 static bool gCorrectedPOSIXLocaleHeapAllocated = false;
   1282 #endif
   1283 
   1284 static UBool U_CALLCONV putil_cleanup()
   1285 {
   1286    if (gDataDirectory && *gDataDirectory) {
   1287        uprv_free(gDataDirectory);
   1288    }
   1289    gDataDirectory = nullptr;
   1290    gDataDirInitOnce.reset();
   1291 
   1292    delete gTimeZoneFilesDirectory;
   1293    gTimeZoneFilesDirectory = nullptr;
   1294    gTimeZoneFilesInitOnce.reset();
   1295 
   1296 #ifdef SEARCH_TZFILE
   1297    delete gSearchTZFileResult;
   1298    gSearchTZFileResult = nullptr;
   1299 #endif
   1300 
   1301 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
   1302    if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
   1303        uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
   1304        gCorrectedPOSIXLocale = nullptr;
   1305        gCorrectedPOSIXLocaleHeapAllocated = false;
   1306    }
   1307 #endif
   1308    return true;
   1309 }
   1310 
   1311 /*
   1312 * Set the data directory.
   1313 *    Make a copy of the passed string, and set the global data dir to point to it.
   1314 */
   1315 U_CAPI void U_EXPORT2
   1316 u_setDataDirectory(const char *directory) {
   1317    char *newDataDir;
   1318    int32_t length;
   1319 
   1320    if(directory==nullptr || *directory==0) {
   1321        /* A small optimization to prevent the malloc and copy when the
   1322        shared library is used, and this is a way to make sure that nullptr
   1323        is never returned.
   1324        */
   1325        newDataDir = (char *)"";
   1326    }
   1327    else {
   1328        length=(int32_t)uprv_strlen(directory);
   1329        newDataDir = (char *)uprv_malloc(length + 2);
   1330        /* Exit out if newDataDir could not be created. */
   1331        if (newDataDir == nullptr) {
   1332            return;
   1333        }
   1334        uprv_strcpy(newDataDir, directory);
   1335 
   1336 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
   1337        {
   1338            char *p;
   1339            while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != nullptr) {
   1340                *p = U_FILE_SEP_CHAR;
   1341            }
   1342        }
   1343 #endif
   1344    }
   1345 
   1346    if (gDataDirectory && *gDataDirectory) {
   1347        uprv_free(gDataDirectory);
   1348    }
   1349    gDataDirectory = newDataDir;
   1350    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1351 }
   1352 
   1353 U_CAPI UBool U_EXPORT2
   1354 uprv_pathIsAbsolute(const char *path)
   1355 {
   1356  if(!path || !*path) {
   1357    return false;
   1358  }
   1359 
   1360  if(*path == U_FILE_SEP_CHAR) {
   1361    return true;
   1362  }
   1363 
   1364 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
   1365  if(*path == U_FILE_ALT_SEP_CHAR) {
   1366    return true;
   1367  }
   1368 #endif
   1369 
   1370 #if U_PLATFORM_USES_ONLY_WIN32_API
   1371  if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
   1372       ((path[0] >= 'a') && (path[0] <= 'z'))) &&
   1373      path[1] == ':' ) {
   1374    return true;
   1375  }
   1376 #endif
   1377 
   1378  return false;
   1379 }
   1380 
   1381 /* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
   1382   (needed for some Darwin ICU build environments) */
   1383 #if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
   1384 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
   1385 #  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
   1386 # endif
   1387 #endif
   1388 
   1389 #if defined(ICU_DATA_DIR_WINDOWS)
   1390 // Helper function to get the ICU Data Directory under the Windows directory location.
   1391 static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
   1392 {
   1393    wchar_t windowsPath[MAX_PATH];
   1394    char windowsPathUtf8[MAX_PATH];
   1395 
   1396    UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
   1397    if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
   1398        // Convert UTF-16 to a UTF-8 string.
   1399        UErrorCode status = U_ZERO_ERROR;
   1400        int32_t windowsPathUtf8Len = 0;
   1401        u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
   1402            &windowsPathUtf8Len, reinterpret_cast<const char16_t*>(windowsPath), -1, &status);
   1403 
   1404        if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
   1405            (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
   1406            // Ensure it always has a separator, so we can append the ICU data path.
   1407            if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
   1408                windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
   1409                windowsPathUtf8[windowsPathUtf8Len] = '\0';
   1410            }
   1411            // Check if the concatenated string will fit.
   1412            if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
   1413                uprv_strcpy(directoryBuffer, windowsPathUtf8);
   1414                uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
   1415                return true;
   1416            }
   1417        }
   1418    }
   1419 
   1420    return false;
   1421 }
   1422 #endif
   1423 
   1424 static void U_CALLCONV dataDirectoryInitFn() {
   1425    /* If we already have the directory, then return immediately. Will happen if user called
   1426     * u_setDataDirectory().
   1427     */
   1428    if (gDataDirectory) {
   1429        return;
   1430    }
   1431 
   1432    const char *path = nullptr;
   1433 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
   1434    char datadir_path_buffer[PATH_MAX];
   1435 #endif
   1436 
   1437    /*
   1438    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
   1439    override ICU's data with the ICU_DATA environment variable. This prevents
   1440    problems where multiple custom copies of ICU's specific version of data
   1441    are installed on a system. Either the application must define the data
   1442    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
   1443    ICU, set the data with udata_setCommonData or trust that all of the
   1444    required data is contained in ICU's data library that contains
   1445    the entry point defined by U_ICUDATA_ENTRY_POINT.
   1446 
   1447    There may also be some platforms where environment variables
   1448    are not allowed.
   1449    */
   1450 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
   1451    /* First try to get the environment variable */
   1452 #     if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
   1453        path=getenv("ICU_DATA");
   1454 #     endif
   1455 #   endif
   1456 
   1457    /* ICU_DATA_DIR may be set as a compile option.
   1458     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
   1459     * and is used only when data is built in archive mode eliminating the need
   1460     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
   1461     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
   1462     * set their own path.
   1463     */
   1464 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
   1465    if(path==nullptr || *path==0) {
   1466 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
   1467        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
   1468 # endif
   1469 # ifdef ICU_DATA_DIR
   1470        path=ICU_DATA_DIR;
   1471 # else
   1472        path=U_ICU_DATA_DEFAULT_DIR;
   1473 # endif
   1474 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
   1475        if (prefix != nullptr) {
   1476            snprintf(datadir_path_buffer, sizeof(datadir_path_buffer), "%s%s", prefix, path);
   1477            path=datadir_path_buffer;
   1478        }
   1479 # endif
   1480    }
   1481 #endif
   1482 
   1483 #if defined(ICU_DATA_DIR_WINDOWS)
   1484    char datadir_path_buffer[MAX_PATH];
   1485    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
   1486        path = datadir_path_buffer;
   1487    }
   1488 #endif
   1489 
   1490    if(path==nullptr) {
   1491        /* It looks really bad, set it to something. */
   1492        path = "";
   1493    }
   1494 
   1495    u_setDataDirectory(path);
   1496 }
   1497 
   1498 U_CAPI const char * U_EXPORT2
   1499 u_getDataDirectory() {
   1500    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
   1501    return gDataDirectory;
   1502 }
   1503 
   1504 static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
   1505    if (U_FAILURE(status)) {
   1506        return;
   1507    }
   1508    gTimeZoneFilesDirectory->clear();
   1509    gTimeZoneFilesDirectory->append(path, status);
   1510 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
   1511    char *p = gTimeZoneFilesDirectory->data();
   1512    while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != nullptr) {
   1513        *p = U_FILE_SEP_CHAR;
   1514    }
   1515 #endif
   1516 }
   1517 
   1518 #define TO_STRING(x) TO_STRING_2(x)
   1519 #define TO_STRING_2(x) #x
   1520 
   1521 static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
   1522    U_ASSERT(gTimeZoneFilesDirectory == nullptr);
   1523    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1524    gTimeZoneFilesDirectory = new CharString();
   1525    if (gTimeZoneFilesDirectory == nullptr) {
   1526        status = U_MEMORY_ALLOCATION_ERROR;
   1527        return;
   1528    }
   1529 
   1530    const char *dir = "";
   1531 
   1532 #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
   1533    char timezonefilesdir_path_buffer[PATH_MAX];
   1534    const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
   1535 #endif
   1536 
   1537 #if U_PLATFORM_HAS_WINUWP_API == 1
   1538 // The UWP version does not support the environment variable setting.
   1539 
   1540 # if defined(ICU_DATA_DIR_WINDOWS)
   1541    // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
   1542    char datadir_path_buffer[MAX_PATH];
   1543    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
   1544        dir = datadir_path_buffer;
   1545    }
   1546 # endif
   1547 
   1548 #else
   1549    dir = getenv("ICU_TIMEZONE_FILES_DIR");
   1550 #endif // U_PLATFORM_HAS_WINUWP_API
   1551 
   1552 #if defined(U_TIMEZONE_FILES_DIR)
   1553    if (dir == nullptr) {
   1554        // Build time configuration setting.
   1555        dir = TO_STRING(U_TIMEZONE_FILES_DIR);
   1556    }
   1557 #endif
   1558 
   1559    if (dir == nullptr) {
   1560        dir = "";
   1561    }
   1562 
   1563 #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
   1564    if (prefix != nullptr) {
   1565        snprintf(timezonefilesdir_path_buffer, sizeof(timezonefilesdir_path_buffer), "%s%s", prefix, dir);
   1566        dir = timezonefilesdir_path_buffer;
   1567    }
   1568 #endif
   1569 
   1570    setTimeZoneFilesDir(dir, status);
   1571 }
   1572 
   1573 
   1574 U_CAPI const char * U_EXPORT2
   1575 u_getTimeZoneFilesDirectory(UErrorCode *status) {
   1576    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
   1577    return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
   1578 }
   1579 
   1580 U_CAPI void U_EXPORT2
   1581 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
   1582    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
   1583    setTimeZoneFilesDir(path, *status);
   1584 
   1585    // Note: this function does some extra churn, first setting based on the
   1586    //       environment, then immediately replacing with the value passed in.
   1587    //       The logic is simpler that way, and performance shouldn't be an issue.
   1588 }
   1589 
   1590 
   1591 #if U_POSIX_LOCALE
   1592 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
   1593 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
   1594 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
   1595 */
   1596 static const char *uprv_getPOSIXIDForCategory(int category)
   1597 {
   1598    const char* posixID = nullptr;
   1599    if (category == LC_MESSAGES || category == LC_CTYPE) {
   1600        /*
   1601        * On Solaris two different calls to setlocale can result in
   1602        * different values. Only get this value once.
   1603        *
   1604        * We must check this first because an application can set this.
   1605        *
   1606        * LC_ALL can't be used because it's platform dependent. The LANG
   1607        * environment variable seems to affect LC_CTYPE variable by default.
   1608        * Here is what setlocale(LC_ALL, nullptr) can return.
   1609        * HPUX can return 'C C C C C C C'
   1610        * Solaris can return /en_US/C/C/C/C/C on the second try.
   1611        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
   1612        *
   1613        * The default codepage detection also needs to use LC_CTYPE.
   1614        *
   1615        * Do not call setlocale(LC_*, "")! Using an empty string instead
   1616        * of nullptr, will modify the libc behavior.
   1617        */
   1618        posixID = setlocale(category, nullptr);
   1619        if ((posixID == nullptr)
   1620            || (uprv_strcmp("C", posixID) == 0)
   1621            || (uprv_strcmp("POSIX", posixID) == 0))
   1622        {
   1623            /* Maybe we got some garbage.  Try something more reasonable */
   1624            posixID = getenv("LC_ALL");
   1625            /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
   1626             * This is needed to properly handle empty env. variables
   1627             */
   1628 #if U_PLATFORM == U_PF_SOLARIS
   1629            if ((posixID == 0) || (posixID[0] == '\0')) {
   1630                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
   1631                if ((posixID == 0) || (posixID[0] == '\0')) {
   1632 #else
   1633            if (posixID == nullptr) {
   1634                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
   1635                if (posixID == nullptr) {
   1636 #endif
   1637                    posixID = getenv("LANG");
   1638                }
   1639            }
   1640        }
   1641    }
   1642    if ((posixID == nullptr)
   1643        || (uprv_strcmp("C", posixID) == 0)
   1644        || (uprv_strcmp("POSIX", posixID) == 0))
   1645    {
   1646        /* Nothing worked.  Give it a nice POSIX default value. */
   1647        posixID = "en_US_POSIX";
   1648        // Note: this test will not catch 'C.UTF-8',
   1649        // that will be handled in uprv_getDefaultLocaleID().
   1650        // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
   1651        // caller which expects to see "en_US_POSIX" in many branches.
   1652    }
   1653    return posixID;
   1654 }
   1655 
   1656 /* Return just the POSIX id for the default locale, whatever happens to be in
   1657 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
   1658 */
   1659 static const char *uprv_getPOSIXIDForDefaultLocale()
   1660 {
   1661    static const char* posixID = nullptr;
   1662    if (posixID == nullptr) {
   1663        posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
   1664    }
   1665    return posixID;
   1666 }
   1667 
   1668 #if !U_CHARSET_IS_UTF8
   1669 /* Return just the POSIX id for the default codepage, whatever happens to be in
   1670 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
   1671 */
   1672 static const char *uprv_getPOSIXIDForDefaultCodepage()
   1673 {
   1674    static const char* posixID = nullptr;
   1675    if (posixID == 0) {
   1676        posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
   1677    }
   1678    return posixID;
   1679 }
   1680 #endif
   1681 #endif
   1682 
   1683 /* NOTE: The caller should handle thread safety */
   1684 U_CAPI const char* U_EXPORT2
   1685 uprv_getDefaultLocaleID()
   1686 {
   1687 #if U_POSIX_LOCALE
   1688 /*
   1689  Note that:  (a '!' means the ID is improper somehow)
   1690     LC_ALL  ---->     default_loc          codepage
   1691 --------------------------------------------------------
   1692     ab.CD             ab                   CD
   1693     ab@CD             ab__CD               -
   1694     ab@CD.EF          ab__CD               EF
   1695 
   1696     ab_CD.EF@GH       ab_CD_GH             EF
   1697 
   1698 Some 'improper' ways to do the same as above:
   1699  !  ab_CD@GH.EF       ab_CD_GH             EF
   1700  !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
   1701  !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
   1702 
   1703     _CD@GH            _CD_GH               -
   1704     _CD.EF@GH         _CD_GH               EF
   1705 
   1706 The variant cannot have dots in it.
   1707 The 'rightmost' variant (@xxx) wins.
   1708 The leftmost codepage (.xxx) wins.
   1709 */
   1710    const char* posixID = uprv_getPOSIXIDForDefaultLocale();
   1711 
   1712    /* Format: (no spaces)
   1713    ll [ _CC ] [ . MM ] [ @ VV]
   1714 
   1715      l = lang, C = ctry, M = charmap, V = variant
   1716    */
   1717 
   1718    if (gCorrectedPOSIXLocale != nullptr) {
   1719        return gCorrectedPOSIXLocale;
   1720    }
   1721 
   1722    // Copy the ID into owned memory.
   1723    // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
   1724    char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
   1725    if (correctedPOSIXLocale == nullptr) {
   1726        return nullptr;
   1727    }
   1728    uprv_strcpy(correctedPOSIXLocale, posixID);
   1729 
   1730    char *limit;
   1731    if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
   1732        *limit = 0;
   1733    }
   1734    if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
   1735        *limit = 0;
   1736    }
   1737 
   1738    if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
   1739        || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
   1740      // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
   1741      // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
   1742      uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
   1743    }
   1744 
   1745    /* Note that we scan the *uncorrected* ID. */
   1746    const char *p;
   1747    if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
   1748        p++;
   1749 
   1750        /* Take care of any special cases here.. */
   1751        if (!uprv_strcmp(p, "nynorsk")) {
   1752            p = "NY";
   1753            /* Don't worry about no__NY. In practice, it won't appear. */
   1754        }
   1755 
   1756        if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
   1757            uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
   1758        }
   1759        else {
   1760            uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
   1761        }
   1762 
   1763        const char *q;
   1764        if ((q = uprv_strchr(p, '.')) != nullptr) {
   1765            /* How big will the resulting string be? */
   1766            int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
   1767            uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
   1768            correctedPOSIXLocale[len] = 0;
   1769        }
   1770        else {
   1771            /* Anything following the @ sign */
   1772            uprv_strcat(correctedPOSIXLocale, p);
   1773        }
   1774 
   1775        /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
   1776         * How about 'russian' -> 'ru'?
   1777         * Many of the other locales using ISO codes will be handled by the
   1778         * canonicalization functions in uloc_getDefault.
   1779         */
   1780    }
   1781 
   1782    if (gCorrectedPOSIXLocale == nullptr) {
   1783        gCorrectedPOSIXLocale = correctedPOSIXLocale;
   1784        gCorrectedPOSIXLocaleHeapAllocated = true;
   1785        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1786        correctedPOSIXLocale = nullptr;
   1787    }
   1788    posixID = gCorrectedPOSIXLocale;
   1789 
   1790    if (correctedPOSIXLocale != nullptr) {  /* Was already set - clean up. */
   1791        uprv_free(correctedPOSIXLocale);
   1792    }
   1793 
   1794    return posixID;
   1795 
   1796 #elif U_PLATFORM_USES_ONLY_WIN32_API
   1797 #define POSIX_LOCALE_CAPACITY 64
   1798    UErrorCode status = U_ZERO_ERROR;
   1799    char *correctedPOSIXLocale = nullptr;
   1800 
   1801    // If we have already figured this out just use the cached value
   1802    if (gCorrectedPOSIXLocale != nullptr) {
   1803        return gCorrectedPOSIXLocale;
   1804    }
   1805 
   1806    // No cached value, need to determine the current value
   1807    static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
   1808    int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
   1809 
   1810    // Now we should have a Windows locale name that needs converted to the POSIX style.
   1811    if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
   1812    {
   1813        // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
   1814        char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
   1815 
   1816        int32_t i;
   1817        for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
   1818        {
   1819            if (windowsLocale[i] == '_')
   1820            {
   1821                modifiedWindowsLocale[i] = '-';
   1822            }
   1823            else
   1824            {
   1825                modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
   1826            }
   1827 
   1828            if (modifiedWindowsLocale[i] == '\0')
   1829            {
   1830                break;
   1831            }
   1832        }
   1833 
   1834        if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
   1835        {
   1836            // Ran out of room, can't really happen, maybe we'll be lucky about a matching
   1837            // locale when tags are dropped
   1838            modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
   1839        }
   1840 
   1841        // Now normalize the resulting name
   1842        correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
   1843        /* TODO: Should we just exit on memory allocation failure? */
   1844        if (correctedPOSIXLocale)
   1845        {
   1846            int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
   1847            if (U_SUCCESS(status))
   1848            {
   1849                *(correctedPOSIXLocale + posixLen) = 0;
   1850                gCorrectedPOSIXLocale = correctedPOSIXLocale;
   1851                gCorrectedPOSIXLocaleHeapAllocated = true;
   1852                ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1853            }
   1854            else
   1855            {
   1856                uprv_free(correctedPOSIXLocale);
   1857            }
   1858        }
   1859    }
   1860 
   1861    // If unable to find a locale we can agree upon, use en-US by default
   1862    if (gCorrectedPOSIXLocale == nullptr) {
   1863        gCorrectedPOSIXLocale = "en_US";
   1864    }
   1865    return gCorrectedPOSIXLocale;
   1866 
   1867 #elif U_PLATFORM == U_PF_OS400
   1868    /* locales are process scoped and are by definition thread safe */
   1869    static char correctedLocale[64];
   1870    const  char *localeID = getenv("LC_ALL");
   1871           char *p;
   1872 
   1873    if (localeID == nullptr)
   1874        localeID = getenv("LANG");
   1875    if (localeID == nullptr)
   1876        localeID = setlocale(LC_ALL, nullptr);
   1877    /* Make sure we have something... */
   1878    if (localeID == nullptr)
   1879        return "en_US_POSIX";
   1880 
   1881    /* Extract the locale name from the path. */
   1882    if((p = uprv_strrchr(localeID, '/')) != nullptr)
   1883    {
   1884        /* Increment p to start of locale name. */
   1885        p++;
   1886        localeID = p;
   1887    }
   1888 
   1889    /* Copy to work location. */
   1890    uprv_strcpy(correctedLocale, localeID);
   1891 
   1892    /* Strip off the '.locale' extension. */
   1893    if((p = uprv_strchr(correctedLocale, '.')) != nullptr) {
   1894        *p = 0;
   1895    }
   1896 
   1897    /* Upper case the locale name. */
   1898    T_CString_toUpperCase(correctedLocale);
   1899 
   1900    /* See if we are using the POSIX locale.  Any of the
   1901    * following are equivalent and use the same QLGPGCMA
   1902    * (POSIX) locale.
   1903    * QLGPGCMA2 means UCS2
   1904    * QLGPGCMA_4 means UTF-32
   1905    * QLGPGCMA_8 means UTF-8
   1906    */
   1907    if ((uprv_strcmp("C", correctedLocale) == 0) ||
   1908        (uprv_strcmp("POSIX", correctedLocale) == 0) ||
   1909        (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
   1910    {
   1911        uprv_strcpy(correctedLocale, "en_US_POSIX");
   1912    }
   1913    else
   1914    {
   1915        int16_t LocaleLen;
   1916 
   1917        /* Lower case the lang portion. */
   1918        for(p = correctedLocale; *p != 0 && *p != '_'; p++)
   1919        {
   1920            *p = uprv_tolower(*p);
   1921        }
   1922 
   1923        /* Adjust for Euro.  After '_E' add 'URO'. */
   1924        LocaleLen = uprv_strlen(correctedLocale);
   1925        if (correctedLocale[LocaleLen - 2] == '_' &&
   1926            correctedLocale[LocaleLen - 1] == 'E')
   1927        {
   1928            uprv_strcat(correctedLocale, "URO");
   1929        }
   1930 
   1931        /* If using Lotus-based locale then convert to
   1932         * equivalent non Lotus.
   1933         */
   1934        else if (correctedLocale[LocaleLen - 2] == '_' &&
   1935            correctedLocale[LocaleLen - 1] == 'L')
   1936        {
   1937            correctedLocale[LocaleLen - 2] = 0;
   1938        }
   1939 
   1940        /* There are separate simplified and traditional
   1941         * locales called zh_HK_S and zh_HK_T.
   1942         */
   1943        else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
   1944        {
   1945            uprv_strcpy(correctedLocale, "zh_HK");
   1946        }
   1947 
   1948        /* A special zh_CN_GBK locale...
   1949        */
   1950        else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
   1951        {
   1952            uprv_strcpy(correctedLocale, "zh_CN");
   1953        }
   1954 
   1955    }
   1956 
   1957    return correctedLocale;
   1958 #endif
   1959 
   1960 }
   1961 
   1962 #if !U_CHARSET_IS_UTF8
   1963 #if U_POSIX_LOCALE
   1964 /*
   1965 Due to various platform differences, one platform may specify a charset,
   1966 when they really mean a different charset. Remap the names so that they are
   1967 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
   1968 here. Before adding anything to this function, please consider adding unique
   1969 names to the ICU alias table in the data directory.
   1970 */
   1971 static const char*
   1972 remapPlatformDependentCodepage(const char *locale, const char *name) {
   1973    if (locale != nullptr && *locale == 0) {
   1974        /* Make sure that an empty locale is handled the same way. */
   1975        locale = nullptr;
   1976    }
   1977    if (name == nullptr) {
   1978        return nullptr;
   1979    }
   1980 #if U_PLATFORM == U_PF_AIX
   1981    if (uprv_strcmp(name, "IBM-943") == 0) {
   1982        /* Use the ASCII compatible ibm-943 */
   1983        name = "Shift-JIS";
   1984    }
   1985    else if (uprv_strcmp(name, "IBM-1252") == 0) {
   1986        /* Use the windows-1252 that contains the Euro */
   1987        name = "IBM-5348";
   1988    }
   1989 #elif U_PLATFORM == U_PF_SOLARIS
   1990    if (locale != nullptr && uprv_strcmp(name, "EUC") == 0) {
   1991        /* Solaris underspecifies the "EUC" name. */
   1992        if (uprv_strcmp(locale, "zh_CN") == 0) {
   1993            name = "EUC-CN";
   1994        }
   1995        else if (uprv_strcmp(locale, "zh_TW") == 0) {
   1996            name = "EUC-TW";
   1997        }
   1998        else if (uprv_strcmp(locale, "ko_KR") == 0) {
   1999            name = "EUC-KR";
   2000        }
   2001    }
   2002    else if (uprv_strcmp(name, "eucJP") == 0) {
   2003        /*
   2004        ibm-954 is the best match.
   2005        ibm-33722 is the default for eucJP (similar to Windows).
   2006        */
   2007        name = "eucjis";
   2008    }
   2009    else if (uprv_strcmp(name, "646") == 0) {
   2010        /*
   2011         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
   2012         * ISO-8859-1 instead of US-ASCII(646).
   2013         */
   2014        name = "ISO-8859-1";
   2015    }
   2016 #elif U_PLATFORM_IS_DARWIN_BASED
   2017    if (locale == nullptr && *name == 0) {
   2018        /*
   2019        No locale was specified, and an empty name was passed in.
   2020        This usually indicates that nl_langinfo didn't return valid information.
   2021        Mac OS X uses UTF-8 by default (especially the locale data and console).
   2022        */
   2023        name = "UTF-8";
   2024    }
   2025    else if (uprv_strcmp(name, "CP949") == 0) {
   2026        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
   2027        name = "EUC-KR";
   2028    }
   2029    else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
   2030        /*
   2031         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
   2032         */
   2033        name = "UTF-8";
   2034    }
   2035 #elif U_PLATFORM == U_PF_BSD
   2036    if (uprv_strcmp(name, "CP949") == 0) {
   2037        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
   2038        name = "EUC-KR";
   2039    }
   2040 #elif U_PLATFORM == U_PF_HPUX
   2041    if (locale != nullptr && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
   2042        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
   2043        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
   2044        name = "hkbig5";
   2045    }
   2046    else if (uprv_strcmp(name, "eucJP") == 0) {
   2047        /*
   2048        ibm-1350 is the best match, but unavailable.
   2049        ibm-954 is mostly a superset of ibm-1350.
   2050        ibm-33722 is the default for eucJP (similar to Windows).
   2051        */
   2052        name = "eucjis";
   2053    }
   2054 #elif U_PLATFORM == U_PF_LINUX
   2055    if (locale != nullptr && uprv_strcmp(name, "euc") == 0) {
   2056        /* Linux underspecifies the "EUC" name. */
   2057        if (uprv_strcmp(locale, "korean") == 0) {
   2058            name = "EUC-KR";
   2059        }
   2060        else if (uprv_strcmp(locale, "japanese") == 0) {
   2061            /* See comment below about eucJP */
   2062            name = "eucjis";
   2063        }
   2064    }
   2065    else if (uprv_strcmp(name, "eucjp") == 0) {
   2066        /*
   2067        ibm-1350 is the best match, but unavailable.
   2068        ibm-954 is mostly a superset of ibm-1350.
   2069        ibm-33722 is the default for eucJP (similar to Windows).
   2070        */
   2071        name = "eucjis";
   2072    }
   2073    else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
   2074            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
   2075        /*
   2076         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
   2077         */
   2078        name = "UTF-8";
   2079    }
   2080    /*
   2081     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
   2082     * it by falling back to 'US-ASCII' when nullptr is returned from this
   2083     * function. So, we don't have to worry about it here.
   2084     */
   2085 #endif
   2086    /* return nullptr when "" is passed in */
   2087    if (*name == 0) {
   2088        name = nullptr;
   2089    }
   2090    return name;
   2091 }
   2092 
   2093 static const char*
   2094 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
   2095 {
   2096    char localeBuf[100];
   2097    const char *name = nullptr;
   2098    char *variant = nullptr;
   2099 
   2100    if (localeName != nullptr && (name = (uprv_strchr(localeName, '.'))) != nullptr) {
   2101        size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
   2102        uprv_strncpy(localeBuf, localeName, localeCapacity);
   2103        localeBuf[localeCapacity-1] = 0; /* ensure NUL termination */
   2104        name = uprv_strncpy(buffer, name+1, buffCapacity);
   2105        buffer[buffCapacity-1] = 0; /* ensure NUL termination */
   2106        if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != nullptr) {
   2107            *variant = 0;
   2108        }
   2109        name = remapPlatformDependentCodepage(localeBuf, name);
   2110    }
   2111    return name;
   2112 }
   2113 #endif
   2114 
   2115 static const char*
   2116 int_getDefaultCodepage()
   2117 {
   2118 #if U_PLATFORM == U_PF_OS400
   2119    uint32_t ccsid = 37; /* Default to ibm-37 */
   2120    static char codepage[64];
   2121    Qwc_JOBI0400_t jobinfo;
   2122    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
   2123 
   2124    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
   2125        "*                         ", "                ", &error);
   2126 
   2127    if (error.Bytes_Available == 0) {
   2128        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
   2129            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
   2130        }
   2131        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
   2132            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
   2133        }
   2134        /* else use the default */
   2135    }
   2136    snprintf(codepage, sizeof(codepage), "ibm-%d", ccsid);
   2137    return codepage;
   2138 
   2139 #elif U_PLATFORM == U_PF_OS390
   2140    static char codepage[64];
   2141 
   2142    strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
   2143    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
   2144    codepage[63] = 0; /* NUL terminate */
   2145 
   2146    return codepage;
   2147 
   2148 #elif U_PLATFORM_USES_ONLY_WIN32_API
   2149    static char codepage[64];
   2150    DWORD codepageNumber = 0;
   2151 
   2152 #if U_PLATFORM_HAS_WINUWP_API == 1
   2153    // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
   2154    // have folks use Unicode than a "system" code page, however this is the same
   2155    // codepage as the system default locale codepage.  (FWIW, the system locale is
   2156    // ONLY used for codepage, it should never be used for anything else)
   2157    GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
   2158        (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
   2159 #else
   2160    // Win32 apps can call GetACP
   2161    codepageNumber = GetACP();
   2162 #endif
   2163    // Special case for UTF-8
   2164    if (codepageNumber == 65001)
   2165    {
   2166        return "UTF-8";
   2167    }
   2168    // Windows codepages can look like windows-1252, so format the found number
   2169    // the numbers are eclectic, however all valid system code pages, besides UTF-8
   2170    // are between 3 and 19999
   2171    if (codepageNumber > 0 && codepageNumber < 20000)
   2172    {
   2173        snprintf(codepage, sizeof(codepage), "windows-%ld", codepageNumber);
   2174        return codepage;
   2175    }
   2176    // If the codepage number call failed then return UTF-8
   2177    return "UTF-8";
   2178 
   2179 #elif U_POSIX_LOCALE
   2180    static char codesetName[100];
   2181    const char *localeName = nullptr;
   2182    const char *name = nullptr;
   2183 
   2184    localeName = uprv_getPOSIXIDForDefaultCodepage();
   2185    uprv_memset(codesetName, 0, sizeof(codesetName));
   2186    /* On Solaris nl_langinfo returns C locale values unless setlocale
   2187     * was called earlier.
   2188     */
   2189 #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
   2190    /* When available, check nl_langinfo first because it usually gives more
   2191       useful names. It depends on LC_CTYPE.
   2192       nl_langinfo may use the same buffer as setlocale. */
   2193    {
   2194        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
   2195 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
   2196        /*
   2197         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
   2198         * instead of ASCII.
   2199         */
   2200        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
   2201            codeset = remapPlatformDependentCodepage(localeName, codeset);
   2202        } else
   2203 #endif
   2204        {
   2205            codeset = remapPlatformDependentCodepage(nullptr, codeset);
   2206        }
   2207 
   2208        if (codeset != nullptr) {
   2209            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
   2210            codesetName[sizeof(codesetName)-1] = 0;
   2211            return codesetName;
   2212        }
   2213    }
   2214 #endif
   2215 
   2216    /* Use setlocale in a nice way, and then check some environment variables.
   2217       Maybe the application used setlocale already.
   2218    */
   2219    uprv_memset(codesetName, 0, sizeof(codesetName));
   2220    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
   2221    if (name) {
   2222        /* if we can find the codeset name from setlocale, return that. */
   2223        return name;
   2224    }
   2225 
   2226    if (*codesetName == 0)
   2227    {
   2228        /* Everything failed. Return US ASCII (ISO 646). */
   2229        (void)uprv_strcpy(codesetName, "US-ASCII");
   2230    }
   2231    return codesetName;
   2232 #else
   2233    return "US-ASCII";
   2234 #endif
   2235 }
   2236 
   2237 
   2238 U_CAPI const char*  U_EXPORT2
   2239 uprv_getDefaultCodepage()
   2240 {
   2241    static char const  *name = nullptr;
   2242    umtx_lock(nullptr);
   2243    if (name == nullptr) {
   2244        name = int_getDefaultCodepage();
   2245    }
   2246    umtx_unlock(nullptr);
   2247    return name;
   2248 }
   2249 #endif  /* !U_CHARSET_IS_UTF8 */
   2250 
   2251 
   2252 /* end of platform-specific implementation -------------- */
   2253 
   2254 /* version handling --------------------------------------------------------- */
   2255 
   2256 U_CAPI void U_EXPORT2
   2257 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
   2258    char *end;
   2259    uint16_t part=0;
   2260 
   2261    if(versionArray==nullptr) {
   2262        return;
   2263    }
   2264 
   2265    if(versionString!=nullptr) {
   2266        for(;;) {
   2267            versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
   2268            if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
   2269                break;
   2270            }
   2271            versionString=end+1;
   2272        }
   2273    }
   2274 
   2275    while(part<U_MAX_VERSION_LENGTH) {
   2276        versionArray[part++]=0;
   2277    }
   2278 }
   2279 
   2280 U_CAPI void U_EXPORT2
   2281 u_versionFromUString(UVersionInfo versionArray, const char16_t *versionString) {
   2282    if(versionArray!=nullptr && versionString!=nullptr) {
   2283        char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
   2284        int32_t len = u_strlen(versionString);
   2285        if(len>U_MAX_VERSION_STRING_LENGTH) {
   2286            len = U_MAX_VERSION_STRING_LENGTH;
   2287        }
   2288        u_UCharsToChars(versionString, versionChars, len);
   2289        versionChars[len]=0;
   2290        u_versionFromString(versionArray, versionChars);
   2291    }
   2292 }
   2293 
   2294 U_CAPI void U_EXPORT2
   2295 u_versionToString(const UVersionInfo versionArray, char *versionString) {
   2296    uint16_t count, part;
   2297    uint8_t field;
   2298 
   2299    if(versionString==nullptr) {
   2300        return;
   2301    }
   2302 
   2303    if(versionArray==nullptr) {
   2304        versionString[0]=0;
   2305        return;
   2306    }
   2307 
   2308    /* count how many fields need to be written */
   2309    for(count=4; count>0 && versionArray[count-1]==0; --count) {
   2310    }
   2311 
   2312    if(count <= 1) {
   2313        count = 2;
   2314    }
   2315 
   2316    /* write the first part */
   2317    /* write the decimal field value */
   2318    field=versionArray[0];
   2319    if(field>=100) {
   2320        *versionString++=(char)('0'+field/100);
   2321        field%=100;
   2322    }
   2323    if(field>=10) {
   2324        *versionString++=(char)('0'+field/10);
   2325        field%=10;
   2326    }
   2327    *versionString++=(char)('0'+field);
   2328 
   2329    /* write the following parts */
   2330    for(part=1; part<count; ++part) {
   2331        /* write a dot first */
   2332        *versionString++=U_VERSION_DELIMITER;
   2333 
   2334        /* write the decimal field value */
   2335        field=versionArray[part];
   2336        if(field>=100) {
   2337            *versionString++=(char)('0'+field/100);
   2338            field%=100;
   2339        }
   2340        if(field>=10) {
   2341            *versionString++=(char)('0'+field/10);
   2342            field%=10;
   2343        }
   2344        *versionString++=(char)('0'+field);
   2345    }
   2346 
   2347    /* NUL-terminate */
   2348    *versionString=0;
   2349 }
   2350 
   2351 U_CAPI void U_EXPORT2
   2352 u_getVersion(UVersionInfo versionArray) {
   2353    (void)copyright;   // Suppress unused variable warning from clang.
   2354    u_versionFromString(versionArray, U_ICU_VERSION);
   2355 }
   2356 
   2357 /**
   2358 * icucfg.h dependent code
   2359 */
   2360 
   2361 #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
   2362 
   2363 #if HAVE_DLFCN_H
   2364 #ifdef __MVS__
   2365 #ifndef __SUSV3
   2366 #define __SUSV3 1
   2367 #endif
   2368 #endif
   2369 #include <dlfcn.h>
   2370 #endif /* HAVE_DLFCN_H */
   2371 
   2372 U_CAPI void * U_EXPORT2
   2373 uprv_dl_open(const char *libName, UErrorCode *status) {
   2374  void *ret = nullptr;
   2375  if(U_FAILURE(*status)) return ret;
   2376  ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
   2377  if(ret==nullptr) {
   2378 #ifdef U_TRACE_DYLOAD
   2379    printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
   2380 #endif
   2381    *status = U_MISSING_RESOURCE_ERROR;
   2382  }
   2383  return ret;
   2384 }
   2385 
   2386 U_CAPI void U_EXPORT2
   2387 uprv_dl_close(void *lib, UErrorCode *status) {
   2388  if(U_FAILURE(*status)) return;
   2389  dlclose(lib);
   2390 }
   2391 
   2392 U_CAPI UVoidFunction* U_EXPORT2
   2393 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
   2394  union {
   2395      UVoidFunction *fp;
   2396      void *vp;
   2397  } uret;
   2398  uret.fp = nullptr;
   2399  if(U_FAILURE(*status)) return uret.fp;
   2400  uret.vp = dlsym(lib, sym);
   2401  if(uret.vp == nullptr) {
   2402 #ifdef U_TRACE_DYLOAD
   2403    printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
   2404 #endif
   2405    *status = U_MISSING_RESOURCE_ERROR;
   2406  }
   2407  return uret.fp;
   2408 }
   2409 
   2410 #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
   2411 
   2412 /* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
   2414 
   2415 U_CAPI void * U_EXPORT2
   2416 uprv_dl_open(const char *libName, UErrorCode *status) {
   2417  HMODULE lib = nullptr;
   2418 
   2419  if(U_FAILURE(*status)) return nullptr;
   2420 
   2421  lib = LoadLibraryA(libName);
   2422 
   2423  if(lib==nullptr) {
   2424    *status = U_MISSING_RESOURCE_ERROR;
   2425  }
   2426 
   2427  return (void*)lib;
   2428 }
   2429 
   2430 U_CAPI void U_EXPORT2
   2431 uprv_dl_close(void *lib, UErrorCode *status) {
   2432  HMODULE handle = (HMODULE)lib;
   2433  if(U_FAILURE(*status)) return;
   2434 
   2435  FreeLibrary(handle);
   2436 
   2437  return;
   2438 }
   2439 
   2440 U_CAPI UVoidFunction* U_EXPORT2
   2441 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
   2442  HMODULE handle = (HMODULE)lib;
   2443  UVoidFunction* addr = nullptr;
   2444 
   2445  if(U_FAILURE(*status) || lib==nullptr) return nullptr;
   2446 
   2447  addr = (UVoidFunction*)GetProcAddress(handle, sym);
   2448 
   2449  if(addr==nullptr) {
   2450    DWORD lastError = GetLastError();
   2451    if(lastError == ERROR_PROC_NOT_FOUND) {
   2452      *status = U_MISSING_RESOURCE_ERROR;
   2453    } else {
   2454      *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
   2455    }
   2456  }
   2457 
   2458  return addr;
   2459 }
   2460 
   2461 #else
   2462 
   2463 /* No dynamic loading, null (nonexistent) implementation. */
   2464 
   2465 U_CAPI void * U_EXPORT2
   2466 uprv_dl_open(const char *libName, UErrorCode *status) {
   2467    (void)libName;
   2468    if(U_FAILURE(*status)) return nullptr;
   2469    *status = U_UNSUPPORTED_ERROR;
   2470    return nullptr;
   2471 }
   2472 
   2473 U_CAPI void U_EXPORT2
   2474 uprv_dl_close(void *lib, UErrorCode *status) {
   2475    (void)lib;
   2476    if(U_FAILURE(*status)) return;
   2477    *status = U_UNSUPPORTED_ERROR;
   2478    return;
   2479 }
   2480 
   2481 U_CAPI UVoidFunction* U_EXPORT2
   2482 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
   2483  (void)lib;
   2484  (void)sym;
   2485  if(U_SUCCESS(*status)) {
   2486    *status = U_UNSUPPORTED_ERROR;
   2487  }
   2488  return (UVoidFunction*)nullptr;
   2489 }
   2490 
   2491 #endif
   2492 
   2493 /*
   2494 * Hey, Emacs, please set the following:
   2495 *
   2496 * Local Variables:
   2497 * indent-tabs-mode: nil
   2498 * End:
   2499 *
   2500 */