tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

locmap.cpp (44873B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 1996-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *
      9 * Provides functionality for mapping between
     10 * LCID and Posix IDs or ICU locale to codepage
     11 *
     12 * Note: All classes and code in this file are
     13 *       intended for internal use only.
     14 *
     15 * Methods of interest:
     16 *   unsigned long convertToLCID(const char*);
     17 *   const char* convertToPosix(unsigned long);
     18 *
     19 * Kathleen Wilson, 4/30/96
     20 *
     21 *  Date        Name        Description
     22 *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
     23 *                          setId() method and safety check against 
     24 *                          MAX_ID_LENGTH.
     25 * 04/23/99     stephen     Added C wrapper for convertToPosix.
     26 * 09/18/00     george      Removed the memory leaks.
     27 * 08/23/01     george      Convert to C
     28 */
     29 
     30 #include "locmap.h"
     31 #include "charstr.h"
     32 #include "cstring.h"
     33 #include "cmemory.h"
     34 #include "ulocimp.h"
     35 #include "unicode/uloc.h"
     36 
     37 #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
     38 #include <windows.h>
     39 #include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
     40 #endif
     41 
     42 /*
     43 * Note:
     44 * The mapping from Win32 locale ID numbers to POSIX locale strings should
     45 * be the faster one.
     46 *
     47 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
     48 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
     49 */
     50 
     51 namespace {
     52 
     53 /*
     54 ////////////////////////////////////////////////
     55 //
     56 // Internal Classes for LCID <--> POSIX Mapping
     57 //
     58 /////////////////////////////////////////////////
     59 */
     60 
     61 typedef struct ILcidPosixElement /* One LCID <-> POSIX locale ID pair; the element type of every locmap_<id> table. */
     62 {
     63    const uint32_t hostID;       /* LCID in host format, such as 0x044d */
     64    const char * const posixID;  /* POSIX locale ID string, such as "de" or "de_CH" */
     65 } ILcidPosixElement;
     66 
     67 typedef struct ILcidPosixMap /* Descriptor for one locmap_<id> table; built by ILCID_POSIX_MAP. */
     68 {
     69    const uint32_t numRegions;   /* element count of regionMaps (UPRV_LENGTHOF of the table) */
     70    const struct ILcidPosixElement* const regionMaps; /* the table itself; by convention element 0 is the bare language */
     71 } ILcidPosixMap;
     72 
     73 
     74 /*
     75 /////////////////////////////////////////////////
     76 //
     77 // Easy macros to make the LCID <--> POSIX Mapping
     78 //
     79 /////////////////////////////////////////////////
     80 */
     81 
     82 /**
     83 * The standard one language/one country mapping for LCID. Defines the constexpr
     84 * array locmap_<languageID> with exactly two elements: the first is the language
     85 * alone (LANGUAGE_LCID(hostID)), the second is the language with the country.
     86 * @param hostID LCID in host format such as 0x044d
     87 * @param languageID posix ID of just the language such as 'de'; also the array name suffix
     88 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
     89 */
     90 #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
     91 constexpr ILcidPosixElement locmap_ ## languageID [] = { \
     92    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
     93    {hostID, #posixID}, \
     94 };
     95 
     96 /**
     97 * Define a subtable by ID: declares locmap_<id>; the braced element list and ';' follow the macro call.
     98 * @param id the POSIX ID, either a language or language_TERRITORY; becomes the array name suffix
     99 */
    100 #define ILCID_POSIX_SUBTABLE(id) \
    101 constexpr ILcidPosixElement locmap_ ## id [] =
    102 
    103 
    104 /**
    105 * Create the ILcidPosixMap initializer {element count, array} for locmap_<_posixID>.
    106 * This macro supposes that the ID is the same as the suffix of the global variable
    107 * name, and that the first element of the ILcidPosixElement array is just the language.
    108 * @param _posixID the ID the table was defined with (e.g. en, fa_AF, en_US_POSIX).
    109 */
    110 #define ILCID_POSIX_MAP(_posixID) \
    111    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
    112 
    113 /*
    114 ////////////////////////////////////////////
    115 //
    116 // Create the table of LCID to POSIX Mapping
    117 // None of it should be dynamically created.
    118 //
    119 // Keep static locale variables inside the function so that
    120 // they can be created properly during static init.
    121 //
    122 // Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier 
    123 //       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
    124 //
    125 //       Microsoft is moving away from LCIDs in favor of locale names as of Vista.  This table needs to be
    126 //       maintained for support of older Windows versions.
    127 //       Update: Windows 7 (091130)
    128 //
    129 // Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
    130 //       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
    131 //       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
    132 //       to support other keywords in this mapping data, we must update the implementation.
    133 ////////////////////////////////////////////
    134 */
    135 
    136 // TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as 
    137 // LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
    138 
    139 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA) /* Afrikaans */
    140 
    141 ILCID_POSIX_SUBTABLE(ar) { /* Arabic, language ID 0x01 */
    142    {0x01,   "ar"},
    143    {0x3801, "ar_AE"},
    144    {0x3c01, "ar_BH"},
    145    {0x1401, "ar_DZ"},
    146    {0x0c01, "ar_EG"},
    147    {0x0801, "ar_IQ"},
    148    {0x2c01, "ar_JO"},
    149    {0x3401, "ar_KW"},
    150    {0x3001, "ar_LB"},
    151    {0x1001, "ar_LY"},
    152    {0x1801, "ar_MA"},
    153    {0x1801, "ar_MO"}, /* NOTE(review): same LCID as ar_MA above -- presumably a compatibility alias; confirm */
    154    {0x2001, "ar_OM"},
    155    {0x4001, "ar_QA"},
    156    {0x0401, "ar_SA"},
    157    {0x2801, "ar_SY"},
    158    {0x1c01, "ar_TN"},
    159    {0x2401, "ar_YE"}
    160 };
    161 
    162 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN) /* Assamese */
    163 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET) /* Amharic */
    164 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL) /* Araucanian/Mapudungun */
    165 
    166 ILCID_POSIX_SUBTABLE(az) { /* Azerbaijani, language ID 0x2c */
    167    {0x2c,   "az"},
    168    {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
    169    {0x742c, "az_Cyrl"},  /* Cyrillic based */
    170    {0x042c, "az_Latn_AZ"}, /* Latin based */
    171    {0x782c, "az_Latn"}, /* Latin based */
    172    {0x042c, "az_AZ"} /* Latin based; script-less alias sharing the LCID of az_Latn_AZ */
    173 };
    174 
    175 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU) /* Bashkir */
    176 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY) /* Belarusian */
    177 
    178 /*ILCID_POSIX_SUBTABLE(ber) {
    179    {0x5f,   "ber"},
    180    {0x045f, "ber_Arab_DZ"},
    181    {0x045f, "ber_Arab"},
    182    {0x085f, "ber_Latn_DZ"},
    183    {0x085f, "ber_Latn"}
    184 };*/
    185 
    186 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG) /* Bulgarian */
    187 
    188 ILCID_POSIX_SUBTABLE(bin) { /* Edo, language ID 0x66 */
    189    {0x66, "bin"},
    190    {0x0466, "bin_NG"}
    191 };
    192 
    193 ILCID_POSIX_SUBTABLE(bn) { /* Bengali; Bangla, language ID 0x45 */
    194    {0x45,   "bn"},
    195    {0x0845, "bn_BD"},
    196    {0x0445, "bn_IN"}
    197 };
    198 
    199 ILCID_POSIX_SUBTABLE(bo) { /* Tibetan, language ID 0x51 */
    200    {0x51,   "bo"},
    201    {0x0851, "bo_BT"},
    202    {0x0451, "bo_CN"},
    203    {0x0c51, "dz_BT"} /* a dz locale listed here because its LCID carries the same language bits (0x51) */
    204 };
    205 
    206 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR) /* Breton */
    207 
    208 ILCID_POSIX_SUBTABLE(ca) { /* Catalan, language ID 0x03 */
    209    {0x03,   "ca"},
    210    {0x0403, "ca_ES"},
    211    {0x0803, "ca_ES_VALENCIA"} /* variant locale with its own LCID */
    212 };
    213 
    214 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR) /* Corsican */
    215 
    216 ILCID_POSIX_SUBTABLE(chr) { /* Cherokee, language ID 0x5c */
    217    {0x05c,  "chr"},
    218    {0x7c5c, "chr_Cher"},
    219    {0x045c, "chr_Cher_US"},
    220    {0x045c, "chr_US"} /* script-less alias sharing the LCID of chr_Cher_US */
    221 };
    222 
    223 // ICU has chosen different names for these.
    224 ILCID_POSIX_SUBTABLE(ckb) { /* Sorani (Central Kurdish), language ID 0x92 */
    225    {0x92,   "ckb"},
    226    {0x7c92, "ckb_Arab"},
    227    {0x0492, "ckb_Arab_IQ"}
    228 };
    229 
    230 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
    231 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ) /* Czech. NOTE(review): languageID is cs, so this defines locmap_cs; the "Declared as cs_CZ" remark above looks stale -- confirm */
    232 
    233 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB) /* Welsh */
    234 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK) /* Danish */
    235 
    236 // Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
    237 ILCID_POSIX_SUBTABLE(de) { /* German, language ID 0x07 */
    238    {0x07,   "de"},
    239    {0x0c07, "de_AT"},
    240    {0x0807, "de_CH"},
    241    {0x0407, "de_DE"},
    242    {0x1407, "de_LI"},
    243    {0x1007, "de_LU"},
    244    {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows; the LCID is 0x0407 plus a value above the low 16 bits*/
    245    {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
    246 };
    247 
    248 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV) /* Divehi */
    249 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR) /* Greek */
    250 
    251 // Windows uses an empty string for 'invariant'
    252 ILCID_POSIX_SUBTABLE(en) { /* English, language ID 0x09. Entry order matters here: see the "Leave last" notes below. */
    253    {0x09,   "en"},
    254    {0x0c09, "en_AU"},
    255    {0x2809, "en_BZ"},
    256    {0x1009, "en_CA"},
    257    {0x0809, "en_GB"},
    258    {0x3c09, "en_HK"},
    259    {0x3809, "en_ID"},
    260    {0x1809, "en_IE"},
    261    {0x4009, "en_IN"},
    262    {0x2009, "en_JM"},
    263    {0x4409, "en_MY"},
    264    {0x1409, "en_NZ"},
    265    {0x3409, "en_PH"},
    266    {0x4809, "en_SG"},
    267    {0x2C09, "en_TT"},
    268    {0x0409, "en_US"},
    269    {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping; 0x7f also has its own table, locmap_en_US_POSIX */
    270    {0x2409, "en_029"},
    271    {0x1c09, "en_ZA"},
    272    {0x3009, "en_ZW"},
    273    {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
    274    {0x0409, "en_AS"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    275    {0x0409, "en_GU"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    276    {0x0409, "en_MH"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    277    {0x0409, "en_MP"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    278    {0x0409, "en_UM"}   /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
    279 };
    280 
    281 ILCID_POSIX_SUBTABLE(en_US_POSIX) { /* invariant, language ID 0x7f; single-entry table */
    282    {0x007f, "en_US_POSIX"} /* duplicate for roundtripping (the same pair also appears in the en table) */
    283 };
    284 
    285 // Windows doesn't know POSIX or BCP47 Unicode traditional sort names
    286 ILCID_POSIX_SUBTABLE(es) { /* Spanish, language ID 0x0a */
    287    {0x0a,   "es"},
    288    {0x2c0a, "es_AR"},
    289    {0x400a, "es_BO"},
    290    {0x340a, "es_CL"},
    291    {0x240a, "es_CO"},
    292    {0x140a, "es_CR"},
    293    {0x5c0a, "es_CU"},
    294    {0x1c0a, "es_DO"},
    295    {0x300a, "es_EC"},
    296    {0x0c0a, "es_ES"},      /*Modern sort.*/
    297    {0x100a, "es_GT"},
    298    {0x480a, "es_HN"},
    299    {0x080a, "es_MX"},
    300    {0x4c0a, "es_NI"},
    301    {0x180a, "es_PA"},
    302    {0x280a, "es_PE"},
    303    {0x500a, "es_PR"},
    304    {0x3c0a, "es_PY"},
    305    {0x440a, "es_SV"},
    306    {0x540a, "es_US"},
    307    {0x380a, "es_UY"},
    308    {0x200a, "es_VE"},
    309    {0x580a, "es_419"},
    310    {0x040a, "es_ES@collation=traditional"}, /* traditional sort has its own LCID (0x040a), distinct from es_ES (0x0c0a) */
    311    {0x040a, "es@collation=traditional"}        // Windows will treat this as es-ES@collation=traditional
    312 };
    313 
    314 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE) /* Estonian */
    315 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES) /* Basque */
    316 
    317 /* ISO-639 doesn't distinguish between Persian and Dari.*/
    318 ILCID_POSIX_SUBTABLE(fa) { /* Persian/Farsi, language ID 0x29 */
    319    {0x29,   "fa"},
    320    {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
    321    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan); note the language bits are 0x8c, not 0x29 -- see also the fa_AF table */
    322 };
    323 
    324 
    325 /* duplicate for roundtripping */
    326 ILCID_POSIX_SUBTABLE(fa_AF) { /* Persian/Dari, language ID 0x8c; here the language-only slot also maps to "fa_AF" */
    327    {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
    328    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
    329 };
    330 
    331 ILCID_POSIX_SUBTABLE(ff) { /* Fula, language ID 0x67 */
    332    {0x67,   "ff"},
    333    {0x7c67, "ff_Latn"},
    334    {0x0867, "ff_Latn_SN"},
    335    {0x0467, "ff_NG"} /* 0x0467 is also used by the fuv table (fuv_NG) */
    336 };
    337 
    338 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI) /* Finnish */
    339 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH) /* Filipino */
    340 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO) /* Faroese */
    341 
    342 ILCID_POSIX_SUBTABLE(fr) { /* French, language ID 0x0c */
    343    {0x0c,   "fr"},
    344    {0x080c, "fr_BE"},
    345    {0x0c0c, "fr_CA"},
    346    {0x240c, "fr_CD"},
    347    {0x240c, "fr_CG"}, /* same LCID as fr_CD above */
    348    {0x100c, "fr_CH"},
    349    {0x300c, "fr_CI"},
    350    {0x2c0c, "fr_CM"},
    351    {0x040c, "fr_FR"},
    352    {0x3c0c, "fr_HT"},
    353    {0x140c, "fr_LU"},
    354    {0x380c, "fr_MA"},
    355    {0x180c, "fr_MC"},
    356    {0x340c, "fr_ML"},
    357    {0x200c, "fr_RE"},
    358    {0x280c, "fr_SN"},
    359    {0xe40c, "fr_015"},
    360    {0x1c0c, "fr_029"}
    361 };
    362 
    363 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG) /* Fulfulde - Nigeria; 0x0467 is also listed as ff_NG in the ff table */
    364 
    365 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL) /* Frisian */
    366 
    367 ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland), language ID 0x3c */
    368    {0x3c,   "ga"},
    369    {0x083c, "ga_IE"},
    370    {0x043c, "gd_GB"} /* a gd locale under the 0x3c language bits; gd also has its own table (0x91) */
    371 };
    372 
    373 ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland), language ID 0x91 */
    374    {0x91,   "gd"},
    375    {0x0491, "gd_GB"} /* "gd_GB" is also listed under 0x043c in the ga table */
    376 };
    377 
    378 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES) /* Galician */
    379 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN) /* Gujarati */
    380 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY) /* Guarani */
    381 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR) /* Alemanic/Alsatian/Swiss German */
    382 
    383 ILCID_POSIX_SUBTABLE(ha) { /* Hausa, language ID 0x68 */
    384    {0x68,   "ha"},
    385    {0x7c68, "ha_Latn"},
    386    {0x0468, "ha_Latn_NG"},
    387 };
    388 
    389 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US) /* Hawaiian */
    390 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL) /* Hebrew (formerly iw); the iw table uses the same LCID */
    391 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN) /* Hindi */
    392 
    393 /* This LCID is really four different locales.*/
    394 ILCID_POSIX_SUBTABLE(hr) { /* Croatian and others, language ID 0x1a: hr, bs and sr locales all share these language bits */
    395    {0x1a,   "hr"},
    396    {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
    397    {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
    398    {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
    399    {0x781a, "bs"},     /* Bosnian */
    400    {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
    401    {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
    402    {0x101a, "hr_BA"},  /* Croatian in Bosnia */
    403    {0x041a, "hr_HR"},  /* Croatian*/
    404    {0x2c1a, "sr_Latn_ME"},
    405    {0x241a, "sr_Latn_RS"},
    406    {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
    407    {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
    408    {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
    409    {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
    410    {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
    411    {0x301a, "sr_Cyrl_ME"},
    412    {0x281a, "sr_Cyrl_RS"},
    413    {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
    414    {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
    415 };
    416 
    417 ILCID_POSIX_SUBTABLE(hsb) { /* Upper Sorbian, language ID 0x2e; the dsb entries below share these language bits */
    418    {0x2E,   "hsb"},
    419    {0x042E, "hsb_DE"},
    420    {0x082E, "dsb_DE"},
    421    {0x7C2E, "dsb"},
    422 };
    423 
    424 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU) /* Hungarian */
    425 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM) /* Armenian */
    426 
    427 ILCID_POSIX_SUBTABLE(ibb) { /* Ibibio - Nigeria, language ID 0x69 */
    428    {0x69, "ibb"},
    429    {0x0469, "ibb_NG"}
    430 };
    431 
    432 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID) /* Indonesian (formerly in) */
    433 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG) /* Igbo */
    434 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN) /* Sichuan Yi */
    435 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS) /* Icelandic */
    436 
    437 ILCID_POSIX_SUBTABLE(it) { /* Italian, language ID 0x10 */
    438    {0x10,   "it"},
    439    {0x0810, "it_CH"},
    440    {0x0410, "it_IT"}
    441 };
    442 
    443 ILCID_POSIX_SUBTABLE(iu) { /* Inuktitut, language ID 0x5d; entries exist for both the Cans and Latn scripts */
    444    {0x5d,   "iu"},
    445    {0x045d, "iu_Cans_CA"},
    446    {0x785d, "iu_Cans"},
    447    {0x085d, "iu_Latn_CA"},
    448    {0x7c5d, "iu_Latn"}
    449 };
    450 
    451 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Hebrew; left in for compatibility (same LCID as the he table)*/
    452 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP) /* Japanese */
    453 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE) /* Georgian */
    454 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ) /* Kazakh */
    455 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL) /* Kalaallisut */
    456 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH) /* Khmer */
    457 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN) /* Kannada */
    458 
    459 ILCID_POSIX_SUBTABLE(ko) { /* Korean, language ID 0x12 */
    460    {0x12,   "ko"},
    461    {0x0812, "ko_KP"},
    462    {0x0412, "ko_KR"}
    463 };
    464 
    465 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN) /* Konkani */
    466 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG) /* defines locmap_kr: {LANGUAGE_LCID(0x0471), "kr"}, {0x0471, "kr_NG"} */
    467 
    468 ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */
    469    {0x60,   "ks"},          /* language-only (parent) entry */
    470    {0x0460, "ks_Arab_IN"},
    471    {0x0860, "ks_Deva_IN"}
    472 };
    473 
    474 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan; defines the two-entry table locmap_ky */
    475 
    476 ILCID_POSIX_SUBTABLE(la) {
    477    {0x76,   "la"},          /* language-only (parent) entry */
    478    {0x0476, "la_001"},
    479    {0x0476, "la_IT"}       /*Left in for compatibility; same LCID as la_001 above*/
    480 };
    481 
    482 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU) /* this and the lines below each define a two-entry table: parent language (LANGUAGE_LCID of the ID) plus the listed territory */
    483 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
    484 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
    485 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
    486 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
    487 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
    488 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
    489 
    490 ILCID_POSIX_SUBTABLE(mn) {
    491    {0x50,   "mn"},          /* language-only (parent) entry */
    492    {0x0450, "mn_MN"},
    493    {0x7c50, "mn_Mong"},
    494    {0x0850, "mn_Mong_CN"},
    495    {0x0850, "mn_CN"},       /* script-less alias sharing the LCID of mn_Mong_CN */
    496    {0x7850, "mn_Cyrl"},
    497    {0x0c50, "mn_Mong_MN"}
    498 };
    499 
    500 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN) /* this and the lines below each define a two-entry table: parent language plus the listed territory */
    501 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
    502 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
    503 
    504 ILCID_POSIX_SUBTABLE(ms) {
    505    {0x3e,   "ms"},      /* language-only (parent) entry */
    506    {0x083e, "ms_BN"},   /* Brunei Darussalam*/
    507    {0x043e, "ms_MY"}    /* Malaysia*/
    508 };
    509 
    510 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT) /* defines locmap_mt: parent language entry plus mt_MT */
    511 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM) /* defines locmap_my: parent language entry plus my_MM */
    512 
    513 ILCID_POSIX_SUBTABLE(ne) {
    514    {0x61,   "ne"},      /* language-only (parent) entry */
    515    {0x0861, "ne_IN"},   /* India*/
    516    {0x0461, "ne_NP"}    /* Nepal*/
    517 };
    518 
    519 ILCID_POSIX_SUBTABLE(nl) {
    520    {0x13,   "nl"},      /* language-only (parent) entry */
    521    {0x0813, "nl_BE"},
    522    {0x0413, "nl_NL"}
    523 };
    524 
    525 /* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
    526 // TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
    527 ILCID_POSIX_SUBTABLE(no) { /* no, nb and nn share language bits 0x14; entry order is significant (see the "Keep first" notes) */
    528    {0x14,   "no"},     /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */ 
    529    {0x7c14, "nb"},     /* really nb */
    530    {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
    531    {0x0414, "no_NO"},  /* really nb_NO */
    532    {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
    533    {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
    534    {0x0814, "no_NO_NY"}/* really nn_NO */
    535 };
    536 
    537 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
    538 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR) /* defines locmap_oc: parent language entry plus oc_FR */
    539 
    540 ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
    541    {0x72,   "om"},      /* language-only (parent) entry */
    542    {0x0472, "om_ET"},
    543    {0x0472, "gaz_ET"}   /* different language code, same LCID as om_ET above */
    544 };
    545 
    546 /* Declared as or_IN to get around compiler errors*/
    547 ILCID_POSIX_SUBTABLE(or_IN) { /* table is named or_IN, but its language-only entry is still "or" (presumably `or` itself cannot be used because it is a C++ alternative token -- confirm) */
    548    {0x48,   "or"},
    549    {0x0448, "or_IN"},
    550 };
    551 
    552 ILCID_POSIX_SUBTABLE(pa) {
    553    {0x46,   "pa"},          /* language-only (parent) entry */
    554    {0x0446, "pa_IN"},
    555    {0x0846, "pa_Arab_PK"},
    556    {0x0846, "pa_PK"}        /* script-less alias sharing the LCID of pa_Arab_PK */
    557 };
    558 
    559 ILCID_POSIX_SUBTABLE(pap) {
    560    {0x79, "pap"},         /* language-only (parent) entry */
    561    {0x0479, "pap_029"},
    562    {0x0479, "pap_AN"}     /*Left in for compatibility; same LCID as pap_029 above*/
    563 };
    564 
    565 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL) /* defines locmap_pl: parent language entry plus pl_PL */
    566 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF) /* defines locmap_ps: parent language entry plus ps_AF */
    567 
    568 ILCID_POSIX_SUBTABLE(pt) {
    569    {0x16,   "pt"},      /* language-only (parent) entry */
    570    {0x0416, "pt_BR"},
    571    {0x0816, "pt_PT"}
    572 };
    573 
    574 ILCID_POSIX_SUBTABLE(qu) { /* each regional LCID is listed twice: once as qu_*, once as quz_* */
    575    {0x6b,   "qu"},
    576    {0x046b, "qu_BO"},
    577    {0x086b, "qu_EC"},
    578    {0x0C6b, "qu_PE"},
    579    {0x046b, "quz_BO"},
    580    {0x086b, "quz_EC"},
    581    {0x0C6b, "quz_PE"}
    582 };
    583 
    584 ILCID_POSIX_SUBTABLE(quc) {
    585    {0x93,   "quc"},
    586    {0x0493, "quc_CO"},
    587    /*
    588        "quc_Latn_GT" is an exceptional case. Language ID of "quc"
    589        is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
    590        under the group of "qut". "qut" is a retired ISO 639-3 language
    591        code for West Central Quiche, and was merged into "quc".
    592        It looks like Windows previously reserved "qut" for K'iche', but
    593        decided to use "quc" when adding a locale for K'iche' (Guatemala).
    594 
    595        The data structure used here assumes the language ID bits in an
    596        LCID are unique per alphabetic language code. But this is not true
    597        for "quc_Latn_GT". If we don't have the data below, LCID look up
    598        by alphabetic locale ID (POSIX) will fail. The same entry is found
    599        under "qut" below, which is required for reverse look up.
    600    */
    601    {0x0486, "quc_Latn_GT"}
    602 };
    603 
    604 ILCID_POSIX_SUBTABLE(qut) {
    605    {0x86,   "qut"},
    606    {0x0486, "qut_GT"},
    607    /*
    608        See the note in "quc" above: 0x0486 carries the "qut" language bits (0x86),
    609        so the "quc_Latn_GT" entry is repeated here for the LCID-to-POSIX direction.
    610    */
    611    {0x0486, "quc_Latn_GT"}
    612 };
    612 
    613 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH) /* defines locmap_rm: parent language entry plus rm_CH */
    614 
    615 ILCID_POSIX_SUBTABLE(ro) {
    616    {0x18,   "ro"},      /* language-only (parent) entry */
    617    {0x0418, "ro_RO"},
    618    {0x0818, "ro_MD"}
    619 };
    620 
    621 // TODO: This is almost certainly 'wrong'.  0 in Windows is a synonym for LOCALE_USER_DEFAULT.
    622 // More likely this is a similar concept to the Windows 0x7f Invariant locale ""
    623 // (Except that it's not invariant in ICU)
    624 ILCID_POSIX_SUBTABLE(root) { /* single-entry table: LCID 0 <-> "root" */
    625    {0x00,   "root"}
    626 };
    627 
    628 ILCID_POSIX_SUBTABLE(ru) {
    629    {0x19,   "ru"},      /* language-only (parent) entry */
    630    {0x0419, "ru_RU"},
    631    {0x0819, "ru_MD"}
    632 };
    633 
    634 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW) /* this and the lines below each define a two-entry table: parent language plus the listed territory */
    635 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
    636 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
    637 
    638 ILCID_POSIX_SUBTABLE(sd) {
    639    {0x59,   "sd"},          /* language-only (parent) entry */
    640    {0x0459, "sd_Deva_IN"},
    641    {0x0459, "sd_IN"},       /* script-less alias sharing the LCID of sd_Deva_IN */
    642    {0x0859, "sd_Arab_PK"},
    643    {0x0859, "sd_PK"},       /* script-less alias sharing the LCID of sd_Arab_PK */
    644    {0x7c59, "sd_Arab"}
    645 };
    646 
    647 ILCID_POSIX_SUBTABLE(se) { /* se plus the sma/smj/smn/sms locales, which all carry language bits 0x3b */
    648    {0x3b,   "se"},
    649    {0x0c3b, "se_FI"},
    650    {0x043b, "se_NO"},
    651    {0x083b, "se_SE"},
    652    {0x783b, "sma"},
    653    {0x183b, "sma_NO"},
    654    {0x1c3b, "sma_SE"},
    655    {0x7c3b, "smj"},
    656    {0x703b, "smn"},
    657    {0x743b, "sms"},
    658    {0x103b, "smj_NO"},
    659    {0x143b, "smj_SE"},
    660    {0x243b, "smn_FI"},
    661    {0x203b, "sms_FI"},
    662 };
    663 
    664 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK) /* this and the lines below each define a two-entry table: parent language plus the listed territory */
    665 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
    666 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
    667 
    668 ILCID_POSIX_SUBTABLE(so) {
    669    {0x77,   "so"},      /* language-only (parent) entry */
    670    {0x0477, "so_SO"}
    671 };
    672 
    673 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL) /* defines locmap_sq: parent language entry plus sq_AL */
    674 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA) /* defines locmap_st: parent language entry plus st_ZA */
    675 
    676 ILCID_POSIX_SUBTABLE(sv) {
    677    {0x1d,   "sv"},      /* language-only (parent) entry */
    678    {0x081d, "sv_FI"},
    679    {0x041d, "sv_SE"}
    680 };
    681 
    682 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE) /* defines locmap_sw: parent language entry plus sw_KE */
    683 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY) /* defines locmap_syr: parent language entry plus syr_SY */
    684 
    685 ILCID_POSIX_SUBTABLE(ta) {
    686    {0x49,   "ta"},      /* language-only (parent) entry */
    687    {0x0449, "ta_IN"},
    688    {0x0849, "ta_LK"}
    689 };
    690 
    691 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN) /* defines locmap_te: parent language entry plus te_IN */
    692 
    693 /* Cyrillic based by default */
    694 ILCID_POSIX_SUBTABLE(tg) {
    695    {0x28,   "tg"},          /* language-only (parent) entry */
    696    {0x7c28, "tg_Cyrl"},
    697    {0x0428, "tg_Cyrl_TJ"}   /* only Cyrl-script entries are listed for tg */
    698 };
    699 
    700 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH) /* defines locmap_th: parent language entry plus th_TH */
    701 
    702 ILCID_POSIX_SUBTABLE(ti) {
    703    {0x73,   "ti"},      /* language-only (parent) entry */
    704    {0x0873, "ti_ER"},
    705    {0x0473, "ti_ET"}
    706 };
    707 
    708 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM) /* defines locmap_tk: parent language entry plus tk_TM */
    709 
    710 ILCID_POSIX_SUBTABLE(tn) {
    711    {0x32,   "tn"},      /* language-only (parent) entry */
    712    {0x0832, "tn_BW"},
    713    {0x0432, "tn_ZA"}
    714 };
    715 
    716 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR) /* this and the lines below each define a two-entry table: parent language plus the listed territory */
    717 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
    718 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
    719 
    720 ILCID_POSIX_SUBTABLE(tzm) { /* uses language bits 0x5f, the same value as the commented-out ber table earlier in this file */
    721    {0x5f,   "tzm"},
    722    {0x7c5f, "tzm_Latn"},
    723    {0x085f, "tzm_Latn_DZ"},
    724    {0x105f, "tzm_Tfng_MA"},
    725    {0x045f, "tzm_Arab_MA"},
    726    {0x045f, "tmz"} /* NOTE(review): spelled "tmz", not "tzm" -- confirm this is an intentional alias and not a transposition */
    727 };
    728 
    729 ILCID_POSIX_SUBTABLE(ug) {
    730    {0x80,   "ug"},          /* language-only (parent) entry */
    731    {0x0480, "ug_CN"},
    732    {0x0480, "ug_Arab_CN"}   /* script-qualified alias sharing the LCID of ug_CN */
    733 };
    734 
    735 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA) /* defines locmap_uk: parent language entry plus uk_UA */
    736 
    737 ILCID_POSIX_SUBTABLE(ur) {
    738    {0x20,   "ur"},      /* language-only (parent) entry */
    739    {0x0820, "ur_IN"},
    740    {0x0420, "ur_PK"}
    741 };
    742 
    743 ILCID_POSIX_SUBTABLE(uz) {
    744    {0x43,   "uz"},          /* language-only (parent) entry */
    745    {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
    746    {0x7843, "uz_Cyrl"},  /* Cyrillic based */
    747    {0x0843, "uz_UZ"},  /* Cyrillic based; script-less alias sharing the LCID of uz_Cyrl_UZ */
    748    {0x0443, "uz_Latn_UZ"}, /* Latin based */
    749    {0x7c43, "uz_Latn"} /* Latin based */
    750 };
    751 
    752 ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
    753    {0x33,   "ve"},      /* language-only (parent) entry */
    754    {0x0433, "ve_ZA"},
    755    {0x0433, "ven_ZA"}   /* three-letter code, same LCID as ve_ZA above */
    756 };
    757 
    758 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN) /* this and the lines below each define a two-entry table: parent language plus the listed territory */
    759 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
    760 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
    761 
    762 ILCID_POSIX_SUBTABLE(yi) {
    763    {0x003d, "yi"},      /* language-only (parent) entry */
    764    {0x043d, "yi_001"}
    765 };
    766 
    767 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG) /* defines locmap_yo: parent language entry plus yo_NG */
    768 
    769 // Windows & ICU tend to use different names for some of these
    770 // TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
    771 ILCID_POSIX_SUBTABLE(zh) { /* unlike most tables, the first (language-only, 0x0004) slot maps to "zh_Hans"; bare "zh" is the next entry */
    772    {0x0004, "zh_Hans"},
    773    {0x7804, "zh"},
    774    {0x0804, "zh_CN"},
    775    {0x0804, "zh_Hans_CN"},
    776    {0x0c04, "zh_Hant_HK"},
    777    {0x0c04, "zh_HK"},
    778    {0x1404, "zh_Hant_MO"},
    779    {0x1404, "zh_MO"},
    780    {0x1004, "zh_Hans_SG"},
    781    {0x1004, "zh_SG"},
    782    {0x0404, "zh_Hant_TW"},
    783    {0x7c04, "zh_Hant"},
    784    {0x0404, "zh_TW"},
    785    {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
    786    {0x30404,"zh_TW"},          /* Bopomofo order */
    787    {0x20004,"zh@collation=stroke"}, /* this and the entries below are the stroke-collation variants */
    788    {0x20404,"zh_Hant@collation=stroke"},
    789    {0x20404,"zh_Hant_TW@collation=stroke"},
    790    {0x20404,"zh_TW@collation=stroke"},
    791    {0x20804,"zh_Hans@collation=stroke"},
    792    {0x20804,"zh_Hans_CN@collation=stroke"},
    793    {0x20804,"zh_CN@collation=stroke"}
    794    // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
    795 };
    796 
    797 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA) /* defines locmap_zu: parent language entry plus zu_ZA */
    798 
    799 /* This must be static and grouped by LCID. */
    800 constexpr ILcidPosixMap gPosixIDmap[] = {
    801    ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
    802    ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
    803    ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
    804    ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
    805    ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
    806    ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
    807    ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
    808    ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
    809 /*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
    810    ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
    811    ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
    812    ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
    813    ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
    814    ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
    815    ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
    816    ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
    817    ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
    818    ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
    819    ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
    820    ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
    821    ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
    822    ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
    823    ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
    824    ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
    825    ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
    826    ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
    827    ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
    828    ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
    829    ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
    830    ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
    831    ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
    832    ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
    833    ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
    834    ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
    835    ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
    836    ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
    837    ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
    838    ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
    839    ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
    840    ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
    841    ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
    842    ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
    843    ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
    844    ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
    845    ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
    846    ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
    847    ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
    848    ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
    849    ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
    850    ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
    851    ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
    852    ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
    853    ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
    854    ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
    855    ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
    856    ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
    857    ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
    858    ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
    859    ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
    860    ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
    861    ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
    862    ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
    863    ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
    864    ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
    865    ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
    866    ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
    867    ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
    868    ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
    869    ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
    870    ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
    871    ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
    872    ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
    873    ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
    874    ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
    875    ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
    876    ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
    877    ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
    878    ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
    879    ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
    880    ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
    881    ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
    882    ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
    883    ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
    884    ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
    885    ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
    886    ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
    887 /*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
    888    ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
    889    ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
    890 /*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
    891    ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
    892    ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
    893    ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
    894    ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
    895    ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
    896    ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
    897    ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
    898    ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
    899    ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
    900    ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
    901    ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
    902    ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
    903    ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
    904    ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
    905    ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
    906    ILCID_POSIX_MAP(root),  /*  root                          0x00 */
    907    ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
    908    ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
    909    ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
    910    ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
    911    ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
    912    ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
    913 /*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
    914    ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
    915    ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
    916    ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
    917    ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
    918    ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
    919 /*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
    920    ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
    921    ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
    922    ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
    923    ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
    924    ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
    925    ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
    926    ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
    927    ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
    928    ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
    929    ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
    930    ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
    931    ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
    932    ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
    933    ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
    934    ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
    935    ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
    936    ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
    937    ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
    938    ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
    939    ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
    940    ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
    941    ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
    942    ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
    943    ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
    944    ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
    945    ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
    946    ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
    947 };
    948 
    949 constexpr uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
    950 
    951 /**
    952 * Do not call this function. It is called by hostID.
    953 * The function is not private because this struct must stay as a C struct,
    954 * and this is an internal class.
    955 */
    956 int32_t
    957 idCmp(const char* id1, const char* id2)
    958 {
    959    int32_t diffIdx = 0;
    960    while (*id1 == *id2 && *id1 != 0) {
    961        diffIdx++;
    962        id1++;
    963        id2++;
    964    }
    965    return diffIdx;
    966 }
    967 
    968 /**
    969 * Searches for a Windows LCID
    970 *
    971 * @param posixID the Posix style locale id.
    972 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
    973 *               no equivalent Windows LCID.
    974 * @return the LCID
    975 */
    976 uint32_t
    977 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode& status)
    978 {
    979    if (U_FAILURE(status)) { return locmap_root->hostID; }
    980    int32_t bestIdx = 0;
    981    int32_t bestIdxDiff = 0;
    982    int32_t posixIDlen = static_cast<int32_t>(uprv_strlen(posixID));
    983    uint32_t idx;
    984 
    985    for (idx = 0; idx < this_0->numRegions; idx++ ) {
    986        int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
    987        if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
    988            if (posixIDlen == sameChars) {
    989                /* Exact match */
    990                return this_0->regionMaps[idx].hostID;
    991            }
    992            bestIdxDiff = sameChars;
    993            bestIdx = idx;
    994        }
    995    }
    996    /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
    997    /* We also have to make sure that sid and si and similar string subsets don't match. */
    998    if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
    999        && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
   1000    {
   1001        status = U_USING_FALLBACK_WARNING;
   1002        return this_0->regionMaps[bestIdx].hostID;
   1003    }
   1004 
   1005    /*no match found */
   1006    status = U_ILLEGAL_ARGUMENT_ERROR;
   1007    return locmap_root->hostID;
   1008 }
   1009 
   1010 const char*
   1011 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
   1012 {
   1013    uint32_t i;
   1014    for (i = 0; i < this_0->numRegions; i++)
   1015    {
   1016        if (this_0->regionMaps[i].hostID == hostID)
   1017        {
   1018            return this_0->regionMaps[i].posixID;
   1019        }
   1020    }
   1021 
   1022    /* If you get here, then no matching region was found,
   1023       so return the language id with the wild card region. */
   1024    return this_0->regionMaps[0].posixID;
   1025 }
   1026 
   1027 /*
   1028 //////////////////////////////////////
   1029 //
   1030 // LCID --> POSIX
   1031 //
   1032 /////////////////////////////////////
   1033 */
   1034 #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
   1035 /*
   1036 * Various language tags needs to be changed:
   1037 * quz -> qu
   1038 * prs -> fa
   1039 */
   1040 void FIX_LANGUAGE_ID_TAG(char* buffer, int32_t len) {
   1041    if (len >= 3) {
   1042        if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {
   1043            buffer[2] = 0;
   1044            uprv_strcat(buffer, buffer+3);
   1045        } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {
   1046            buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0;
   1047            uprv_strcat(buffer, buffer+3);
   1048        }
   1049    }
   1050 }
   1051 #endif
   1052 
   1053 }  // namespace
   1054 
   1055 U_CAPI int32_t
   1056 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
   1057 {
   1058    uint16_t langID;
   1059    uint32_t localeIndex;
   1060    UBool bLookup = true;
   1061    const char *pPosixID = nullptr;
   1062 
   1063 #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
   1064    static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");
   1065 
   1066    char locName[LOCALE_NAME_MAX_LENGTH] = {};
   1067 
   1068    // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
   1069    // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
   1070    // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
   1071    // use the Windows API to resolve locale ID for this specific case.
   1072    if ((hostid & 0x3FF) != 0x92) {
   1073        int32_t tmpLen = 0;
   1074        char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {};
   1075 
   1076        // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
   1077        tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
   1078        if (tmpLen > 1) {
   1079            int32_t i = 0;
   1080            // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
   1081            bLookup = false;
   1082            for (i = 0; i < UPRV_LENGTHOF(locName); i++)
   1083            {
   1084                locName[i] = (char)(windowsLocaleName[i]);
   1085 
   1086                // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
   1087                // In such cases, we need special mapping data found in the hardcoded table
   1088                // in this source file.
   1089                if (windowsLocaleName[i] == L'_')
   1090                {
   1091                    // Keep the base locale, without variant
   1092                    // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
   1093                    locName[i] = '\0';
   1094                    tmpLen = i;
   1095                    bLookup = true;
   1096                    break;
   1097                }
   1098                else if (windowsLocaleName[i] == L'-')
   1099                {
   1100                    // Windows names use -, ICU uses _
   1101                    locName[i] = '_';
   1102                }
   1103                else if (windowsLocaleName[i] == L'\0')
   1104                {
   1105                    // No point in doing more work than necessary
   1106                    break;
   1107                }
   1108            }
   1109            // TODO: Need to understand this better, why isn't it an alias?
   1110            FIX_LANGUAGE_ID_TAG(locName, tmpLen);
   1111            pPosixID = locName;
   1112        }
   1113    }
   1114 #endif
   1115 
   1116    if (bLookup) {
   1117        const char *pCandidate = nullptr;
   1118        langID = LANGUAGE_LCID(hostid);
   1119 
   1120        for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
   1121            if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
   1122                pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
   1123                break;
   1124            }
   1125        }
   1126 
   1127        /* On Windows, when locale name has a variant, we still look up the hardcoded table.
   1128           If a match in the hardcoded table is longer than the Windows locale name without
   1129           variant, we use the one as the result */
   1130        if (pCandidate && (pPosixID == nullptr || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
   1131            pPosixID = pCandidate;
   1132        }
   1133    }
   1134 
   1135    if (pPosixID) {
   1136        int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
   1137        int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
   1138        uprv_memcpy(posixID, pPosixID, copyLen);
   1139        if (resLen < posixIDCapacity) {
   1140            posixID[resLen] = 0;
   1141            if (*status == U_STRING_NOT_TERMINATED_WARNING) {
   1142                *status = U_ZERO_ERROR;
   1143            }
   1144        } else if (resLen == posixIDCapacity) {
   1145            *status = U_STRING_NOT_TERMINATED_WARNING;
   1146        } else {
   1147            *status = U_BUFFER_OVERFLOW_ERROR;
   1148        }
   1149        return resLen;
   1150    }
   1151 
   1152    /* no match found */
   1153    *status = U_ILLEGAL_ARGUMENT_ERROR;
   1154    return 0;
   1155 }
   1156 
   1157 /*
   1158 //////////////////////////////////////
   1159 //
   1160 // POSIX --> LCID
   1161 // This should only be called from uloc_getLCID.
   1162 // The locale ID must be in canonical form.
   1163 //
   1164 /////////////////////////////////////
   1165 */
   1166 U_CAPI uint32_t
   1167 uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
   1168 {
   1169    if (U_FAILURE(*status)) {
   1170        return 0;
   1171    }
   1172 
   1173    // The purpose of this function is to leverage the Windows platform name->lcid
   1174    // conversion functionality when available.
   1175 #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
   1176    int32_t len;
   1177    icu::CharString baseName;
   1178    const char * mylocaleID = localeID;
   1179 
   1180    // Check any for keywords.
   1181    if (uprv_strchr(localeID, '@'))
   1182    {
   1183        icu::CharString collVal = ulocimp_getKeywordValue(localeID, "collation", *status);
   1184        if (U_SUCCESS(*status) && !collVal.isEmpty())
   1185        {
   1186            // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
   1187            return 0;
   1188        }
   1189        else
   1190        {
   1191            // If the locale ID contains keywords other than collation, just use the base name.
   1192            baseName = ulocimp_getBaseName(localeID, *status);
   1193            if (U_SUCCESS(*status) && !baseName.isEmpty())
   1194            {
   1195                mylocaleID = baseName.data();
   1196            }
   1197        }
   1198    }
   1199 
   1200    // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
   1201    icu::CharString asciiBCP47Tag = ulocimp_toLanguageTag(mylocaleID, false, *status);
   1202 
   1203    if (U_SUCCESS(*status))
   1204    {
   1205        // Need it to be UTF-16, not 8-bit
   1206        wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
   1207        int32_t i;
   1208        for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
   1209        {
   1210            if (asciiBCP47Tag[i] == '\0')
   1211            {
   1212                break;
   1213            }
   1214            else
   1215            {
   1216                // Copy the character
   1217                bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
   1218            }
   1219        }
   1220 
   1221        if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
   1222        {
   1223            // Ensure it's null terminated
   1224            bcp47Tag[i] = L'\0';
   1225            LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);
   1226            if (lcid > 0)
   1227            {
   1228                // Found LCID from windows, return that one, unless its completely ambiguous
   1229                // LOCALE_USER_DEFAULT and transients are OK because they will round trip
   1230                // for this process.
   1231                if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
   1232                {
   1233                    return lcid;
   1234                }
   1235            }
   1236        }
   1237    }
   1238 #else
   1239    (void) localeID; // Suppress unused variable warning.
   1240 #endif
   1241 
   1242    // Nothing found, or not implemented.
   1243    return 0;
   1244 }
   1245 
   1246 U_CAPI uint32_t
   1247 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
   1248 {
   1249    if (U_FAILURE(*status) ||
   1250            langID == nullptr ||
   1251            posixID == nullptr ||
   1252            uprv_strlen(langID) < 2 ||
   1253            uprv_strlen(posixID) < 2) {
   1254        return locmap_root->hostID;
   1255    }
   1256 
   1257    // This function does the table lookup when native platform name->lcid conversion isn't available,
   1258    // or for locales that don't follow patterns the platform expects.
   1259    uint32_t   low    = 0;
   1260    uint32_t   high   = gLocaleCount;
   1261    uint32_t   mid;
   1262    uint32_t   oldmid = 0;
   1263    int32_t    compVal;
   1264 
   1265    uint32_t   value         = 0;
   1266    uint32_t   fallbackValue = (uint32_t)-1;
   1267    UErrorCode myStatus;
   1268    uint32_t   idx;
   1269 
   1270    /*Binary search for the map entry for normal cases */
   1271 
   1272    while (high > low)  /*binary search*/{
   1273 
   1274        mid = (high+low) >> 1; /*Finds median*/
   1275 
   1276        if (mid == oldmid) 
   1277            break;
   1278 
   1279        compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
   1280        if (compVal < 0){
   1281            high = mid;
   1282        }
   1283        else if (compVal > 0){
   1284            low = mid;
   1285        }
   1286        else /*we found it*/{
   1287            return getHostID(&gPosixIDmap[mid], posixID, *status);
   1288        }
   1289        oldmid = mid;
   1290    }
   1291 
   1292    /*
   1293     * Sometimes we can't do a binary search on posixID because some LCIDs
   1294     * go to different locales.  We hit one of those special cases.
   1295     */
   1296    for (idx = 0; idx < gLocaleCount; idx++ ) {
   1297        myStatus = U_ZERO_ERROR;
   1298        value = getHostID(&gPosixIDmap[idx], posixID, myStatus);
   1299        if (myStatus == U_ZERO_ERROR) {
   1300            return value;
   1301        }
   1302        else if (myStatus == U_USING_FALLBACK_WARNING) {
   1303            fallbackValue = value;
   1304        }
   1305    }
   1306 
   1307    if (fallbackValue != (uint32_t)-1) {
   1308        *status = U_USING_FALLBACK_WARNING;
   1309        return fallbackValue;
   1310    }
   1311 
   1312    /* no match found */
   1313    *status = U_ILLEGAL_ARGUMENT_ERROR;
   1314    return locmap_root->hostID;   /* return international (root) */
   1315 }