tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

region.cpp (31743B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2014-2016, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 *******************************************************************************
      8 *
      9 *
     10 * File REGION.CPP
     11 *
     12 * Modification History:*
     13 *   Date        Name        Description
     14 * 01/15/13      Emmons      Original Port from ICU4J
     15 ********************************************************************************
     16 */
     17 
     18 /**
     19 * \file
     20 * \brief C++ API: Region classes (territory containment)
     21 */
     22 
     23 #include "unicode/region.h"
     24 #include "unicode/utypes.h"
     25 #include "unicode/uobject.h"
     26 #include "unicode/unistr.h"
     27 #include "unicode/ures.h"
     28 #include "ucln_in.h"
     29 #include "cstring.h"
     30 #include "mutex.h"
     31 #include "uhash.h"
     32 #include "umutex.h"
     33 #include "uresimp.h"
     34 #include "region_impl.h"
     35 #include "util.h"
     36 
     37 #if !UCONFIG_NO_FORMATTING
     38 
     39 
     40 U_CDECL_BEGIN
     41 
     42 /**
     43 * Cleanup callback func
     44 */
     45 static UBool U_CALLCONV region_cleanup()
     46 {
     47    icu::Region::cleanupRegionData();
     48 
     49    return true;
     50 }
     51 
     52 U_CDECL_END
     53 
     54 U_NAMESPACE_BEGIN
     55 
     56 static UInitOnce gRegionDataInitOnce {};
     57 static UVector* availableRegions[URGN_LIMIT];
     58 
     59 static UHashtable *regionAliases = nullptr;
     60 static UHashtable *regionIDMap = nullptr;
     61 static UHashtable *numericCodeMap = nullptr;
     62 static UVector *allRegions = nullptr;
     63 
     64 static const char16_t UNKNOWN_REGION_ID [] = { 0x5A, 0x5A, 0 };  /* "ZZ" */
     65 static const char16_t OUTLYING_OCEANIA_REGION_ID [] = { 0x51, 0x4F, 0 };  /* "QO" */
     66 static const char16_t WORLD_ID [] = { 0x30, 0x30, 0x31, 0 };  /* "001" */
     67 static const char16_t RANGE_MARKER = 0x7E; /* '~' */
     68 
     69 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegionNameEnumeration)
     70 
     71 /*
     72 * Initializes the region data from the ICU resource bundles.  The region data
     73 * contains the basic relationships such as which regions are known, what the numeric
     74 * codes are, any known aliases, and the territory containment data.
     75 *
     76 * If the region data has already loaded, then this method simply returns without doing
     77 * anything meaningful.
     78 */
     79 void U_CALLCONV Region::loadRegionData(UErrorCode &status) {
     80 
     81    // Construct service objs first
     82    LocalUHashtablePointer newRegionIDMap(uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, &status));
     83    LocalUHashtablePointer newNumericCodeMap(uhash_open(uhash_hashLong,uhash_compareLong,nullptr,&status));
     84    LocalUHashtablePointer newRegionAliases(uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,nullptr,&status));
     85 
     86    LocalPointer<UVector> continents(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
     87    LocalPointer<UVector> groupings(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
     88    LocalPointer<UVector> lpAllRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
     89    allRegions = lpAllRegions.orphan();
     90 
     91    LocalUResourceBundlePointer metadata(ures_openDirect(nullptr,"metadata",&status));
     92    LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(),"alias",nullptr,&status));
     93    LocalUResourceBundlePointer territoryAlias(ures_getByKey(metadataAlias.getAlias(),"territory",nullptr,&status));
     94 
     95    LocalUResourceBundlePointer supplementalData(ures_openDirect(nullptr,"supplementalData",&status));
     96    LocalUResourceBundlePointer codeMappings(ures_getByKey(supplementalData.getAlias(),"codeMappings",nullptr,&status));
     97 
     98    LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",nullptr,&status));
     99    LocalUResourceBundlePointer regionList(ures_getByKey(idValidity.getAlias(),"region",nullptr,&status));
    100    LocalUResourceBundlePointer regionRegular(ures_getByKey(regionList.getAlias(),"regular",nullptr,&status));
    101    LocalUResourceBundlePointer regionMacro(ures_getByKey(regionList.getAlias(),"macroregion",nullptr,&status));
    102    LocalUResourceBundlePointer regionUnknown(ures_getByKey(regionList.getAlias(),"unknown",nullptr,&status));
    103 
    104    LocalUResourceBundlePointer territoryContainment(ures_getByKey(supplementalData.getAlias(),"territoryContainment",nullptr,&status));
    105    LocalUResourceBundlePointer worldContainment(ures_getByKey(territoryContainment.getAlias(),"001",nullptr,&status));
    106    LocalUResourceBundlePointer groupingContainment(ures_getByKey(territoryContainment.getAlias(),"grouping",nullptr,&status));
    107 
    108    ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup);
    109    if (U_FAILURE(status)) {
    110        return;
    111    }
    112 
    113    // now, initialize
    114    uhash_setValueDeleter(newRegionIDMap.getAlias(), uprv_deleteUObject);  // regionIDMap owns objs
    115    uhash_setKeyDeleter(newRegionAliases.getAlias(), uprv_deleteUObject);  // regionAliases owns the string keys
    116 
    117 
    118    while (U_SUCCESS(status) && ures_hasNext(regionRegular.getAlias())) {
    119        UnicodeString regionName = ures_getNextUnicodeString(regionRegular.getAlias(),nullptr,&status);
    120        int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
    121        char16_t buf[6];
    122        regionName.extract(buf,6,status);
    123        if ( rangeMarkerLocation > 0 ) {
    124            char16_t endRange = regionName.charAt(rangeMarkerLocation+1);
    125            buf[rangeMarkerLocation] = 0;
    126            while (U_SUCCESS(status) && buf[rangeMarkerLocation-1] <= endRange) {
    127                LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
    128                allRegions->adoptElement(newRegion.orphan(), status);
    129                buf[rangeMarkerLocation-1]++;
    130            }
    131        } else {
    132            LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
    133            allRegions->adoptElement(newRegion.orphan(), status);
    134        }
    135    }
    136 
    137    while (U_SUCCESS(status) && ures_hasNext(regionMacro.getAlias())) {
    138        UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),nullptr,&status);
    139        int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
    140        char16_t buf[6];
    141        regionName.extract(buf,6,status);
    142        if ( rangeMarkerLocation > 0 ) {
    143            char16_t endRange = regionName.charAt(rangeMarkerLocation+1);
    144            buf[rangeMarkerLocation] = 0;
    145            while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) {
    146                LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
    147                allRegions->adoptElement(newRegion.orphan(),status);
    148                buf[rangeMarkerLocation-1]++;
    149            }
    150        } else {
    151            LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
    152            allRegions->adoptElement(newRegion.orphan(),status);
    153        }
    154    }
    155 
    156    while (U_SUCCESS(status) && ures_hasNext(regionUnknown.getAlias())) {
    157        LocalPointer<UnicodeString> regionName (
    158            new UnicodeString(ures_getNextUnicodeString(regionUnknown.getAlias(), nullptr, &status), status));
    159        allRegions->adoptElement(regionName.orphan(),status);
    160    }
    161 
    162    while (U_SUCCESS(status) && ures_hasNext(worldContainment.getAlias())) {
    163        UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment.getAlias(),nullptr,&status));
    164        continents->adoptElement(continentName,status);
    165    }
    166    if (U_FAILURE(status)) {
    167        return;
    168    }
    169 
    170    for ( int32_t i = 0 ; i < allRegions->size() ; i++ ) {
    171        LocalPointer<Region> r(new Region(), status);
    172        if ( U_FAILURE(status) ) {
    173           return;
    174        }
    175        UnicodeString* regionName = static_cast<UnicodeString*>(allRegions->elementAt(i));
    176        r->idStr = *regionName;
    177 
    178        r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV);
    179        r->fType = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known.
    180 
    181        int32_t pos = 0;
    182        int32_t result = ICU_Utility::parseAsciiInteger(r->idStr, pos);
    183        if (pos > 0) {
    184            r->code = result; // Convert string to number
    185            uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)(r.getAlias()),&status);
    186            r->fType = URGN_SUBCONTINENT;
    187        } else {
    188            r->code = -1;
    189        }
    190        void* idStrAlias = (void*)&(r->idStr); // about to orphan 'r'. Save this off.
    191        uhash_put(newRegionIDMap.getAlias(),idStrAlias,(void *)(r.orphan()),&status); // regionIDMap takes ownership
    192    }
    193 
    194    UResourceBundle *groupingBundle = nullptr;
    195    while (U_SUCCESS(status) && ures_hasNext(groupingContainment.getAlias())) {
    196        groupingBundle = ures_getNextResource(groupingContainment.getAlias(), groupingBundle, &status);
    197        if (U_FAILURE(status)) {
    198            break;
    199        }
    200        UnicodeString *groupingName = new UnicodeString(ures_getKey(groupingBundle), -1, US_INV);
    201        LocalPointer<UnicodeString> lpGroupingName(groupingName, status);
    202        groupings->adoptElement(lpGroupingName.orphan(), status);
    203        if (U_FAILURE(status)) {
    204            break;
    205        }
    206        Region* grouping = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), groupingName));
    207        if (grouping != nullptr) {
    208            for (int32_t i = 0; i < ures_getSize(groupingBundle) && U_SUCCESS(status); i++) {
    209                UnicodeString child = ures_getUnicodeStringByIndex(groupingBundle, i, &status);
    210                if (U_SUCCESS(status)) {
    211                    if (grouping->containedRegions == nullptr) {
    212                        LocalPointer<UVector> lpContainedRegions(
    213                            new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
    214                        grouping->containedRegions = lpContainedRegions.orphan();
    215                        if (U_FAILURE(status)) {
    216                            break;
    217                        }
    218                    }
    219                    LocalPointer<UnicodeString> lpChildCopy(new UnicodeString(child), status);
    220                    grouping->containedRegions->adoptElement(lpChildCopy.orphan(), status);
    221                }
    222            }
    223        }
    224    }
    225    ures_close(groupingBundle);
    226    
    227    // Process the territory aliases
    228    while (U_SUCCESS(status) && ures_hasNext(territoryAlias.getAlias())) {
    229        LocalUResourceBundlePointer res(ures_getNextResource(territoryAlias.getAlias(),nullptr,&status));
    230        const char *aliasFrom = ures_getKey(res.getAlias());
    231        LocalPointer<UnicodeString> aliasFromStr(new UnicodeString(aliasFrom, -1, US_INV), status);
    232        UnicodeString aliasTo = ures_getUnicodeStringByKey(res.getAlias(),"replacement",&status);
    233        res.adoptInstead(nullptr);
    234 
    235        const Region* aliasToRegion = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), &aliasTo));
    236        Region* aliasFromRegion = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), aliasFromStr.getAlias()));
    237 
    238        if ( aliasToRegion != nullptr && aliasFromRegion == nullptr ) { // This is just an alias from some string to a region
    239            uhash_put(newRegionAliases.getAlias(),(void *)aliasFromStr.orphan(), (void *)aliasToRegion,&status);
    240        } else {
    241            if ( aliasFromRegion == nullptr ) { // Deprecated region code not in the primary codes list - so need to create a deprecated region for it.
    242                LocalPointer<Region> newRgn(new Region, status); 
    243                if ( U_SUCCESS(status) ) {
    244                    aliasFromRegion = newRgn.orphan();
    245                } else {
    246                    return; // error out
    247                }
    248                aliasFromRegion->idStr.setTo(*aliasFromStr);
    249                aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV);
    250                uhash_put(newRegionIDMap.getAlias(),(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status);
    251                int32_t pos = 0;
    252                int32_t result = ICU_Utility::parseAsciiInteger(aliasFromRegion->idStr, pos);
    253                if ( pos > 0 ) {
    254                    aliasFromRegion->code = result; // Convert string to number
    255                    uhash_iput(newNumericCodeMap.getAlias(),aliasFromRegion->code,(void *)aliasFromRegion,&status);
    256                } else {
    257                    aliasFromRegion->code = -1;
    258                }
    259                aliasFromRegion->fType = URGN_DEPRECATED;
    260            } else {
    261                aliasFromRegion->fType = URGN_DEPRECATED;
    262            }
    263 
    264            {
    265                LocalPointer<UVector> newPreferredValues(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
    266                aliasFromRegion->preferredValues = newPreferredValues.orphan();
    267            }
    268            if( U_FAILURE(status)) {
    269                return;
    270            }
    271            UnicodeString currentRegion;
    272            //currentRegion.remove();   TODO: was already 0 length?
    273            for (int32_t i = 0 ; i < aliasTo.length() && U_SUCCESS(status); i++ ) {
    274                if ( aliasTo.charAt(i) != 0x0020 ) {
    275                    currentRegion.append(aliasTo.charAt(i));
    276                }
    277                if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) {
    278                    Region* target = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), &currentRegion));
    279                    if (target) {
    280                        LocalPointer<UnicodeString> preferredValue(new UnicodeString(target->idStr), status);
    281                        aliasFromRegion->preferredValues->adoptElement(preferredValue.orphan(),status);  // may add null if err
    282                    }
    283                    currentRegion.remove();
    284                }
    285            }
    286        }
    287    }
    288 
    289    // Process the code mappings - This will allow us to assign numeric codes to most of the territories.
    290    while (U_SUCCESS(status) && ures_hasNext(codeMappings.getAlias())) {
    291        UResourceBundle *mapping = ures_getNextResource(codeMappings.getAlias(),nullptr,&status);
    292        if (U_SUCCESS(status) && ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) {
    293            UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status);
    294            UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status);
    295            UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status);
    296 
    297            Region* r = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), &codeMappingID));
    298            if ( r ) {
    299                int32_t pos = 0;
    300                int32_t result = ICU_Utility::parseAsciiInteger(codeMappingNumber, pos);
    301                if ( pos > 0 ) {
    302                    r->code = result; // Convert string to number
    303                    uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)r,&status);
    304                }
    305                LocalPointer<UnicodeString> code3(new UnicodeString(codeMapping3Letter), status);
    306                uhash_put(newRegionAliases.getAlias(),(void *)code3.orphan(), (void *)r,&status);
    307            }
    308        }
    309        ures_close(mapping);
    310    }
    311 
    312    // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
    313    Region *r;
    314    UnicodeString WORLD_ID_STRING(WORLD_ID);
    315    r = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), &WORLD_ID_STRING));
    316    if ( r ) {
    317        r->fType = URGN_WORLD;
    318    }
    319 
    320    UnicodeString UNKNOWN_REGION_ID_STRING(UNKNOWN_REGION_ID);
    321    r = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), &UNKNOWN_REGION_ID_STRING));
    322    if ( r ) {
    323        r->fType = URGN_UNKNOWN;
    324    }
    325 
    326    for ( int32_t i = 0 ; i < continents->size() ; i++ ) {
    327        r = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), continents->elementAt(i)));
    328        if ( r ) {
    329            r->fType = URGN_CONTINENT;
    330        }
    331    }
    332 
    333    for ( int32_t i = 0 ; i < groupings->size() ; i++ ) {
    334        r = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), groupings->elementAt(i)));
    335        if ( r ) {
    336            r->fType = URGN_GROUPING;
    337        }
    338    }
    339 
    340    // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
    341    // even though it looks like a territory code.  Need to handle it here.
    342 
    343    UnicodeString OUTLYING_OCEANIA_REGION_ID_STRING(OUTLYING_OCEANIA_REGION_ID);
    344    r = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), &OUTLYING_OCEANIA_REGION_ID_STRING));
    345    if ( r ) {
    346        r->fType = URGN_SUBCONTINENT;
    347    }
    348 
    349    // Load territory containment info from the supplemental data.
    350    while ( ures_hasNext(territoryContainment.getAlias()) ) {
    351        LocalUResourceBundlePointer mapping(ures_getNextResource(territoryContainment.getAlias(),nullptr,&status));
    352        if( U_FAILURE(status) ) {
    353            return;  // error out
    354        }
    355        const char *parent = ures_getKey(mapping.getAlias());
    356        if (uprv_strcmp(parent, "containedGroupings") == 0 || uprv_strcmp(parent, "deprecated") == 0) {
    357            continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip.
    358            // #11232 is to do something useful with these.
    359        }
    360        UnicodeString parentStr = UnicodeString(parent, -1 , US_INV);
    361        Region* parentRegion = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), &parentStr));
    362 
    363        for ( int j = 0 ; j < ures_getSize(mapping.getAlias()); j++ ) {
    364            UnicodeString child = ures_getUnicodeStringByIndex(mapping.getAlias(),j,&status);
    365            Region* childRegion = static_cast<Region*>(uhash_get(newRegionIDMap.getAlias(), &child));
    366            if ( parentRegion != nullptr && childRegion != nullptr ) {
    367 
    368                // Add the child region to the set of regions contained by the parent
    369                if (parentRegion->containedRegions == nullptr) {
    370                    LocalPointer<UVector> lpContainedRegions(
    371                        new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
    372                    parentRegion->containedRegions = lpContainedRegions.orphan();
    373                    if (U_FAILURE(status)) {
    374                        return;
    375                    }
    376                }
    377 
    378                LocalPointer<UnicodeString> childStr(new UnicodeString(), status);
    379                if (U_FAILURE(status)) {
    380                    return;  // error out
    381                }
    382                childStr->fastCopyFrom(childRegion->idStr);
    383                parentRegion->containedRegions->adoptElement(childStr.orphan(),status);
    384                if (U_FAILURE(status)) {
    385                    return;
    386                }
    387 
    388                // Set the parent region to be the containing region of the child.
    389                // Regions of type GROUPING can't be set as the parent, since another region
    390                // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
    391                if ( parentRegion->fType != URGN_GROUPING) {
    392                    childRegion->containingRegion = parentRegion;
    393                }
    394            }
    395        }
    396    }
    397 
    398    // Create the availableRegions lists
    399    int32_t pos = UHASH_FIRST;
    400    while ( const UHashElement* element = uhash_nextElement(newRegionIDMap.getAlias(),&pos)) {
    401        Region* ar = static_cast<Region*>(element->value.pointer);
    402        if ( availableRegions[ar->fType] == nullptr ) {
    403            LocalPointer<UVector> newAr(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
    404            availableRegions[ar->fType] = newAr.orphan();
    405        }
    406        LocalPointer<UnicodeString> arString(new UnicodeString(ar->idStr), status);
    407        if( U_FAILURE(status) ) {
    408            return;  // error out
    409        }
    410        availableRegions[ar->fType]->adoptElement(arString.orphan(), status);
    411    }
    412    
    413    // copy hashtables
    414    numericCodeMap = newNumericCodeMap.orphan();
    415    regionIDMap = newRegionIDMap.orphan();
    416    regionAliases = newRegionAliases.orphan();
    417 }
    418 
    419 void Region::cleanupRegionData() {
    420    for (int32_t i = 0 ; i < URGN_LIMIT ; i++ ) {
    421        if ( availableRegions[i] ) {
    422            delete availableRegions[i];
    423            availableRegions[i] = nullptr;
    424        }
    425    }
    426 
    427    if (regionAliases) {
    428        uhash_close(regionAliases);
    429    }
    430 
    431    if (numericCodeMap) {
    432        uhash_close(numericCodeMap);
    433    }
    434 
    435    if (regionIDMap) {
    436        uhash_close(regionIDMap);
    437    }
    438    if (allRegions) {
    439        delete allRegions;
    440        allRegions = nullptr;
    441    }
    442 
    443    regionAliases = numericCodeMap = regionIDMap = nullptr;
    444 
    445    gRegionDataInitOnce.reset();
    446 }
    447 
    448 Region::Region ()
    449        : code(-1),
    450          fType(URGN_UNKNOWN),
    451          containingRegion(nullptr),
    452          containedRegions(nullptr),
    453          preferredValues(nullptr) {
    454    id[0] = 0;
    455 }
    456 
    457 Region::~Region () {
    458    delete containedRegions;
    459    delete preferredValues;
    460 }
    461 
    462 /**
    463 * Returns true if the two regions are equal.
    464 * Per PMC, just use pointer compare, since we have at most one instance of each Region.
    465 */
    466 bool
    467 Region::operator==(const Region &that) const {
    468    return (idStr == that.idStr);
    469 }
    470 
    471 /**
    472 * Returns true if the two regions are NOT equal; that is, if operator ==() returns false.
    473 * Per PMC, just use pointer compare, since we have at most one instance of each Region.
    474 */
    475 bool
    476 Region::operator!=(const Region &that) const {
    477        return (idStr != that.idStr);
    478 }
    479 
    480 /**
    481 * Returns a pointer to a Region using the given region code.  The region code can be either 2-letter ISO code,
    482 * 3-letter ISO code,  UNM.49 numeric code, or other valid Unicode Region Code as defined by the LDML specification.
    483 * The identifier will be canonicalized internally using the supplemental metadata as defined in the CLDR.
    484 * If the region code is nullptr or not recognized, the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR )
    485 */
    486 const Region* U_EXPORT2
    487 Region::getInstance(const char *region_code, UErrorCode &status) {
    488 
    489    umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status);
    490    if (U_FAILURE(status)) {
    491        return nullptr;
    492    }
    493 
    494    if ( !region_code ) {
    495        status = U_ILLEGAL_ARGUMENT_ERROR;
    496        return nullptr;
    497    }
    498 
    499    UnicodeString regionCodeString = UnicodeString(region_code, -1, US_INV);
    500    Region* r = static_cast<Region*>(uhash_get(regionIDMap, &regionCodeString));
    501 
    502    if ( !r ) {
    503        r = static_cast<Region*>(uhash_get(regionAliases, &regionCodeString));
    504    }
    505 
    506    if ( !r ) { // Unknown region code
    507        status = U_ILLEGAL_ARGUMENT_ERROR;
    508        return nullptr;
    509    }
    510 
    511    if ( r->fType == URGN_DEPRECATED && r->preferredValues->size() == 1) {
    512        StringEnumeration *pv = r->getPreferredValues(status);
    513        pv->reset(status);
    514        const UnicodeString *ustr = pv->snext(status);
    515        r = static_cast<Region*>(uhash_get(regionIDMap, ustr));
    516        delete pv;
    517    }
    518 
    519    return r;
    520 
    521 }
    522 
    523 /**
    524 * Returns a pointer to a Region using the given numeric region code. If the numeric region code is not recognized,
    525 * the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR ).
    526 */
    527 const Region* U_EXPORT2
    528 Region::getInstance (int32_t code, UErrorCode &status) {
    529 
    530    umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status);
    531    if (U_FAILURE(status)) {
    532        return nullptr;
    533    }
    534 
    535    Region* r = static_cast<Region*>(uhash_iget(numericCodeMap, code));
    536 
    537    if ( !r ) { // Just in case there's an alias that's numeric, try to find it.
    538        UnicodeString id;
    539        ICU_Utility::appendNumber(id, code, 10, 1);
    540        r = static_cast<Region*>(uhash_get(regionAliases, &id));
    541    }
    542 
    543    if( U_FAILURE(status) ) {
    544        return nullptr;
    545    }
    546 
    547    if ( !r ) {
    548        status = U_ILLEGAL_ARGUMENT_ERROR;
    549        return nullptr;
    550    }
    551 
    552    if ( r->fType == URGN_DEPRECATED && r->preferredValues->size() == 1) {
    553        StringEnumeration *pv = r->getPreferredValues(status);
    554        pv->reset(status);
    555        const UnicodeString *ustr = pv->snext(status);
    556        r = static_cast<Region*>(uhash_get(regionIDMap, ustr));
    557        delete pv;
    558    }
    559 
    560    return r;
    561 }
    562 
    563 
    564 /**
    565 * Returns an enumeration over the IDs of all known regions that match the given type.
    566 */
    567 StringEnumeration* U_EXPORT2
    568 Region::getAvailable(URegionType type, UErrorCode &status) {
    569    umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status); // returns immediately if U_FAILURE(status)
    570    if (U_FAILURE(status)) {
    571        return nullptr;
    572    }
    573    return new RegionNameEnumeration(availableRegions[type],status);
    574 }
    575 
    576 /**
    577 * Returns a pointer to the region that contains this region.  Returns nullptr if this region is code "001" (World)
    578 * or "ZZ" (Unknown region). For example, calling this method with region "IT" (Italy) returns the
    579 * region "039" (Southern Europe).
    580 */
    581 const Region*
    582 Region::getContainingRegion() const {
    583    UErrorCode status = U_ZERO_ERROR;
    584    umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status);
    585    return containingRegion;
    586 }
    587 
    588 /**
    589 * Return a pointer to the region that geographically contains this region and matches the given type,
    590 * moving multiple steps up the containment chain if necessary.  Returns nullptr if no containing region can be found
    591 * that matches the given type. Note: The URegionTypes = "URGN_GROUPING", "URGN_DEPRECATED", or "URGN_UNKNOWN"
    592 * are not appropriate for use in this API. nullptr will be returned in this case. For example, calling this method
    593 * with region "IT" (Italy) for type "URGN_CONTINENT" returns the region "150" ( Europe ).
    594 */
    595 const Region*
    596 Region::getContainingRegion(URegionType type) const {
    597    UErrorCode status = U_ZERO_ERROR;
    598    umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status);
    599    if ( containingRegion == nullptr ) {
    600        return nullptr;
    601    }
    602 
    603    return ( containingRegion->fType == type)? containingRegion: containingRegion->getContainingRegion(type);
    604 }
    605 
    606 /**
    607 * Return an enumeration over the IDs of all the regions that are immediate children of this region in the
    608 * region hierarchy. These returned regions could be either macro regions, territories, or a mixture of the two,
    609 * depending on the containment data as defined in CLDR.  This API may return nullptr if this region doesn't have
    610 * any sub-regions. For example, calling this method with region "150" (Europe) returns an enumeration containing
    611 * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe) - "154" (Northern Europe)
    612 * and "155" (Western Europe).
    613 */
    614 StringEnumeration*
    615 Region::getContainedRegions(UErrorCode &status) const {
    616    umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status); // returns immediately if U_FAILURE(status)
    617    if (U_FAILURE(status)) {
    618        return nullptr;
    619    }
    620    return new RegionNameEnumeration(containedRegions,status);
    621 }
    622 
    623 /**
    624 * Returns an enumeration over the IDs of all the regions that are children of this region anywhere in the region
    625 * hierarchy and match the given type.  This API may return an empty enumeration if this region doesn't have any
    626 * sub-regions that match the given type. For example, calling this method with region "150" (Europe) and type
    627 * "URGN_TERRITORY" returns a set containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. )
    628 */
    629 StringEnumeration*
    630 Region::getContainedRegions( URegionType type, UErrorCode &status ) const {
    631    umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status); // returns immediately if U_FAILURE(status)
    632 
    633    UVector result(nullptr, uhash_compareChars, status);
    634    LocalPointer<StringEnumeration> cr(getContainedRegions(status), status);
    635    if (U_FAILURE(status)) {
    636        return nullptr;
    637    }
    638 
    639    const char *regionId;
    640    while((regionId = cr->next(nullptr, status)) != nullptr && U_SUCCESS(status)) {
    641        const Region *r = Region::getInstance(regionId, status);
    642        if ( r->getType() == type) {
    643            result.addElement(const_cast<UnicodeString *>(&r->idStr), status);
    644        } else {
    645            LocalPointer<StringEnumeration> children(r->getContainedRegions(type, status));
    646            const char *id2;
    647            while(U_SUCCESS(status) && ((id2 = children->next(nullptr, status)) != nullptr)) {
    648                const Region *r2 = Region::getInstance(id2,status);
    649                result.addElement(const_cast<UnicodeString *>(&r2->idStr), status);
    650            }
    651        }
    652    }
    653    LocalPointer<StringEnumeration> resultEnumeration(
    654        new RegionNameEnumeration(&result, status), status);
    655    return U_SUCCESS(status) ? resultEnumeration.orphan() : nullptr;
    656 }
    657 
    658 /**
    659 * Returns true if this region contains the supplied other region anywhere in the region hierarchy.
    660 */
    661 UBool
    662 Region::contains(const Region &other) const {
    663    UErrorCode status = U_ZERO_ERROR;
    664    umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status);
    665 
    666    if (!containedRegions) {
    667          return false;
    668    }
    669    if (containedRegions->contains((void *)&other.idStr)) {
    670        return true;
    671    } else {
    672        for ( int32_t i = 0 ; i < containedRegions->size() ; i++ ) {
    673            UnicodeString* crStr = static_cast<UnicodeString*>(containedRegions->elementAt(i));
    674            Region* cr = static_cast<Region*>(uhash_get(regionIDMap, crStr));
    675            if ( cr && cr->contains(other) ) {
    676                return true;
    677            }
    678        }
    679    }
    680 
    681    return false;
    682 }
    683 
    684 /**
    685 * For deprecated regions, return an enumeration over the IDs of the regions that are the preferred replacement
    686 * regions for this region.  Returns nullptr for a non-deprecated region.  For example, calling this method with region
    687 * "SU" (Soviet Union) would return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc...
    688 */
    689 StringEnumeration*
    690 Region::getPreferredValues(UErrorCode &status) const {
    691    umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status); // returns immediately if U_FAILURE(status)
    692    if (U_FAILURE(status) || fType != URGN_DEPRECATED) {
    693        return nullptr;
    694    }
    695    return new RegionNameEnumeration(preferredValues,status);
    696 }
    697 
    698 
    699 /**
    700 * Return this region's canonical region code.
    701 */
    702 const char*
    703 Region::getRegionCode() const {
    704    return id;
    705 }
    706 
    707 int32_t
    708 Region::getNumericCode() const {
    709    return code;
    710 }
    711 
    712 /**
    713 * Returns the region type of this region.
    714 */
    715 URegionType
    716 Region::getType() const {
    717    return fType;
    718 }
    719 
    720 RegionNameEnumeration::RegionNameEnumeration(UVector *nameList, UErrorCode& status) :
    721        pos(0), fRegionNames(nullptr) {
    722    // TODO: https://unicode-org.atlassian.net/browse/ICU-21829
    723    // Is all of the copying going on here really necessary?
    724    if (nameList && U_SUCCESS(status)) {
    725        LocalPointer<UVector> regionNames(
    726            new UVector(uprv_deleteUObject, uhash_compareUnicodeString, nameList->size(), status), status);
    727        for ( int32_t i = 0 ; U_SUCCESS(status) && i < nameList->size() ; i++ ) {
    728            UnicodeString* this_region_name = static_cast<UnicodeString*>(nameList->elementAt(i));
    729            LocalPointer<UnicodeString> new_region_name(new UnicodeString(*this_region_name), status);
    730            regionNames->adoptElement(new_region_name.orphan(), status);
    731        }
    732        if (U_SUCCESS(status)) {
    733            fRegionNames = regionNames.orphan();
    734        }
    735    }
    736 }
    737 
    738 const UnicodeString*
    739 RegionNameEnumeration::snext(UErrorCode& status) {
    740  if (U_FAILURE(status) || (fRegionNames==nullptr)) {
    741    return nullptr;
    742  }
    743  const UnicodeString* nextStr = static_cast<const UnicodeString*>(fRegionNames->elementAt(pos));
    744  if (nextStr!=nullptr) {
    745    pos++;
    746  }
    747  return nextStr;
    748 }
    749 
    750 void
    751 RegionNameEnumeration::reset(UErrorCode& /*status*/) {
    752    pos=0;
    753 }
    754 
    755 int32_t
    756 RegionNameEnumeration::count(UErrorCode& /*status*/) const {
    757    return (fRegionNames==nullptr) ? 0 : fRegionNames->size();
    758 }
    759 
    760 RegionNameEnumeration::~RegionNameEnumeration() {
    761    delete fRegionNames;
    762 }
    763 
    764 U_NAMESPACE_END
    765 
    766 #endif /* #if !UCONFIG_NO_FORMATTING */
    767 
    768 //eof