tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

usprep.cpp (28361B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2003-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  usprep.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2003jul2
     16 *   created by: Ram Viswanadha
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_IDNA
     22 
     23 #include "unicode/usprep.h"
     24 
     25 #include "unicode/normalizer2.h"
     26 #include "unicode/ustring.h"
     27 #include "unicode/uchar.h"
     28 #include "unicode/uversion.h"
     29 #include "umutex.h"
     30 #include "cmemory.h"
     31 #include "sprpimpl.h"
     32 #include "ustr_imp.h"
     33 #include "uhash.h"
     34 #include "cstring.h"
     35 #include "udataswp.h"
     36 #include "ucln_cmn.h"
     37 #include "ubidi_props.h"
     38 #include "uprops.h"
     39 
     40 U_NAMESPACE_USE
     41 
     42 U_CDECL_BEGIN
     43 
     44 /*
     45 Static cache for already opened StringPrep profiles
     46 */
     47 static UHashtable *SHARED_DATA_HASHTABLE = nullptr;
     48 static icu::UInitOnce gSharedDataInitOnce {};
     49 
     50 static UMutex usprepMutex;
     51 /* format version of spp file */
     52 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
     53 
     54 /* the Unicode version of the sprep data */
     55 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     56 
     57 /* Profile names must be aligned to UStringPrepProfileType: entry i is the data file name for enum value i */
     58 static const char * const PROFILE_NAMES[] = {
     59    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
     60    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
     61    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
     62    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
     63    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
     64    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
     65    "rfc3722",      /* USPREP_RFC3722_ISCSI */
     66    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
     67    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
     68    "rfc4011",      /* USPREP_RFC4011_MIB */
     69    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
     70    "rfc4505",      /* USPREP_RFC4505_TRACE */
     71    "rfc4518",      /* USPREP_RFC4518_LDAP */
     72    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
     73 };
     74 
     75 static UBool U_CALLCONV
     76 isSPrepAcceptable(void * /* context */,
     77             const char * /* type */, 
     78             const char * /* name */,
     79             const UDataInfo *pInfo) {
     80    if(
     81        pInfo->size>=20 &&
     82        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     83        pInfo->charsetFamily==U_CHARSET_FAMILY &&
     84        pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
     85        pInfo->dataFormat[1]==0x50 &&
     86        pInfo->dataFormat[2]==0x52 &&
     87        pInfo->dataFormat[3]==0x50 &&
     88        pInfo->formatVersion[0]==3 &&
     89        pInfo->formatVersion[2]==UTRIE_SHIFT &&
     90        pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     91    ) {
     92        //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     93        uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     94        return true;
     95    } else {
     96        return false;
     97    }
     98 }
     99 
    100 static int32_t U_CALLCONV
    101 getSPrepFoldingOffset(uint32_t data) {
    102 
    103    return (int32_t)data;
    104 
    105 }
    106 
    107 /* hashes an entry  */
    108 static int32_t U_CALLCONV 
    109 hashEntry(const UHashTok parm) {
    110    UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
    111    UHashTok namekey, pathkey;
    112    namekey.pointer = b->name;
    113    pathkey.pointer = b->path;
    114    uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) +
    115            37u * static_cast<uint32_t>(uhash_hashChars(pathkey));
    116    return static_cast<int32_t>(unsignedHash);
    117 }
    118 
    119 /* compares two entries */
    120 static UBool U_CALLCONV 
    121 compareEntries(const UHashTok p1, const UHashTok p2) {
    122    UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
    123    UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
    124    UHashTok name1, name2, path1, path2;
    125    name1.pointer = b1->name;
    126    name2.pointer = b2->name;
    127    path1.pointer = b1->path;
    128    path2.pointer = b2->path;
    129    return uhash_compareChars(name1, name2) && uhash_compareChars(path1, path2);
    130 }
    131 
    132 static void 
    133 usprep_unload(UStringPrepProfile* data){
    134    udata_close(data->sprepData);
    135 }
    136 
    137 static int32_t 
    138 usprep_internal_flushCache(UBool noRefCount){
    139    UStringPrepProfile *profile = nullptr;
    140    UStringPrepKey  *key  = nullptr;
    141    int32_t pos = UHASH_FIRST;
    142    int32_t deletedNum = 0;
    143    const UHashElement *e;
    144 
    145    /*
    146     * if shared data hasn't even been lazy evaluated yet
    147     * return 0
    148     */
    149    umtx_lock(&usprepMutex);
    150    if (SHARED_DATA_HASHTABLE == nullptr) {
    151        umtx_unlock(&usprepMutex);
    152        return 0;
    153    }
    154 
    155    /*creates an enumeration to iterate through every element in the table */
    156    while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != nullptr)
    157    {
    158        profile = (UStringPrepProfile *) e->value.pointer;
    159        key  = (UStringPrepKey *) e->key.pointer;
    160 
    161        if ((noRefCount== false && profile->refCount == 0) || 
    162             noRefCount) {
    163            deletedNum++;
    164            uhash_removeElement(SHARED_DATA_HASHTABLE, e);
    165 
    166            /* unload the data */
    167            usprep_unload(profile);
    168 
    169            if(key->name != nullptr) {
    170                uprv_free(key->name);
    171                key->name=nullptr;
    172            }
    173            if(key->path != nullptr) {
    174                uprv_free(key->path);
    175                key->path=nullptr;
    176            }
    177            uprv_free(profile);
    178            uprv_free(key);
    179        }
    180       
    181    }
    182    umtx_unlock(&usprepMutex);
    183 
    184    return deletedNum;
    185 }
    186 
    187 /* Works just like ucnv_flushCache() 
    188 static int32_t 
    189 usprep_flushCache(){
    190    return usprep_internal_flushCache(false);
    191 }
    192 */
    193 
    194 static UBool U_CALLCONV usprep_cleanup(){
    195    if (SHARED_DATA_HASHTABLE != nullptr) {
    196        usprep_internal_flushCache(true);
    197        if (SHARED_DATA_HASHTABLE != nullptr && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
    198            uhash_close(SHARED_DATA_HASHTABLE);
    199            SHARED_DATA_HASHTABLE = nullptr;
    200        }
    201    }
    202    gSharedDataInitOnce.reset();
    203    return (SHARED_DATA_HASHTABLE == nullptr);
    204 }
    205 U_CDECL_END
    206 
    207 
    208 /** Initializes the cache for resources */
    209 static void U_CALLCONV
    210 createCache(UErrorCode &status) {
    211    SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, nullptr, &status);
    212    if (U_FAILURE(status)) {
    213        SHARED_DATA_HASHTABLE = nullptr;
    214    }
    215    ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
    216 }
    217 
    218 static void 
    219 initCache(UErrorCode *status) {
    220    umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
    221 }
    222 
    223 static UBool U_CALLCONV
    224 loadData(UStringPrepProfile* profile, 
    225         const char* path, 
    226         const char* name, 
    227         const char* type, 
    228         UErrorCode* errorCode) {
    229    /* load Unicode SPREP data from file */    
    230    UTrie _sprepTrie = {nullptr, nullptr, nullptr, 0, 0, 0, 0};
    231    UDataMemory *dataMemory;
    232    const int32_t *p=nullptr;
    233    const uint8_t *pb;
    234    UVersionInfo normUnicodeVersion;
    235    int32_t normUniVer, sprepUniVer, normCorrVer;
    236 
    237    if(errorCode==nullptr || U_FAILURE(*errorCode)) {
    238        return 0;
    239    }
    240 
    241    /* open the data outside the mutex block */
    242    //TODO: change the path
    243    dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, nullptr, errorCode);
    244    if(U_FAILURE(*errorCode)) {
    245        return false;
    246    }
    247 
    248    p = static_cast<const int32_t*>(udata_getMemory(dataMemory));
    249    pb = reinterpret_cast<const uint8_t*>(p + _SPREP_INDEX_TOP);
    250    utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    251    _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
    252 
    253 
    254    if(U_FAILURE(*errorCode)) {
    255        udata_close(dataMemory);
    256        return false;
    257    }
    258 
    259    /* in the mutex block, set the data for this process */
    260    umtx_lock(&usprepMutex);
    261    if(profile->sprepData==nullptr) {
    262        profile->sprepData=dataMemory;
    263        dataMemory=nullptr;
    264        uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
    265        uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    266    } else {
    267        p = static_cast<const int32_t*>(udata_getMemory(profile->sprepData));
    268    }
    269    umtx_unlock(&usprepMutex);
    270    /* initialize some variables */
    271    profile->mappingData = reinterpret_cast<const uint16_t*>(reinterpret_cast<const uint8_t*>(p + _SPREP_INDEX_TOP) + profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
    272    
    273    u_getUnicodeVersion(normUnicodeVersion);
    274    normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 
    275                 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    276    sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 
    277                  (dataVersion[2] << 8 ) + (dataVersion[3]);
    278    normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    279    
    280    if(U_FAILURE(*errorCode)){
    281        udata_close(dataMemory);
    282        return false;
    283    }
    284    if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
    285        normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
    286        ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
    287      ){
    288        *errorCode = U_INVALID_FORMAT_ERROR;
    289        udata_close(dataMemory);
    290        return false;
    291    }
    292    profile->isDataLoaded = true;
    293 
    294    /* if a different thread set it first, then close the extra data */
    295    if(dataMemory!=nullptr) {
    296        udata_close(dataMemory); /* nullptr if it was set correctly */
    297    }
    298 
    299 
    300    return profile->isDataLoaded;
    301 }
    302 
    303 static UStringPrepProfile* 
    304 usprep_getProfile(const char* path, 
    305                  const char* name,
    306                  UErrorCode *status){
    307 
    308    UStringPrepProfile* profile = nullptr;
    309 
    310    initCache(status);
    311 
    312    if(U_FAILURE(*status)){
    313        return nullptr;
    314    }
    315 
    316    UStringPrepKey stackKey;
    317    /* 
    318     * const is cast way to save malloc, strcpy and free calls 
    319     * we use the passed in pointers for fetching the data from the 
    320     * hash table which is safe
    321     */
    322    stackKey.name = const_cast<char*>(name);
    323    stackKey.path = const_cast<char*>(path);
    324 
    325    /* fetch the data from the cache */
    326    umtx_lock(&usprepMutex);
    327    profile = static_cast<UStringPrepProfile*>(uhash_get(SHARED_DATA_HASHTABLE, &stackKey));
    328    if(profile != nullptr) {
    329        profile->refCount++;
    330    }
    331    umtx_unlock(&usprepMutex);
    332    
    333    if(profile == nullptr) {
    334        /* else load the data and put the data in the cache */
    335        LocalMemory<UStringPrepProfile> newProfile;
    336        if(newProfile.allocateInsteadAndReset() == nullptr) {
    337            *status = U_MEMORY_ALLOCATION_ERROR;
    338            return nullptr;
    339        }
    340 
    341        /* load the data */
    342        if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
    343            return nullptr;
    344        }
    345 
    346        /* get the options */
    347        newProfile->doNFKC = static_cast<UBool>((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    348        newProfile->checkBiDi = static_cast<UBool>((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
    349 
    350        LocalMemory<UStringPrepKey> key;
    351        LocalMemory<char> keyName;
    352        LocalMemory<char> keyPath;
    353        if( key.allocateInsteadAndReset() == nullptr ||
    354            keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == nullptr ||
    355            (path != nullptr &&
    356             keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == nullptr)
    357         ) {
    358            *status = U_MEMORY_ALLOCATION_ERROR;
    359            usprep_unload(newProfile.getAlias());
    360            return nullptr;
    361        }
    362 
    363        umtx_lock(&usprepMutex);
    364        // If another thread already inserted the same key/value, refcount and cleanup our thread data
    365        profile = static_cast<UStringPrepProfile*>(uhash_get(SHARED_DATA_HASHTABLE, &stackKey));
    366        if(profile != nullptr) {
    367            profile->refCount++;
    368            usprep_unload(newProfile.getAlias());
    369        }
    370        else {
    371            /* initialize the key members */
    372            key->name = keyName.orphan();
    373            uprv_strcpy(key->name, name);
    374            if(path != nullptr){
    375                key->path = keyPath.orphan();
    376                uprv_strcpy(key->path, path);
    377            }        
    378            profile = newProfile.orphan();
    379    
    380            /* add the data object to the cache */
    381            profile->refCount = 1;
    382            uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
    383        }
    384        umtx_unlock(&usprepMutex);
    385    }
    386 
    387    return profile;
    388 }
    389 
    390 U_CAPI UStringPrepProfile* U_EXPORT2
    391 usprep_open(const char* path, 
    392            const char* name,
    393            UErrorCode* status){
    394 
    395    if(status == nullptr || U_FAILURE(*status)){
    396        return nullptr;
    397    }
    398       
    399    /* initialize the profile struct members */
    400    return usprep_getProfile(path,name,status);
    401 }
    402 
    403 U_CAPI UStringPrepProfile* U_EXPORT2
    404 usprep_openByType(UStringPrepProfileType type,
    405 			  UErrorCode* status) {
    406    if(status == nullptr || U_FAILURE(*status)){
    407        return nullptr;
    408    }
    409    int32_t index = (int32_t)type;
    410    if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
    411        *status = U_ILLEGAL_ARGUMENT_ERROR;
    412        return nullptr;
    413    }
    414    return usprep_open(nullptr, PROFILE_NAMES[index], status);
    415 }
    416 
    417 U_CAPI void U_EXPORT2
    418 usprep_close(UStringPrepProfile* profile){
    419    if(profile==nullptr){
    420        return;
    421    }
    422 
    423    umtx_lock(&usprepMutex);
    424    /* decrement the ref count*/
    425    if(profile->refCount > 0){
    426        profile->refCount--;
    427    }
    428    umtx_unlock(&usprepMutex);
    429    
    430 }
    431 
    432 U_CFUNC void 
    433 uprv_syntaxError(const char16_t* rules,
    434                 int32_t pos,
    435                 int32_t rulesLen,
    436                 UParseError* parseError){
    437    if(parseError == nullptr){
    438        return;
    439    }
    440    parseError->offset = pos;
    441    parseError->line = 0 ; // we are not using line numbers 
    442    
    443    // for pre-context
    444    int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    445    int32_t limit = pos;
    446    
    447    u_memcpy(parseError->preContext,rules+start,limit-start);
    448    //null terminate the buffer
    449    parseError->preContext[limit-start] = 0;
    450    
    451    // for post-context; include error rules[pos]
    452    start = pos;
    453    limit = start + (U_PARSE_CONTEXT_LEN-1);
    454    if (limit > rulesLen) {
    455        limit = rulesLen;
    456    }
    457    if (start < rulesLen) {
    458        u_memcpy(parseError->postContext,rules+start,limit-start);
    459    }
    460    //null terminate the buffer
    461    parseError->postContext[limit-start]= 0;
    462 }
    463 
    464 
    465 static inline UStringPrepType
    466 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
    467 
    468    UStringPrepType type;
    469    if(trieWord == 0){
    470        /* 
    471         * Initial value stored in the mapping table 
    472         * just return USPREP_TYPE_LIMIT .. so that
    473         * the source codepoint is copied to the destination
    474         */
    475        type = USPREP_TYPE_LIMIT;
    476        isIndex =false;
    477        value = 0;
    478    }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
    479        type = static_cast<UStringPrepType>(trieWord - _SPREP_TYPE_THRESHOLD);
    480        isIndex =false;
    481        value = 0;
    482    }else{
    483        /* get the type */
    484        type = USPREP_MAP;
    485        /* ascertain if the value is index or delta */
    486        if(trieWord & 0x02){
    487            isIndex = true;
    488            value = trieWord  >> 2; //mask off the lower 2 bits and shift
    489        }else{
    490            isIndex = false;
    491            value = static_cast<int16_t>(trieWord);
    492            value =  (value >> 2);
    493        }
    494 
    495        if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
    496            type = USPREP_DELETE;
    497            isIndex =false;
    498            value = 0;
    499        }
    500    }
    501    return type;
    502 }
    503 
    504 // TODO: change to writing to UnicodeString not char16_t *
    505 static int32_t 
    506 usprep_map(  const UStringPrepProfile* profile, 
    507             const char16_t* src, int32_t srcLength,
    508             char16_t* dest, int32_t destCapacity,
    509             int32_t options,
    510             UParseError* parseError,
    511             UErrorCode* status ){
    512    
    513    uint16_t result;
    514    int32_t destIndex=0;
    515    int32_t srcIndex;
    516    UBool allowUnassigned = static_cast<UBool>((options & USPREP_ALLOW_UNASSIGNED) > 0);
    517    UStringPrepType type;
    518    int16_t value;
    519    UBool isIndex;
    520    const int32_t* indexes = profile->indexes;
    521 
    522    // no error checking the caller check for error and arguments
    523    // no string length check the caller finds out the string length
    524 
    525    for(srcIndex=0;srcIndex<srcLength;){
    526        UChar32 ch;
    527 
    528        U16_NEXT(src,srcIndex,srcLength,ch);
    529        
    530        result=0;
    531 
    532        UTRIE_GET16(&profile->sprepTrie,ch,result);
    533        
    534        type = getValues(result, value, isIndex);
    535 
    536        // check if the source codepoint is unassigned
    537        if(type == USPREP_UNASSIGNED && allowUnassigned == false){
    538 
    539            uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
    540            *status = U_STRINGPREP_UNASSIGNED_ERROR;
    541            return 0;
    542            
    543        }else if(type == USPREP_MAP){
    544            
    545            int32_t index, length;
    546 
    547            if(isIndex){
    548                index = value;
    549                if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    550                         index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    551                    length = 1;
    552                }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    553                         index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    554                    length = 2;
    555                }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    556                         index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    557                    length = 3;
    558                }else{
    559                    length = profile->mappingData[index++];
    560         
    561                }
    562 
    563                /* copy mapping to destination */
    564                for(int32_t i=0; i< length; i++){
    565                    if(destIndex < destCapacity  ){
    566                        dest[destIndex] = profile->mappingData[index+i];
    567                    }
    568                    destIndex++; /* for pre-flighting */
    569                }  
    570                continue;
    571            }else{
    572                // subtract the delta to arrive at the code point
    573                ch -= value;
    574            }
    575 
    576        }else if(type==USPREP_DELETE){
    577             // just consume the codepoint and continue
    578            continue;
    579        }
    580        //copy the code point into destination
    581        if(ch <= 0xFFFF){
    582            if(destIndex < destCapacity ){
    583                dest[destIndex] = static_cast<char16_t>(ch);
    584            }
    585            destIndex++;
    586        }else{
    587            if(destIndex+1 < destCapacity ){
    588                dest[destIndex]   = U16_LEAD(ch);
    589                dest[destIndex+1] = U16_TRAIL(ch);
    590            }
    591            destIndex +=2;
    592        }
    593       
    594    }
    595        
    596    return u_terminateUChars(dest, destCapacity, destIndex, status);
    597 }
    598 
    599 /*
    600   1) Map -- For each character in the input, check if it has a mapping
    601      and, if so, replace it with its mapping.  
    602 
    603   2) Normalize -- Possibly normalize the result of step 1 using Unicode
    604      normalization. 
    605 
    606   3) Prohibit -- Check for any characters that are not allowed in the
    607      output.  If any are found, return an error.  
    608 
    609   4) Check bidi -- Possibly check for right-to-left characters, and if
    610      any are found, make sure that the whole string satisfies the
    611      requirements for bidirectional strings.  If the string does not
    612      satisfy the requirements for bidirectional strings, return an
    613      error.  
    614      [Unicode3.2] defines several bidirectional categories; each character
    615       has one bidirectional category assigned to it.  For the purposes of
    616       the requirements below, an "RandALCat character" is a character that
    617       has Unicode bidirectional categories "R" or "AL"; an "LCat character"
    618       is a character that has Unicode bidirectional category "L".  Note
    619 
    620 
    621       that there are many characters which fall in neither of the above
    622       definitions; Latin digits (<U+0030> through <U+0039>) are examples of
    623       this because they have bidirectional category "EN".
    624 
    625       In any profile that specifies bidirectional character handling, all
    626       three of the following requirements MUST be met:
    627 
    628       1) The characters in section 5.8 MUST be prohibited.
    629 
    630       2) If a string contains any RandALCat character, the string MUST NOT
    631          contain any LCat character.
    632 
    633       3) If a string contains any RandALCat character, a RandALCat
    634          character MUST be the first character of the string, and a
    635          RandALCat character MUST be the last character of the string.
    636 */
    637 U_CAPI int32_t U_EXPORT2
    638 usprep_prepare(   const UStringPrepProfile* profile,
    639                  const char16_t* src, int32_t srcLength,
    640                  char16_t* dest, int32_t destCapacity,
    641                  int32_t options,
    642                  UParseError* parseError,
    643                  UErrorCode* status ){
    644 
    645    // check error status
    646    if(U_FAILURE(*status)){
    647        return 0;
    648    }
    649 
    650    //check arguments
    651    if(profile==nullptr ||
    652            (src==nullptr ? srcLength!=0 : srcLength<-1) ||
    653            (dest==nullptr ? destCapacity!=0 : destCapacity<0)) {
    654        *status=U_ILLEGAL_ARGUMENT_ERROR;
    655        return 0;
    656    }
    657 
    658    //get the string length
    659    if(srcLength < 0){
    660        srcLength = u_strlen(src);
    661    }
    662    // map
    663    UnicodeString s1;
    664    char16_t *b1 = s1.getBuffer(srcLength);
    665    if(b1==nullptr){
    666        *status = U_MEMORY_ALLOCATION_ERROR;
    667        return 0;
    668    }
    669    UErrorCode bufferStatus = U_ZERO_ERROR;
    670    int32_t b1Len = usprep_map(profile, src, srcLength,
    671                               b1, s1.getCapacity(), options, parseError, &bufferStatus);
    672    s1.releaseBuffer(U_SUCCESS(bufferStatus) ? b1Len : 0);
    673 
    674    if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
    675        // redo processing of string
    676        /* we do not have enough room so grow the buffer*/
    677        b1 = s1.getBuffer(b1Len);
    678        if(b1==nullptr){
    679            *status = U_MEMORY_ALLOCATION_ERROR;
    680            return 0;
    681        }
    682 
    683        bufferStatus = U_ZERO_ERROR; // reset error
    684        b1Len = usprep_map(profile, src, srcLength,
    685                           b1, s1.getCapacity(), options, parseError, &bufferStatus);
    686        s1.releaseBuffer(U_SUCCESS(bufferStatus) ? b1Len : 0);
    687    }
    688    if(U_FAILURE(bufferStatus)){
    689        *status = bufferStatus;
    690        return 0;
    691    }
    692 
    693    // normalize
    694    UnicodeString s2;
    695    if(profile->doNFKC){
    696        const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
    697        FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
    698        if(U_FAILURE(*status)){
    699            return 0;
    700        }
    701        fn2.normalize(s1, s2, *status);
    702    }else{
    703        s2.fastCopyFrom(s1);
    704    }
    705    if(U_FAILURE(*status)){
    706        return 0;
    707    }
    708 
    709    // Prohibit and checkBiDi in one pass
    710    const char16_t *b2 = s2.getBuffer();
    711    int32_t b2Len = s2.length();
    712    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    713    UBool leftToRight=false, rightToLeft=false;
    714    int32_t rtlPos =-1, ltrPos =-1;
    715 
    716    for(int32_t b2Index=0; b2Index<b2Len;){
    717        UChar32 ch = 0;
    718        U16_NEXT(b2, b2Index, b2Len, ch);
    719 
    720        uint16_t result;
    721        UTRIE_GET16(&profile->sprepTrie,ch,result);
    722 
    723        int16_t value;
    724        UBool isIndex;
    725        UStringPrepType type = getValues(result, value, isIndex);
    726 
    727        if( type == USPREP_PROHIBITED || 
    728            ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
    729           ){
    730            *status = U_STRINGPREP_PROHIBITED_ERROR;
    731            uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
    732            return 0;
    733        }
    734 
    735        if(profile->checkBiDi) {
    736            direction = ubidi_getClass(ch);
    737            if(firstCharDir == U_CHAR_DIRECTION_COUNT){
    738                firstCharDir = direction;
    739            }
    740            if(direction == U_LEFT_TO_RIGHT){
    741                leftToRight = true;
    742                ltrPos = b2Index-1;
    743            }
    744            if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
    745                rightToLeft = true;
    746                rtlPos = b2Index-1;
    747            }
    748        }
    749    }
    750    if(profile->checkBiDi){
    751        // satisfy 2
    752        if( leftToRight && rightToLeft){
    753            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    754            uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
    755            return 0;
    756        }
    757 
    758        //satisfy 3
    759        if( rightToLeft && 
    760            !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
    761              (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
    762           ){
    763            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
    764            uprv_syntaxError(b2, rtlPos, b2Len, parseError);
    765            return false;
    766        }
    767    }
    768    return s2.extract(dest, destCapacity, *status);
    769 }
    770 
    771 
    772 /* data swapping ------------------------------------------------------------ */
    773 
    774 U_CAPI int32_t U_EXPORT2
    775 usprep_swap(const UDataSwapper *ds,
    776            const void *inData, int32_t length, void *outData,
    777            UErrorCode *pErrorCode) {
    778    const UDataInfo *pInfo;
    779    int32_t headerSize;
    780 
    781    const uint8_t *inBytes;
    782    uint8_t *outBytes;
    783 
    784    const int32_t *inIndexes;
    785    int32_t indexes[16];
    786 
    787    int32_t i, offset, count, size;
    788 
    789    /* udata_swapDataHeader checks the arguments */
    790    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    791    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
    792        return 0;
    793    }
    794 
    795    /* check data format and format version */
    796    pInfo=(const UDataInfo *)((const char *)inData+4);
    797    if(!(
    798        pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
    799        pInfo->dataFormat[1]==0x50 &&
    800        pInfo->dataFormat[2]==0x52 &&
    801        pInfo->dataFormat[3]==0x50 &&
    802        pInfo->formatVersion[0]==3
    803    )) {
    804        udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
    805                         pInfo->dataFormat[0], pInfo->dataFormat[1],
    806                         pInfo->dataFormat[2], pInfo->dataFormat[3],
    807                         pInfo->formatVersion[0]);
    808        *pErrorCode=U_UNSUPPORTED_ERROR;
    809        return 0;
    810    }
    811 
    812    inBytes=(const uint8_t *)inData+headerSize;
    813    outBytes= (outData == nullptr ) ? nullptr : (uint8_t *)outData+headerSize;
    814 
    815    inIndexes=(const int32_t *)inBytes;
    816 
    817    if(length>=0) {
    818        length-=headerSize;
    819        if(length<16*4) {
    820            udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
    821                             length);
    822            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    823            return 0;
    824        }
    825    }
    826 
    827    /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
    828    for(i=0; i<16; ++i) {
    829        indexes[i]=udata_readInt32(ds, inIndexes[i]);
    830    }
    831 
    832    /* calculate the total length of the data */
    833    size=
    834        16*4+ /* size of indexes[] */
    835        indexes[_SPREP_INDEX_TRIE_SIZE]+
    836        indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    837 
    838    if(length>=0) {
    839        if(length<size) {
    840            udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
    841                             length);
    842            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    843            return 0;
    844        }
    845 
    846        /* copy the data for inaccessible bytes */
    847        if(inBytes!=outBytes) {
    848            uprv_memcpy(outBytes, inBytes, size);
    849        }
    850 
    851        offset=0;
    852 
    853        /* swap the int32_t indexes[] */
    854        count=16*4;
    855        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    856        offset+=count;
    857 
    858        /* swap the UTrie */
    859        count=indexes[_SPREP_INDEX_TRIE_SIZE];
    860        utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    861        offset+=count;
    862 
    863        /* swap the uint16_t mappingTable[] */
    864        count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    865        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    866        //offset+=count;
    867    }
    868 
    869    return headerSize+size;
    870 }
    871 
    872 #endif /* #if !UCONFIG_NO_IDNA */