tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

propname.cpp (10195B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 * Copyright (c) 2002-2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 * Author: Alan Liu
      9 * Created: October 30 2002
     10 * Since: ICU 2.4
     11 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
     12 **********************************************************************
     13 */
     14 #include "propname.h"
     15 #include "unicode/uchar.h"
     16 #include "unicode/udata.h"
     17 #include "unicode/uscript.h"
     18 #include "umutex.h"
     19 #include "cmemory.h"
     20 #include "cstring.h"
     21 #include "uarrsort.h"
     22 #include "uinvchar.h"
     23 
     24 #define INCLUDED_FROM_PROPNAME_CPP
     25 #include "propname_data.h"
     26 
     27 U_CDECL_BEGIN
     28 
     29 /**
     30 * Get the next non-ignorable ASCII character from a property name
     31 * and lowercases it.
     32 * @return ((advance count for the name)<<8)|character
     33 */
     34 static inline int32_t
     35 getASCIIPropertyNameChar(const char *name) {
     36    int32_t i;
     37    char c;
     38 
     39    /* Ignore delimiters '-', '_', and ASCII White_Space */
     40    for(i=0;
     41        (c=name[i++])==0x2d || c==0x5f ||
     42        c==0x20 || (0x09<=c && c<=0x0d);
     43    ) {}
     44 
     45    if(c!=0) {
     46        return (i << 8) | static_cast<uint8_t>(uprv_asciitolower(c));
     47    } else {
     48        return i<<8;
     49    }
     50 }
     51 
     52 /**
     53 * Get the next non-ignorable EBCDIC character from a property name
     54 * and lowercases it.
     55 * @return ((advance count for the name)<<8)|character
     56 */
     57 static inline int32_t
     58 getEBCDICPropertyNameChar(const char *name) {
     59    int32_t i;
     60    char c;
     61 
     62    /* Ignore delimiters '-', '_', and EBCDIC White_Space */
     63    for(i=0;
     64        (c=name[i++])==0x60 || c==0x6d ||
     65        c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
     66    ) {}
     67 
     68    if(c!=0) {
     69        return (i << 8) | static_cast<uint8_t>(uprv_ebcdictolower(c));
     70    } else {
     71        return i<<8;
     72    }
     73 }
     74 
     75 /**
     76 * Unicode property names and property value names are compared "loosely".
     77 *
     78 * UCD.html 4.0.1 says:
     79 *   For all property names, property value names, and for property values for
     80 *   Enumerated, Binary, or Catalog properties, use the following
     81 *   loose matching rule:
     82 *
     83 *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
     84 *
     85 * This function does just that, for (char *) name strings.
     86 * It is almost identical to ucnv_compareNames() but also ignores
     87 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
     88 *
     89 * @internal
     90 */
     91 
     92 U_CAPI int32_t U_EXPORT2
     93 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
     94    int32_t rc, r1, r2;
     95 
     96    for(;;) {
     97        r1=getASCIIPropertyNameChar(name1);
     98        r2=getASCIIPropertyNameChar(name2);
     99 
    100        /* If we reach the ends of both strings then they match */
    101        if(((r1|r2)&0xff)==0) {
    102            return 0;
    103        }
    104 
    105        /* Compare the lowercased characters */
    106        if(r1!=r2) {
    107            rc=(r1&0xff)-(r2&0xff);
    108            if(rc!=0) {
    109                return rc;
    110            }
    111        }
    112 
    113        name1+=r1>>8;
    114        name2+=r2>>8;
    115    }
    116 }
    117 
    118 U_CAPI int32_t U_EXPORT2
    119 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    120    int32_t rc, r1, r2;
    121 
    122    for(;;) {
    123        r1=getEBCDICPropertyNameChar(name1);
    124        r2=getEBCDICPropertyNameChar(name2);
    125 
    126        /* If we reach the ends of both strings then they match */
    127        if(((r1|r2)&0xff)==0) {
    128            return 0;
    129        }
    130 
    131        /* Compare the lowercased characters */
    132        if(r1!=r2) {
    133            rc=(r1&0xff)-(r2&0xff);
    134            if(rc!=0) {
    135                return rc;
    136            }
    137        }
    138 
    139        name1+=r1>>8;
    140        name2+=r2>>8;
    141    }
    142 }
    143 
    144 U_CDECL_END
    145 
    146 U_NAMESPACE_BEGIN
    147 
    148 int32_t PropNameData::findProperty(int32_t property) {
    149    int32_t i=1;  // valueMaps index, initially after numRanges
    150    for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
    151        // Read and skip the start and limit of this range.
    152        int32_t start=valueMaps[i];
    153        int32_t limit=valueMaps[i+1];
    154        i+=2;
    155        if(property<start) {
    156            break;
    157        }
    158        if(property<limit) {
    159            return i+(property-start)*2;
    160        }
    161        i+=(limit-start)*2;  // Skip all entries for this range.
    162    }
    163    return 0;
    164 }
    165 
    166 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
    167    if(valueMapIndex==0) {
    168        return 0;  // The property does not have named values.
    169    }
    170    ++valueMapIndex;  // Skip the BytesTrie offset.
    171    int32_t numRanges=valueMaps[valueMapIndex++];
    172    if(numRanges<0x10) {
    173        // Ranges of values.
    174        for(; numRanges>0; --numRanges) {
    175            // Read and skip the start and limit of this range.
    176            int32_t start=valueMaps[valueMapIndex];
    177            int32_t limit=valueMaps[valueMapIndex+1];
    178            valueMapIndex+=2;
    179            if(value<start) {
    180                break;
    181            }
    182            if(value<limit) {
    183                return valueMaps[valueMapIndex+value-start];
    184            }
    185            valueMapIndex+=limit-start;  // Skip all entries for this range.
    186        }
    187    } else {
    188        // List of values.
    189        int32_t valuesStart=valueMapIndex;
    190        int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
    191        do {
    192            int32_t v=valueMaps[valueMapIndex];
    193            if(value<v) {
    194                break;
    195            }
    196            if(value==v) {
    197                return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
    198            }
    199        } while(++valueMapIndex<nameGroupOffsetsStart);
    200    }
    201    return 0;
    202 }
    203 
    204 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
    205    int32_t numNames=*nameGroup++;
    206    if(nameIndex<0 || numNames<=nameIndex) {
    207        return nullptr;
    208    }
    209    // Skip nameIndex names.
    210    for(; nameIndex>0; --nameIndex) {
    211        nameGroup=uprv_strchr(nameGroup, 0)+1;
    212    }
    213    if(*nameGroup==0) {
    214        return nullptr;  // no name (Property[Value]Aliases.txt has "n/a")
    215    }
    216    return nameGroup;
    217 }
    218 
    219 UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
    220    if(name==nullptr) {
    221        return false;
    222    }
    223    UStringTrieResult result=USTRINGTRIE_NO_VALUE;
    224    char c;
    225    while((c=*name++)!=0) {
    226        c=uprv_invCharToLowercaseAscii(c);
    227        // Ignore delimiters '-', '_', and ASCII White_Space.
    228        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
    229            continue;
    230        }
    231        if(!USTRINGTRIE_HAS_NEXT(result)) {
    232            return false;
    233        }
    234        result = trie.next(static_cast<uint8_t>(c));
    235    }
    236    return USTRINGTRIE_HAS_VALUE(result);
    237 }
    238 
    239 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
    240    int32_t valueMapIndex=findProperty(property);
    241    if(valueMapIndex==0) {
    242        return nullptr;  // Not a known property.
    243    }
    244    return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
    245 }
    246 
    247 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
    248    int32_t valueMapIndex=findProperty(property);
    249    if(valueMapIndex==0) {
    250        return nullptr;  // Not a known property.
    251    }
    252    int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
    253    if(nameGroupOffset==0) {
    254        return nullptr;
    255    }
    256    return getName(nameGroups+nameGroupOffset, nameChoice);
    257 }
    258 
    259 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
    260    BytesTrie trie(bytesTries+bytesTrieOffset);
    261    if(containsName(trie, alias)) {
    262        return trie.getValue();
    263    } else {
    264        return UCHAR_INVALID_CODE;
    265    }
    266 }
    267 
    268 int32_t PropNameData::getPropertyEnum(const char *alias) {
    269    return getPropertyOrValueEnum(0, alias);
    270 }
    271 
    272 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
    273    int32_t valueMapIndex=findProperty(property);
    274    if(valueMapIndex==0) {
    275        return UCHAR_INVALID_CODE;  // Not a known property.
    276    }
    277    valueMapIndex=valueMaps[valueMapIndex+1];
    278    if(valueMapIndex==0) {
    279        return UCHAR_INVALID_CODE;  // The property does not have named values.
    280    }
    281    // valueMapIndex is the start of the property's valueMap,
    282    // where the first word is the BytesTrie offset.
    283    return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    284 }
    285 U_NAMESPACE_END
    286 
    287 //----------------------------------------------------------------------
    288 // Public API implementation
    289 
    290 U_CAPI const char* U_EXPORT2
    291 u_getPropertyName(UProperty property,
    292                  UPropertyNameChoice nameChoice) UPRV_NO_SANITIZE_UNDEFINED {
    293    // The nameChoice is really an integer with a couple of named constants.
    294    // Unicode allows for names other than short and long ones.
    295    // If present, these will be returned for U_LONG_PROPERTY_NAME + i, where i=1, 2,...
    296    U_NAMESPACE_USE
    297    return PropNameData::getPropertyName(property, nameChoice);
    298 }
    299 
    300 U_CAPI UProperty U_EXPORT2
    301 u_getPropertyEnum(const char* alias) {
    302    U_NAMESPACE_USE
    303    return (UProperty)PropNameData::getPropertyEnum(alias);
    304 }
    305 
    306 U_CAPI const char* U_EXPORT2
    307 u_getPropertyValueName(UProperty property,
    308                       int32_t value,
    309                       UPropertyNameChoice nameChoice) UPRV_NO_SANITIZE_UNDEFINED {
    310    // The nameChoice is really an integer with a couple of named constants.
    311    // Unicode allows for names other than short and long ones.
    312    // If present, these will be returned for U_LONG_PROPERTY_NAME + i, where i=1, 2,...
    313    U_NAMESPACE_USE
    314    return PropNameData::getPropertyValueName(property, value, nameChoice);
    315 }
    316 
    317 U_CAPI int32_t U_EXPORT2
    318 u_getPropertyValueEnum(UProperty property,
    319                       const char* alias) {
    320    U_NAMESPACE_USE
    321    return PropNameData::getPropertyValueEnum(property, alias);
    322 }
    323 
    324 U_CAPI const char*  U_EXPORT2
    325 uscript_getName(UScriptCode scriptCode){
    326    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
    327                                  U_LONG_PROPERTY_NAME);
    328 }
    329 
    330 U_CAPI const char*  U_EXPORT2
    331 uscript_getShortName(UScriptCode scriptCode){
    332    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
    333                                  U_SHORT_PROPERTY_NAME);
    334 }