tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

cstring.cpp (8773B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1997-2011, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *
     11 * File CSTRING.C
     12 *
     13 * @author       Helena Shih
     14 *
     15 * Modification History:
     16 *
     17 *   Date        Name        Description
     18 *   6/18/98     hshih       Created
     19 *   09/08/98    stephen     Added include for ctype, for Mac Port
     20 *   11/15/99    helena      Integrated S/390 IEEE changes. 
     21 ******************************************************************************
     22 */
     23 
     24 
     25 
     26 #include <stdlib.h>
     27 #include <stdio.h>
     28 #include "unicode/utypes.h"
     29 #include "cmemory.h"
     30 #include "cstring.h"
     31 #include "uassert.h"
     32 
     33 /*
     34 * We hardcode case conversion for invariant characters to match our expectation
     35 * and the compiler execution charset.
     36 * This prevents problems on systems
     37 * - with non-default casing behavior, like Turkish system locales where
     38 *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
     39 * - where there are no lowercase Latin characters at all, or using different
     40 *   codes (some old EBCDIC codepages)
     41 *
     42 * This works because the compiler usually runs on a platform where the execution
     43 * charset includes all of the invariant characters at their expected
     44 * code positions, so that the char * string literals in ICU code match
     45 * the char literals here.
     46 *
     47 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
     48 * and the set of uppercase Latin letters is discontiguous as well.
     49 */
     50 
     51 U_CAPI UBool U_EXPORT2
     52 uprv_isASCIILetter(char c) {
     53 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     54    return
     55        ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
     56        ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
     57 #else
     58    return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
     59 #endif
     60 }
     61 
     62 U_CAPI char U_EXPORT2
     63 uprv_toupper(char c) {
     64 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     65    if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
     66        c=(char)(c+('A'-'a'));
     67    }
     68 #else
     69    if('a'<=c && c<='z') {
     70        c=(char)(c+('A'-'a'));
     71    }
     72 #endif
     73    return c;
     74 }
     75 
     76 
     77 #if 0
     78 /*
     79 * Commented out because cstring.h defines uprv_tolower() to be
     80 * the same as either uprv_asciitolower() or uprv_ebcdictolower()
     81 * to reduce the amount of code to cover with tests.
     82 *
     83 * Note that this uprv_tolower() definition is likely to work for most
     84 * charset families, not just ASCII and EBCDIC, because its #else branch
     85 * is written generically.
     86 */
     87 U_CAPI char U_EXPORT2
     88 uprv_tolower(char c) {
     89 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     90    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
     91        c=(char)(c+('a'-'A'));
     92    }
     93 #else
     94    if('A'<=c && c<='Z') {
     95        c=(char)(c+('a'-'A'));
     96    }
     97 #endif
     98    return c;
     99 }
    100 #endif
    101 
    102 U_CAPI char U_EXPORT2
    103 uprv_asciitolower(char c) {
    104    if(0x41<=c && c<=0x5a) {
    105        c=(char)(c+0x20);
    106    }
    107    return c;
    108 }
    109 
    110 U_CAPI char U_EXPORT2
    111 uprv_ebcdictolower(char c) {
    112    if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
    113        (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
    114        (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
    115    ) {
    116        c=(char)(c-0x40);
    117    }
    118    return c;
    119 }
    120 
    121 
    122 U_CAPI char* U_EXPORT2
    123 T_CString_toLowerCase(char* str)
    124 {
    125    char* origPtr = str;
    126 
    127    if (str) {
    128        do
    129            *str = uprv_tolower(*str);
    130        while (*(str++));
    131    }
    132 
    133    return origPtr;
    134 }
    135 
    136 U_CAPI char* U_EXPORT2
    137 T_CString_toUpperCase(char* str)
    138 {
    139    char* origPtr = str;
    140 
    141    if (str) {
    142        do
    143            *str = uprv_toupper(*str);
    144        while (*(str++));
    145    }
    146 
    147    return origPtr;
    148 }
    149 
    150 /*
    151 * Takes a int32_t and fills in  a char* string with that number "radix"-based.
    152 * Does not handle negative values (makes an empty string for them).
    153 * Writes at most 12 chars ("-2147483647" plus NUL).
    154 * Returns the length of the string (not including the NUL).
    155 */
    156 U_CAPI int32_t U_EXPORT2
    157 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
    158 {
    159    char      tbuf[30];
    160    int32_t   tbx    = sizeof(tbuf);
    161    uint8_t   digit;
    162    int32_t   length = 0;
    163    uint32_t  uval;
    164    
    165    U_ASSERT(radix>=2 && radix<=16);
    166    uval = (uint32_t) v;
    167    if(v<0 && radix == 10) {
    168        /* Only in base 10 do we conside numbers to be signed. */
    169        uval = (uint32_t)(-v); 
    170        buffer[length++] = '-';
    171    }
    172    
    173    tbx = sizeof(tbuf)-1;
    174    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    175    do {
    176        digit = (uint8_t)(uval % radix);
    177        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
    178        uval  = uval / radix;
    179    } while (uval != 0);
    180    
    181    /* copy converted number into user buffer  */
    182    uprv_strcpy(buffer+length, tbuf+tbx);
    183    length += sizeof(tbuf) - tbx -1;
    184    return length;
    185 }
    186 
    187 
    188 
    189 /*
    190 * Takes a int64_t and fills in  a char* string with that number "radix"-based.
    191 * Writes at most 21: chars ("-9223372036854775807" plus NUL).
    192 * Returns the length of the string, not including the terminating NUL.
    193 */
    194 U_CAPI int32_t U_EXPORT2
    195 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
    196 {
    197    char      tbuf[30];
    198    int32_t   tbx    = sizeof(tbuf);
    199    uint8_t   digit;
    200    int32_t   length = 0;
    201    uint64_t  uval;
    202    
    203    U_ASSERT(radix>=2 && radix<=16);
    204    uval = (uint64_t) v;
    205    if(v<0 && radix == 10) {
    206        /* Only in base 10 do we conside numbers to be signed. */
    207        uval = (uint64_t)(-v); 
    208        buffer[length++] = '-';
    209    }
    210    
    211    tbx = sizeof(tbuf)-1;
    212    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    213    do {
    214        digit = (uint8_t)(uval % radix);
    215        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
    216        uval  = uval / radix;
    217    } while (uval != 0);
    218    
    219    /* copy converted number into user buffer  */
    220    uprv_strcpy(buffer+length, tbuf+tbx);
    221    length += sizeof(tbuf) - tbx -1;
    222    return length;
    223 }
    224 
    225 
    226 U_CAPI int32_t U_EXPORT2
    227 T_CString_stringToInteger(const char *integerString, int32_t radix)
    228 {
    229    char *end;
    230    return uprv_strtoul(integerString, &end, radix);
    231 
    232 }
    233 
    234 U_CAPI int U_EXPORT2
    235 uprv_stricmp(const char *str1, const char *str2) {
    236    if(str1==nullptr) {
    237        if(str2==nullptr) {
    238            return 0;
    239        } else {
    240            return -1;
    241        }
    242    } else if(str2==nullptr) {
    243        return 1;
    244    } else {
    245        /* compare non-nullptr strings lexically with lowercase */
    246        int rc;
    247        unsigned char c1, c2;
    248 
    249        for(;;) {
    250            c1=(unsigned char)*str1;
    251            c2=(unsigned char)*str2;
    252            if(c1==0) {
    253                if(c2==0) {
    254                    return 0;
    255                } else {
    256                    return -1;
    257                }
    258            } else if(c2==0) {
    259                return 1;
    260            } else {
    261                /* compare non-zero characters with lowercase */
    262                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
    263                if(rc!=0) {
    264                    return rc;
    265                }
    266            }
    267            ++str1;
    268            ++str2;
    269        }
    270    }
    271 }
    272 
    273 U_CAPI int U_EXPORT2
    274 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
    275    if(str1==nullptr) {
    276        if(str2==nullptr) {
    277            return 0;
    278        } else {
    279            return -1;
    280        }
    281    } else if(str2==nullptr) {
    282        return 1;
    283    } else {
    284        /* compare non-nullptr strings lexically with lowercase */
    285        int rc;
    286        unsigned char c1, c2;
    287 
    288        for(; n--;) {
    289            c1=(unsigned char)*str1;
    290            c2=(unsigned char)*str2;
    291            if(c1==0) {
    292                if(c2==0) {
    293                    return 0;
    294                } else {
    295                    return -1;
    296                }
    297            } else if(c2==0) {
    298                return 1;
    299            } else {
    300                /* compare non-zero characters with lowercase */
    301                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
    302                if(rc!=0) {
    303                    return rc;
    304                }
    305            }
    306            ++str1;
    307            ++str2;
    308        }
    309    }
    310 
    311    return 0;
    312 }
    313 
    314 U_CAPI char* U_EXPORT2
    315 uprv_strdup(const char *src) {
    316    size_t len = uprv_strlen(src) + 1;
    317    char *dup = (char *) uprv_malloc(len);
    318 
    319    if (dup) {
    320        uprv_memcpy(dup, src, len);
    321    }
    322 
    323    return dup;
    324 }
    325 
    326 U_CAPI char* U_EXPORT2
    327 uprv_strndup(const char *src, int32_t n) {
    328    char *dup;
    329 
    330    if(n < 0) {
    331        dup = uprv_strdup(src);
    332    } else {
    333        dup = (char*)uprv_malloc(n+1);
    334        if (dup) { 
    335            uprv_memcpy(dup, src, n);
    336            dup[n] = 0;
    337        }
    338    }
    339 
    340    return dup;
    341 }