cstring.cpp (8773B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1997-2011, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * 11 * File CSTRING.C 12 * 13 * @author Helena Shih 14 * 15 * Modification History: 16 * 17 * Date Name Description 18 * 6/18/98 hshih Created 19 * 09/08/98 stephen Added include for ctype, for Mac Port 20 * 11/15/99 helena Integrated S/390 IEEE changes. 21 ****************************************************************************** 22 */ 23 24 25 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include "unicode/utypes.h" 29 #include "cmemory.h" 30 #include "cstring.h" 31 #include "uassert.h" 32 33 /* 34 * We hardcode case conversion for invariant characters to match our expectation 35 * and the compiler execution charset. 36 * This prevents problems on systems 37 * - with non-default casing behavior, like Turkish system locales where 38 * tolower('I') maps to dotless i and toupper('i') maps to dotted I 39 * - where there are no lowercase Latin characters at all, or using different 40 * codes (some old EBCDIC codepages) 41 * 42 * This works because the compiler usually runs on a platform where the execution 43 * charset includes all of the invariant characters at their expected 44 * code positions, so that the char * string literals in ICU code match 45 * the char literals here. 46 * 47 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC 48 * and the set of uppercase Latin letters is discontiguous as well. 49 */ 50 51 U_CAPI UBool U_EXPORT2 52 uprv_isASCIILetter(char c) { 53 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 54 return 55 ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || 56 ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); 57 #else 58 return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); 59 #endif 60 } 61 62 U_CAPI char U_EXPORT2 63 uprv_toupper(char c) { 64 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 65 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { 66 c=(char)(c+('A'-'a')); 67 } 68 #else 69 if('a'<=c && c<='z') { 70 c=(char)(c+('A'-'a')); 71 } 72 #endif 73 return c; 74 } 75 76 77 #if 0 78 /* 79 * Commented out because cstring.h defines uprv_tolower() to be 80 * the same as either uprv_asciitolower() or uprv_ebcdictolower() 81 * to reduce the amount of code to cover with tests. 82 * 83 * Note that this uprv_tolower() definition is likely to work for most 84 * charset families, not just ASCII and EBCDIC, because its #else branch 85 * is written generically. 86 */ 87 U_CAPI char U_EXPORT2 88 uprv_tolower(char c) { 89 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 90 if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { 91 c=(char)(c+('a'-'A')); 92 } 93 #else 94 if('A'<=c && c<='Z') { 95 c=(char)(c+('a'-'A')); 96 } 97 #endif 98 return c; 99 } 100 #endif 101 102 U_CAPI char U_EXPORT2 103 uprv_asciitolower(char c) { 104 if(0x41<=c && c<=0x5a) { 105 c=(char)(c+0x20); 106 } 107 return c; 108 } 109 110 U_CAPI char U_EXPORT2 111 uprv_ebcdictolower(char c) { 112 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || 113 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || 114 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) 115 ) { 116 c=(char)(c-0x40); 117 } 118 return c; 119 } 120 121 122 U_CAPI char* U_EXPORT2 123 T_CString_toLowerCase(char* str) 124 { 125 char* origPtr = str; 126 127 if (str) { 128 do 129 *str = uprv_tolower(*str); 130 while (*(str++)); 131 } 132 133 return origPtr; 134 } 135 136 U_CAPI char* U_EXPORT2 137 T_CString_toUpperCase(char* str) 138 { 139 char* origPtr = str; 140 141 if (str) { 142 do 143 *str = uprv_toupper(*str); 144 while (*(str++)); 145 } 146 147 return origPtr; 148 } 149 150 /* 151 * Takes a int32_t and fills in a char* string with that number "radix"-based. 152 * Does not handle negative values (makes an empty string for them). 153 * Writes at most 12 chars ("-2147483647" plus NUL). 154 * Returns the length of the string (not including the NUL). 155 */ 156 U_CAPI int32_t U_EXPORT2 157 T_CString_integerToString(char* buffer, int32_t v, int32_t radix) 158 { 159 char tbuf[30]; 160 int32_t tbx = sizeof(tbuf); 161 uint8_t digit; 162 int32_t length = 0; 163 uint32_t uval; 164 165 U_ASSERT(radix>=2 && radix<=16); 166 uval = (uint32_t) v; 167 if(v<0 && radix == 10) { 168 /* Only in base 10 do we conside numbers to be signed. */ 169 uval = (uint32_t)(-v); 170 buffer[length++] = '-'; 171 } 172 173 tbx = sizeof(tbuf)-1; 174 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 175 do { 176 digit = (uint8_t)(uval % radix); 177 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 178 uval = uval / radix; 179 } while (uval != 0); 180 181 /* copy converted number into user buffer */ 182 uprv_strcpy(buffer+length, tbuf+tbx); 183 length += sizeof(tbuf) - tbx -1; 184 return length; 185 } 186 187 188 189 /* 190 * Takes a int64_t and fills in a char* string with that number "radix"-based. 191 * Writes at most 21: chars ("-9223372036854775807" plus NUL). 192 * Returns the length of the string, not including the terminating NUL. 193 */ 194 U_CAPI int32_t U_EXPORT2 195 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) 196 { 197 char tbuf[30]; 198 int32_t tbx = sizeof(tbuf); 199 uint8_t digit; 200 int32_t length = 0; 201 uint64_t uval; 202 203 U_ASSERT(radix>=2 && radix<=16); 204 uval = (uint64_t) v; 205 if(v<0 && radix == 10) { 206 /* Only in base 10 do we conside numbers to be signed. */ 207 uval = (uint64_t)(-v); 208 buffer[length++] = '-'; 209 } 210 211 tbx = sizeof(tbuf)-1; 212 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 213 do { 214 digit = (uint8_t)(uval % radix); 215 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 216 uval = uval / radix; 217 } while (uval != 0); 218 219 /* copy converted number into user buffer */ 220 uprv_strcpy(buffer+length, tbuf+tbx); 221 length += sizeof(tbuf) - tbx -1; 222 return length; 223 } 224 225 226 U_CAPI int32_t U_EXPORT2 227 T_CString_stringToInteger(const char *integerString, int32_t radix) 228 { 229 char *end; 230 return uprv_strtoul(integerString, &end, radix); 231 232 } 233 234 U_CAPI int U_EXPORT2 235 uprv_stricmp(const char *str1, const char *str2) { 236 if(str1==nullptr) { 237 if(str2==nullptr) { 238 return 0; 239 } else { 240 return -1; 241 } 242 } else if(str2==nullptr) { 243 return 1; 244 } else { 245 /* compare non-nullptr strings lexically with lowercase */ 246 int rc; 247 unsigned char c1, c2; 248 249 for(;;) { 250 c1=(unsigned char)*str1; 251 c2=(unsigned char)*str2; 252 if(c1==0) { 253 if(c2==0) { 254 return 0; 255 } else { 256 return -1; 257 } 258 } else if(c2==0) { 259 return 1; 260 } else { 261 /* compare non-zero characters with lowercase */ 262 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 263 if(rc!=0) { 264 return rc; 265 } 266 } 267 ++str1; 268 ++str2; 269 } 270 } 271 } 272 273 U_CAPI int U_EXPORT2 274 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { 275 if(str1==nullptr) { 276 if(str2==nullptr) { 277 return 0; 278 } else { 279 return -1; 280 } 281 } else if(str2==nullptr) { 282 return 1; 283 } else { 284 /* compare non-nullptr strings lexically with lowercase */ 285 int rc; 286 unsigned char c1, c2; 287 288 for(; n--;) { 289 c1=(unsigned char)*str1; 290 c2=(unsigned char)*str2; 291 if(c1==0) { 292 if(c2==0) { 293 return 0; 294 } else { 295 return -1; 296 } 297 } else if(c2==0) { 298 return 1; 299 } else { 300 /* compare non-zero characters with lowercase */ 301 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 302 if(rc!=0) { 303 return rc; 304 } 305 } 306 ++str1; 307 ++str2; 308 } 309 } 310 311 return 0; 312 } 313 314 U_CAPI char* U_EXPORT2 315 uprv_strdup(const char *src) { 316 size_t len = uprv_strlen(src) + 1; 317 char *dup = (char *) uprv_malloc(len); 318 319 if (dup) { 320 uprv_memcpy(dup, src, len); 321 } 322 323 return dup; 324 } 325 326 U_CAPI char* U_EXPORT2 327 uprv_strndup(const char *src, int32_t n) { 328 char *dup; 329 330 if(n < 0) { 331 dup = uprv_strdup(src); 332 } else { 333 dup = (char*)uprv_malloc(n+1); 334 if (dup) { 335 uprv_memcpy(dup, src, n); 336 dup[n] = 0; 337 } 338 } 339 340 return dup; 341 }