uinvchar.h (5852B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 1999-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: uinvchar.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:2 14 * 15 * created on: 2004sep14 16 * created by: Markus W. Scherer 17 * 18 * Definitions for handling invariant characters, moved here from putil.c 19 * for better modularization. 20 */ 21 22 #ifndef __UINVCHAR_H__ 23 #define __UINVCHAR_H__ 24 25 #include "unicode/utypes.h" 26 #ifdef __cplusplus 27 #include "unicode/unistr.h" 28 #endif 29 30 /** 31 * Check if a char string only contains invariant characters. 32 * See utypes.h for details. 33 * 34 * @param s Input string pointer. 35 * @param length Length of the string, can be -1 if NUL-terminated. 36 * @return true if s contains only invariant characters. 37 * 38 * @internal (ICU 2.8) 39 */ 40 U_CAPI UBool U_EXPORT2 41 uprv_isInvariantString(const char *s, int32_t length); 42 43 /** 44 * Check if a Unicode string only contains invariant characters. 45 * See utypes.h for details. 46 * 47 * @param s Input string pointer. 48 * @param length Length of the string, can be -1 if NUL-terminated. 49 * @return true if s contains only invariant characters. 50 * 51 * @internal (ICU 2.8) 52 */ 53 U_CAPI UBool U_EXPORT2 54 uprv_isInvariantUString(const UChar *s, int32_t length); 55 56 /** 57 * \def U_UPPER_ORDINAL 58 * Get the ordinal number of an uppercase invariant character 59 * @internal 60 */ 61 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 62 # define U_UPPER_ORDINAL(x) ((x)-'A') 63 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 64 # define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \ 65 (((x) < 'S') ? ((x)-'J'+9) : \ 66 ((x)-'S'+18))) 67 #else 68 # error Unknown charset family! 69 #endif 70 71 #ifdef __cplusplus 72 73 U_NAMESPACE_BEGIN 74 75 /** 76 * Like U_UPPER_ORDINAL(x) but with validation. 77 * Returns 0..25 for A..Z else a value outside 0..25. 78 */ 79 inline int32_t uprv_upperOrdinal(int32_t c) { 80 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 81 return c - 'A'; 82 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 83 // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8). 84 // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout 85 if (c <= 'I') { return c - 'A'; } // A-I --> 0-8 86 if (c < 'J') { return -1; } 87 if (c <= 'R') { return c - 'J' + 9; } // J-R --> 9..17 88 if (c < 'S') { return -1; } 89 return c - 'S' + 18; // S-Z --> 18..25 90 #else 91 # error Unknown charset family! 92 #endif 93 } 94 95 // Like U_UPPER_ORDINAL(x) but for lowercase and with validation. 96 // Returns 0..25 for a..z else a value outside 0..25. 97 inline int32_t uprv_lowerOrdinal(int32_t c) { 98 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 99 return c - 'a'; 100 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 101 // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8). 102 // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout 103 if (c <= 'i') { return c - 'a'; } // a-i --> 0-8 104 if (c < 'j') { return -1; } 105 if (c <= 'r') { return c - 'j' + 9; } // j-r --> 9..17 106 if (c < 's') { return -1; } 107 return c - 's' + 18; // s-z --> 18..25 108 #else 109 # error Unknown charset family! 110 #endif 111 } 112 113 U_NAMESPACE_END 114 115 #endif 116 117 /** 118 * Returns true if c == '@' is possible. 119 * The @ sign is variant, and the @ sign used on one 120 * EBCDIC machine won't be compiled the same way on other EBCDIC based machines. 121 * @internal 122 */ 123 U_CAPI UBool 124 uprv_isEbcdicAtSign(char c); 125 126 /** 127 * \def uprv_isAtSign 128 * Returns true if c == '@' is possible. 129 * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign(). 130 * @internal 131 */ 132 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 133 # define uprv_isAtSign(c) ((c)=='@') 134 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 135 # define uprv_isAtSign(c) uprv_isEbcdicAtSign(c) 136 #else 137 # error Unknown charset family! 138 #endif 139 140 /** 141 * Compare two EBCDIC invariant-character strings in ASCII order. 142 * @internal 143 */ 144 U_CAPI int32_t U_EXPORT2 145 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2); 146 147 /** 148 * \def uprv_compareInvCharsAsAscii 149 * Compare two invariant-character strings in ASCII order. 150 * @internal 151 */ 152 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 153 # define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2) 154 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 155 # define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2) 156 #else 157 # error Unknown charset family! 158 #endif 159 160 /** 161 * Converts an EBCDIC invariant character to ASCII. 162 * @internal 163 */ 164 U_CAPI char U_EXPORT2 165 uprv_ebcdicToAscii(char c); 166 167 /** 168 * \def uprv_invCharToAscii 169 * Converts an invariant character to ASCII. 170 * @internal 171 */ 172 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 173 # define uprv_invCharToAscii(c) (c) 174 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 175 # define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c) 176 #else 177 # error Unknown charset family! 178 #endif 179 180 /** 181 * Converts an EBCDIC invariant character to lowercase ASCII. 182 * @internal 183 */ 184 U_CAPI char U_EXPORT2 185 uprv_ebcdicToLowercaseAscii(char c); 186 187 /** 188 * \def uprv_invCharToLowercaseAscii 189 * Converts an invariant character to lowercase ASCII. 190 * @internal 191 */ 192 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 193 # define uprv_invCharToLowercaseAscii uprv_asciitolower 194 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 195 # define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii 196 #else 197 # error Unknown charset family! 198 #endif 199 200 /** 201 * Copy EBCDIC to ASCII 202 * @internal 203 * @see uprv_strncpy 204 */ 205 U_CAPI uint8_t* U_EXPORT2 206 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n); 207 208 209 /** 210 * Copy ASCII to EBCDIC 211 * @internal 212 * @see uprv_strncpy 213 */ 214 U_CAPI uint8_t* U_EXPORT2 215 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n); 216 217 218 219 #endif