tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uinvchar.h (5852B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  uinvchar.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:2
     14 *
     15 *   created on: 2004sep14
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Definitions for handling invariant characters, moved here from putil.c
     19 *   for better modularization.
     20 */
     21 
     22 #ifndef __UINVCHAR_H__
     23 #define __UINVCHAR_H__
     24 
     25 #include "unicode/utypes.h"
     26 #ifdef __cplusplus
     27 #include "unicode/unistr.h"
     28 #endif
     29 
     30 /**
     31 * Check if a char string only contains invariant characters.
     32 * See utypes.h for details.
     33 *
     34 * @param s Input string pointer.
     35 * @param length Length of the string, can be -1 if NUL-terminated.
     36 * @return true if s contains only invariant characters.
     37 *
     38 * @internal (ICU 2.8)
     39 */
     40 U_CAPI UBool U_EXPORT2
     41 uprv_isInvariantString(const char *s, int32_t length);
     42 
     43 /**
     44 * Check if a Unicode string only contains invariant characters.
     45 * See utypes.h for details.
     46 *
     47 * @param s Input string pointer.
     48 * @param length Length of the string, can be -1 if NUL-terminated.
     49 * @return true if s contains only invariant characters.
     50 *
     51 * @internal (ICU 2.8)
     52 */
     53 U_CAPI UBool U_EXPORT2
     54 uprv_isInvariantUString(const UChar *s, int32_t length);
     55 
     56 /**
     57 * \def U_UPPER_ORDINAL
     58 * Get the ordinal number of an uppercase invariant character
     59 * @internal
     60 */
     61 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
     62 #   define U_UPPER_ORDINAL(x) ((x)-'A')
     63 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     64 #   define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
     65                              (((x) < 'S') ? ((x)-'J'+9) : \
     66                               ((x)-'S'+18)))
     67 #else
     68 #   error Unknown charset family!
     69 #endif
     70 
     71 #ifdef __cplusplus
     72 
     73 U_NAMESPACE_BEGIN
     74 
     75 /**
     76 * Like U_UPPER_ORDINAL(x) but with validation.
     77 * Returns 0..25 for A..Z else a value outside 0..25.
     78 */
     79 inline int32_t uprv_upperOrdinal(int32_t c) {
     80 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
     81    return c - 'A';
     82 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     83    // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
     84    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
     85    if (c <= 'I') { return c - 'A'; }  // A-I --> 0-8
     86    if (c < 'J') { return -1; }
     87    if (c <= 'R') { return c - 'J' + 9; }  // J-R --> 9..17
     88    if (c < 'S') { return -1; }
     89    return c - 'S' + 18;  // S-Z --> 18..25
     90 #else
     91 #   error Unknown charset family!
     92 #endif
     93 }
     94 
     95 // Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
     96 // Returns 0..25 for a..z else a value outside 0..25.
     97 inline int32_t uprv_lowerOrdinal(int32_t c) {
     98 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
     99    return c - 'a';
    100 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    101    // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
    102    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    103    if (c <= 'i') { return c - 'a'; }  // a-i --> 0-8
    104    if (c < 'j') { return -1; }
    105    if (c <= 'r') { return c - 'j' + 9; }  // j-r --> 9..17
    106    if (c < 's') { return -1; }
    107    return c - 's' + 18;  // s-z --> 18..25
    108 #else
    109 #   error Unknown charset family!
    110 #endif
    111 }
    112 
    113 U_NAMESPACE_END
    114 
    115 #endif
    116 
    117 /**
    118 * Returns true if c == '@' is possible.
    119 * The @ sign is variant, and the @ sign used on one
    120 * EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
    121 * @internal
    122 */
    123 U_CAPI UBool
    124 uprv_isEbcdicAtSign(char c);
    125 
    126 /**
    127 * \def uprv_isAtSign
    128 * Returns true if c == '@' is possible.
    129 * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
    130 * @internal
    131 */
    132 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    133 #   define uprv_isAtSign(c) ((c)=='@')
    134 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    135 #   define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
    136 #else
    137 #   error Unknown charset family!
    138 #endif
    139 
    140 /**
    141 * Compare two EBCDIC invariant-character strings in ASCII order.
    142 * @internal
    143 */
    144 U_CAPI int32_t U_EXPORT2
    145 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
    146 
    147 /**
    148 * \def uprv_compareInvCharsAsAscii
    149 * Compare two invariant-character strings in ASCII order.
    150 * @internal
    151 */
    152 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    153 #   define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
    154 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    155 #   define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
    156 #else
    157 #   error Unknown charset family!
    158 #endif
    159 
    160 /**
    161 * Converts an EBCDIC invariant character to ASCII.
    162 * @internal
    163 */
    164 U_CAPI char U_EXPORT2
    165 uprv_ebcdicToAscii(char c);
    166 
    167 /**
    168 * \def uprv_invCharToAscii
    169 * Converts an invariant character to ASCII.
    170 * @internal
    171 */
    172 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    173 #   define uprv_invCharToAscii(c) (c)
    174 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    175 #   define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
    176 #else
    177 #   error Unknown charset family!
    178 #endif
    179 
    180 /**
    181 * Converts an EBCDIC invariant character to lowercase ASCII.
    182 * @internal
    183 */
    184 U_CAPI char U_EXPORT2
    185 uprv_ebcdicToLowercaseAscii(char c);
    186 
    187 /**
    188 * \def uprv_invCharToLowercaseAscii
    189 * Converts an invariant character to lowercase ASCII.
    190 * @internal
    191 */
    192 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    193 #   define uprv_invCharToLowercaseAscii uprv_asciitolower
    194 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    195 #   define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
    196 #else
    197 #   error Unknown charset family!
    198 #endif
    199 
    200 /**
    201 * Copy EBCDIC to ASCII
    202 * @internal
    203 * @see uprv_strncpy
    204 */
    205 U_CAPI uint8_t* U_EXPORT2
    206 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
    207 
    208 
    209 /**
    210 * Copy ASCII to EBCDIC
    211 * @internal
    212 * @see uprv_strncpy
    213 */
    214 U_CAPI uint8_t* U_EXPORT2
    215 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
    216 
    217 
    218 
    219 #endif