tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uinvchar.cpp (19464B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2010, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  uinvchar.c
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:2
     14 *
     15 *   created on: 2004sep14
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Functions for handling invariant characters, moved here from putil.c
     19 *   for better modularization.
     20 */
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/ustring.h"
     24 #include "udataswp.h"
     25 #include "cstring.h"
     26 #include "cmemory.h"
     27 #include "uassert.h"
     28 #include "uinvchar.h"
     29 
     30 /* invariant-character handling --------------------------------------------- */
     31 
     32 /*
     33 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
     34 * appropriately for most EBCDIC codepages.
     35 *
     36 * They currently also map most other ASCII graphic characters,
     37 * appropriately for codepages 37 and 1047.
     38 * Exceptions: The characters for []^ have different codes in 37 & 1047.
     39 * Both versions are mapped to ASCII.
     40 *
     41 *    ASCII 37 1047
     42 * [     5B BA   AD
     43 * ]     5D BB   BD
     44 * ^     5E B0   5F
     45 *
     46 * There are no mappings for variant characters from Unicode to EBCDIC.
     47 *
     48 * Currently, C0 control codes are also included in these maps.
     49 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
     50 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
     51 * but there is no mapping for ASCII LF back to EBCDIC.
     52 *
     53 *    ASCII EBCDIC S/390-OE
     54 * LF    0A     25       15
     55 * NEL   85     15       25
     56 *
     57 * The maps below explicitly exclude the variant
     58 * control and graphical characters that are in ASCII-based
     59 * codepages at 0x80 and above.
     60 * "No mapping" is expressed by mapping to a 00 byte.
     61 *
     62 * These tables do not establish a converter or a codepage.
     63 */
     64 
     65 static const uint8_t asciiFromEbcdic[256]={
     66    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     67    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
     68    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
     69    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
     70 
     71    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
     72    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
     73    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
     74    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
     75 
     76    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     77    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     78    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
     79    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
     80 
     81    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     82    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     83    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     84    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
     85 };
     86 
     87 static const uint8_t ebcdicFromAscii[256]={
     88    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     89    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
     90    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
     91    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
     92 
     93    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
     94    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
     95    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
     96    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
     97 
     98    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     99    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    100    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    101    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    102 
    103    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    104    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    105    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    106    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    107 };
    108 
    109 /* Same as asciiFromEbcdic[] except maps all letters to lowercase. */
    110 static const uint8_t lowercaseAsciiFromEbcdic[256]={
    111    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    112    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    113    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    114    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
    115 
    116    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    117    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    118    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    119    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
    120 
    121    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    122    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    123    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    124    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
    125 
    126    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    127    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    128    0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    129    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
    130 };
    131 
    132 /*
    133 * Bit sets indicating which characters of the ASCII repertoire
    134 * (by ASCII/Unicode code) are "invariant".
    135 * See utypes.h for more details.
    136 *
    137 * As invariant are considered the characters of the ASCII repertoire except
    138 * for the following:
    139 * 21  '!' <exclamation mark>
    140 * 23  '#' <number sign>
    141 * 24  '$' <dollar sign>
    142 *
    143 * 40  '@' <commercial at>
    144 *
    145 * 5b  '[' <left bracket>
    146 * 5c  '\' <backslash>
    147 * 5d  ']' <right bracket>
    148 * 5e  '^' <circumflex>
    149 *
    150 * 60  '`' <grave accent>
    151 *
    152 * 7b  '{' <left brace>
    153 * 7c  '|' <vertical line>
    154 * 7d  '}' <right brace>
    155 * 7e  '~' <tilde>
    156 */
    157 static const uint32_t invariantChars[4]={
    158    0xfffffbff, /* 00..1f but not 0a */
    159    0xffffffe5, /* 20..3f but not 21 23 24 */
    160    0x87fffffe, /* 40..5f but not 40 5b..5e */
    161    0x87fffffe  /* 60..7f but not 60 7b..7e */
    162 };
    163 
    164 /*
    165 * test unsigned types (or values known to be non-negative) for invariant characters,
    166 * tests ASCII-family character values
    167 */
    168 #define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
    169 
    170 /* test signed types for invariant characters, adds test for positive values */
    171 #define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
    172 
    173 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    174 #define CHAR_TO_UCHAR(c) c
    175 #define UCHAR_TO_CHAR(c) c
    176 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    177 #define CHAR_TO_UCHAR(u) asciiFromEbcdic[u]
    178 #define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
    179 #else
    180 #   error U_CHARSET_FAMILY is not valid
    181 #endif
    182 
    183 
    184 U_CAPI void U_EXPORT2
    185 u_charsToUChars(const char *cs, char16_t *us, int32_t length) {
    186    char16_t u;
    187    uint8_t c;
    188 
    189    /*
    190     * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
    191     * For EBCDIC systems, this works for characters with codes from
    192     * codepages 37 and 1047 or compatible.
    193     */
    194    while(length>0) {
    195        c=(uint8_t)(*cs++);
    196        u=(char16_t)CHAR_TO_UCHAR(c);
    197        U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
    198        *us++=u;
    199        --length;
    200    }
    201 }
    202 
    203 U_CAPI void U_EXPORT2
    204 u_UCharsToChars(const char16_t *us, char *cs, int32_t length) {
    205    char16_t u;
    206 
    207    while(length>0) {
    208        u=*us++;
    209        if(!UCHAR_IS_INVARIANT(u)) {
    210            U_ASSERT(false); /* Variant characters were used. These are not portable in ICU. */
    211            u=0;
    212        }
    213        *cs++=(char)UCHAR_TO_CHAR(u);
    214        --length;
    215    }
    216 }
    217 
    218 U_CAPI UBool U_EXPORT2
    219 uprv_isInvariantString(const char *s, int32_t length) {
    220    uint8_t c;
    221 
    222    for(;;) {
    223        if(length<0) {
    224            /* NUL-terminated */
    225            c=(uint8_t)*s++;
    226            if(c==0) {
    227                break;
    228            }
    229        } else {
    230            /* count length */
    231            if(length==0) {
    232                break;
    233            }
    234            --length;
    235            c=(uint8_t)*s++;
    236            if(c==0) {
    237                continue; /* NUL is invariant */
    238            }
    239        }
    240        /* c!=0 now, one branch below checks c==0 for variant characters */
    241 
    242        /*
    243         * no assertions here because these functions are legitimately called
    244         * for strings with variant characters
    245         */
    246 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    247        if(!UCHAR_IS_INVARIANT(c)) {
    248            return false; /* found a variant char */
    249        }
    250 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    251        c=CHAR_TO_UCHAR(c);
    252        if(c==0 || !UCHAR_IS_INVARIANT(c)) {
    253            return false; /* found a variant char */
    254        }
    255 #else
    256 #   error U_CHARSET_FAMILY is not valid
    257 #endif
    258    }
    259    return true;
    260 }
    261 
    262 U_CAPI UBool U_EXPORT2
    263 uprv_isInvariantUString(const char16_t *s, int32_t length) {
    264    char16_t c;
    265 
    266    for(;;) {
    267        if(length<0) {
    268            /* NUL-terminated */
    269            c=*s++;
    270            if(c==0) {
    271                break;
    272            }
    273        } else {
    274            /* count length */
    275            if(length==0) {
    276                break;
    277            }
    278            --length;
    279            c=*s++;
    280        }
    281 
    282        /*
    283         * no assertions here because these functions are legitimately called
    284         * for strings with variant characters
    285         */
    286        if(!UCHAR_IS_INVARIANT(c)) {
    287            return false; /* found a variant char */
    288        }
    289    }
    290    return true;
    291 }
    292 
    293 /* UDataSwapFn implementations used in udataswp.c ------- */
    294 
    295 /* convert ASCII to EBCDIC and verify that all characters are invariant */
    296 U_CAPI int32_t U_EXPORT2
    297 uprv_ebcdicFromAscii(const UDataSwapper *ds,
    298                     const void *inData, int32_t length, void *outData,
    299                     UErrorCode *pErrorCode) {
    300    const uint8_t *s;
    301    uint8_t *t;
    302    uint8_t c;
    303 
    304    int32_t count;
    305 
    306    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
    307        return 0;
    308    }
    309    if(ds==nullptr || inData==nullptr || length<0 || (length>0 && outData==nullptr)) {
    310        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    311        return 0;
    312    }
    313 
    314    /* setup and swapping */
    315    s=(const uint8_t *)inData;
    316    t=(uint8_t *)outData;
    317    count=length;
    318    while(count>0) {
    319        c=*s++;
    320        if(!UCHAR_IS_INVARIANT(c)) {
    321            udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
    322                             length, length-count);
    323            *pErrorCode=U_INVALID_CHAR_FOUND;
    324            return 0;
    325        }
    326        *t++=ebcdicFromAscii[c];
    327        --count;
    328    }
    329 
    330    return length;
    331 }
    332 
    333 /* this function only checks and copies ASCII strings without conversion */
    334 U_CFUNC int32_t
    335 uprv_copyAscii(const UDataSwapper *ds,
    336               const void *inData, int32_t length, void *outData,
    337               UErrorCode *pErrorCode) {
    338    const uint8_t *s;
    339    uint8_t c;
    340 
    341    int32_t count;
    342 
    343    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
    344        return 0;
    345    }
    346    if(ds==nullptr || inData==nullptr || length<0 || (length>0 && outData==nullptr)) {
    347        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    348        return 0;
    349    }
    350 
    351    /* setup and checking */
    352    s=(const uint8_t *)inData;
    353    count=length;
    354    while(count>0) {
    355        c=*s++;
    356        if(!UCHAR_IS_INVARIANT(c)) {
    357            udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
    358                             length, length-count);
    359            *pErrorCode=U_INVALID_CHAR_FOUND;
    360            return 0;
    361        }
    362        --count;
    363    }
    364 
    365    if(length>0 && inData!=outData) {
    366        uprv_memcpy(outData, inData, length);
    367    }
    368 
    369    return length;
    370 }
    371 
    372 /* convert EBCDIC to ASCII and verify that all characters are invariant */
    373 U_CFUNC int32_t
    374 uprv_asciiFromEbcdic(const UDataSwapper *ds,
    375                     const void *inData, int32_t length, void *outData,
    376                     UErrorCode *pErrorCode) {
    377    const uint8_t *s;
    378    uint8_t *t;
    379    uint8_t c;
    380 
    381    int32_t count;
    382 
    383    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
    384        return 0;
    385    }
    386    if(ds==nullptr || inData==nullptr || length<0 ||  (length>0 && outData==nullptr)) {
    387        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    388        return 0;
    389    }
    390 
    391    /* setup and swapping */
    392    s=(const uint8_t *)inData;
    393    t=(uint8_t *)outData;
    394    count=length;
    395    while(count>0) {
    396        c=*s++;
    397        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
    398            udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
    399                             length, length-count);
    400            *pErrorCode=U_INVALID_CHAR_FOUND;
    401            return 0;
    402        }
    403        *t++=c;
    404        --count;
    405    }
    406 
    407    return length;
    408 }
    409 
    410 /* this function only checks and copies EBCDIC strings without conversion */
    411 U_CFUNC int32_t
    412 uprv_copyEbcdic(const UDataSwapper *ds,
    413                const void *inData, int32_t length, void *outData,
    414                UErrorCode *pErrorCode) {
    415    const uint8_t *s;
    416    uint8_t c;
    417 
    418    int32_t count;
    419 
    420    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
    421        return 0;
    422    }
    423    if(ds==nullptr || inData==nullptr || length<0 || (length>0 && outData==nullptr)) {
    424        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    425        return 0;
    426    }
    427 
    428    /* setup and checking */
    429    s=(const uint8_t *)inData;
    430    count=length;
    431    while(count>0) {
    432        c=*s++;
    433        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
    434            udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
    435                             length, length-count);
    436            *pErrorCode=U_INVALID_CHAR_FOUND;
    437            return 0;
    438        }
    439        --count;
    440    }
    441 
    442    if(length>0 && inData!=outData) {
    443        uprv_memcpy(outData, inData, length);
    444    }
    445 
    446    return length;
    447 }
    448 
    449 U_CAPI UBool
    450 uprv_isEbcdicAtSign(char c) {
    451    static const uint8_t ebcdicAtSigns[] = {
    452        0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    453    return c != 0 && uprv_strchr((const char *)ebcdicAtSigns, c) != nullptr;
    454 }
    455 
    456 /* compare invariant strings; variant characters compare less than others and unlike each other */
    457 U_CFUNC int32_t
    458 uprv_compareInvAscii(const UDataSwapper *ds,
    459                     const char *outString, int32_t outLength,
    460                     const char16_t *localString, int32_t localLength) {
    461    (void)ds;
    462    int32_t minLength;
    463    UChar32 c1, c2;
    464    uint8_t c;
    465 
    466    if(outString==nullptr || outLength<-1 || localString==nullptr || localLength<-1) {
    467        return 0;
    468    }
    469 
    470    if(outLength<0) {
    471        outLength=(int32_t)uprv_strlen(outString);
    472    }
    473    if(localLength<0) {
    474        localLength=u_strlen(localString);
    475    }
    476 
    477    minLength= outLength<localLength ? outLength : localLength;
    478 
    479    while(minLength>0) {
    480        c=(uint8_t)*outString++;
    481        if(UCHAR_IS_INVARIANT(c)) {
    482            c1=c;
    483        } else {
    484            c1=-1;
    485        }
    486 
    487        c2=*localString++;
    488        if(!UCHAR_IS_INVARIANT(c2)) {
    489            c2=-2;
    490        }
    491 
    492        if((c1-=c2)!=0) {
    493            return c1;
    494        }
    495 
    496        --minLength;
    497    }
    498 
    499    /* strings start with same prefix, compare lengths */
    500    return outLength-localLength;
    501 }
    502 
    503 U_CFUNC int32_t
    504 uprv_compareInvEbcdic(const UDataSwapper *ds,
    505                      const char *outString, int32_t outLength,
    506                      const char16_t *localString, int32_t localLength) {
    507    (void)ds;
    508    int32_t minLength;
    509    UChar32 c1, c2;
    510    uint8_t c;
    511 
    512    if(outString==nullptr || outLength<-1 || localString==nullptr || localLength<-1) {
    513        return 0;
    514    }
    515 
    516    if(outLength<0) {
    517        outLength=(int32_t)uprv_strlen(outString);
    518    }
    519    if(localLength<0) {
    520        localLength=u_strlen(localString);
    521    }
    522 
    523    minLength= outLength<localLength ? outLength : localLength;
    524 
    525    while(minLength>0) {
    526        c=(uint8_t)*outString++;
    527        if(c==0) {
    528            c1=0;
    529        } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
    530            /* c1 is set */
    531        } else {
    532            c1=-1;
    533        }
    534 
    535        c2=*localString++;
    536        if(!UCHAR_IS_INVARIANT(c2)) {
    537            c2=-2;
    538        }
    539 
    540        if((c1-=c2)!=0) {
    541            return c1;
    542        }
    543 
    544        --minLength;
    545    }
    546 
    547    /* strings start with same prefix, compare lengths */
    548    return outLength-localLength;
    549 }
    550 
    551 U_CAPI int32_t U_EXPORT2
    552 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
    553    int32_t c1, c2;
    554 
    555    for(;; ++s1, ++s2) {
    556        c1=(uint8_t)*s1;
    557        c2=(uint8_t)*s2;
    558        if(c1!=c2) {
    559            if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
    560                c1=-(int32_t)(uint8_t)*s1;
    561            }
    562            if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
    563                c2=-(int32_t)(uint8_t)*s2;
    564            }
    565            return c1-c2;
    566        } else if(c1==0) {
    567            return 0;
    568        }
    569    }
    570 }
    571 
    572 U_CAPI char U_EXPORT2
    573 uprv_ebcdicToAscii(char c) {
    574    return (char)asciiFromEbcdic[(uint8_t)c];
    575 }
    576 
    577 U_CAPI char U_EXPORT2
    578 uprv_ebcdicToLowercaseAscii(char c) {
    579    return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
    580 }
    581 
    582 U_CAPI uint8_t* U_EXPORT2
    583 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
    584 {
    585  uint8_t *orig_dst = dst;
    586 
    587  if(n==-1) { 
    588    n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
    589  }
    590  /* copy non-null */
    591  while(*src && n>0) {
    592    *(dst++) = asciiFromEbcdic[*(src++)];
    593    n--;
    594  }
    595  /* pad */
    596  while(n>0) {
    597    *(dst++) = 0;
    598    n--;
    599  }
    600  return orig_dst;
    601 }
    602 
    603 U_CAPI uint8_t* U_EXPORT2
    604 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
    605 {
    606  uint8_t *orig_dst = dst;
    607 
    608  if(n==-1) { 
    609    n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
    610  }
    611  /* copy non-null */
    612  while(*src && n>0) {
    613    char ch = ebcdicFromAscii[*(src++)];
    614    if(ch == 0) {
    615      ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
    616    }
    617    *(dst++) = ch;
    618    n--;
    619  }
    620  /* pad */
    621  while(n>0) {
    622    *(dst++) = 0;
    623    n--;
    624  }
    625  return orig_dst;
    626 }