tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uscanf_p.cpp (44489B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1998-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *
* File uscanf_p.cpp
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   12/02/98    stephen        Creation.
     17 *   03/13/99    stephen     Modified for new C API.
     18 *******************************************************************************
     19 */
     20 
     21 #include "unicode/utypes.h"
     22 
     23 #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION
     24 
     25 #include "unicode/uchar.h"
     26 #include "unicode/ustring.h"
     27 #include "unicode/unum.h"
     28 #include "unicode/udat.h"
     29 #include "unicode/uset.h"
     30 #include "uscanf.h"
     31 #include "ufmt_cmn.h"
     32 #include "ufile.h"
     33 #include "locbund.h"
     34 
     35 #include "cmemory.h"
     36 #include "ustr_cnv.h"
     37 
     38 /* flag characters for u_scanf */
     39 #define FLAG_ASTERISK 0x002A
     40 #define FLAG_PAREN 0x0028
     41 
     42 #define ISFLAG(s)    (s) == FLAG_ASTERISK || \
     43            (s) == FLAG_PAREN
     44 
     45 /* special characters for u_scanf */
     46 #define SPEC_DOLLARSIGN 0x0024
     47 
     48 /* unicode digits */
     49 #define DIGIT_ZERO 0x0030
     50 #define DIGIT_ONE 0x0031
     51 #define DIGIT_TWO 0x0032
     52 #define DIGIT_THREE 0x0033
     53 #define DIGIT_FOUR 0x0034
     54 #define DIGIT_FIVE 0x0035
     55 #define DIGIT_SIX 0x0036
     56 #define DIGIT_SEVEN 0x0037
     57 #define DIGIT_EIGHT 0x0038
     58 #define DIGIT_NINE 0x0039
     59 
     60 #define ISDIGIT(s)    (s) == DIGIT_ZERO || \
     61            (s) == DIGIT_ONE || \
     62            (s) == DIGIT_TWO || \
     63            (s) == DIGIT_THREE || \
     64            (s) == DIGIT_FOUR || \
     65            (s) == DIGIT_FIVE || \
     66            (s) == DIGIT_SIX || \
     67            (s) == DIGIT_SEVEN || \
     68            (s) == DIGIT_EIGHT || \
     69            (s) == DIGIT_NINE
     70 
     71 /* u_scanf modifiers */
     72 #define MOD_H 0x0068
     73 #define MOD_LOWERL 0x006C
     74 #define MOD_L 0x004C
     75 
     76 #define ISMOD(s)    (s) == MOD_H || \
     77            (s) == MOD_LOWERL || \
     78            (s) == MOD_L
     79 
     80 /**
     81 * Struct encapsulating a single uscanf format specification.
     82 */
     83 typedef struct u_scanf_spec_info {
     84    int32_t fWidth;         /* Width  */
     85 
     86    char16_t   fSpec;          /* Format specification  */
     87 
     88    char16_t   fPadChar;       /* Padding character  */
     89 
     90    UBool   fSkipArg;       /* true if arg should be skipped */
     91    UBool   fIsLongDouble;  /* L flag  */
     92    UBool   fIsShort;       /* h flag  */
     93    UBool   fIsLong;        /* l flag  */
     94    UBool   fIsLongLong;    /* ll flag  */
     95    UBool   fIsString;      /* true if this is a NUL-terminated string. */
     96 } u_scanf_spec_info;
     97 
     98 
     99 /**
    100 * Struct encapsulating a single u_scanf format specification.
    101 */
    102 typedef struct u_scanf_spec {
    103    u_scanf_spec_info    fInfo;        /* Information on this spec */
    104    int32_t        fArgPos;    /* Position of data in arg list */
    105 } u_scanf_spec;
    106 
    107 /**
    108 * Parse a single u_scanf format specifier in Unicode.
    109 * @param fmt A pointer to a '%' character in a u_scanf format specification.
    110 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
    111 * format specifier.
    112 * @return The number of characters contained in this specifier.
    113 */
    114 static int32_t
    115 u_scanf_parse_spec (const char16_t  *fmt,
    116            u_scanf_spec    *spec)
    117 {
    118    const char16_t *s = fmt;
    119    const char16_t *backup;
    120    u_scanf_spec_info *info = &(spec->fInfo);
    121 
    122    /* initialize spec to default values */
    123    spec->fArgPos             = -1;
    124 
    125    info->fWidth        = -1;
    126    info->fSpec         = 0x0000;
    127    info->fPadChar      = 0x0020;
    128    info->fSkipArg      = false;
    129    info->fIsLongDouble = false;
    130    info->fIsShort      = false;
    131    info->fIsLong       = false;
    132    info->fIsLongLong   = false;
    133    info->fIsString     = true;
    134 
    135 
    136    /* skip over the initial '%' */
    137    s++;
    138 
    139    /* Check for positional argument */
    140    if(ISDIGIT(*s)) {
    141 
    142        /* Save the current position */
    143        backup = s;
    144 
    145        /* handle positional parameters */
    146        if(ISDIGIT(*s)) {
    147            spec->fArgPos = *s++ - DIGIT_ZERO;
    148 
    149            while(ISDIGIT(*s)) {
    150                spec->fArgPos *= 10;
    151                spec->fArgPos += *s++ - DIGIT_ZERO;
    152            }
    153        }
    154 
    155        /* if there is no '$', don't read anything */
    156        if(*s != SPEC_DOLLARSIGN) {
    157            spec->fArgPos = -1;
    158            s = backup;
    159        }
    160        /* munge the '$' */
    161        else
    162            s++;
    163    }
    164 
    165    /* Get any format flags */
    166    while(ISFLAG(*s)) {
    167        switch(*s++) {
    168 
    169            /* skip argument */
    170        case FLAG_ASTERISK:
    171            info->fSkipArg = true;
    172            break;
    173 
    174            /* pad character specified */
    175        case FLAG_PAREN:
    176 
    177            /* first four characters are hex values for pad char */
    178            info->fPadChar = static_cast<char16_t>(ufmt_digitvalue(*s++));
    179            info->fPadChar = static_cast<char16_t>((info->fPadChar * 16) + ufmt_digitvalue(*s++));
    180            info->fPadChar = static_cast<char16_t>((info->fPadChar * 16) + ufmt_digitvalue(*s++));
    181            info->fPadChar = static_cast<char16_t>((info->fPadChar * 16) + ufmt_digitvalue(*s++));
    182 
    183            /* final character is ignored */
    184            s++;
    185 
    186            break;
    187        }
    188    }
    189 
    190    /* Get the width */
    191    if(ISDIGIT(*s)){
    192        info->fWidth = *s++ - DIGIT_ZERO;
    193 
    194        while(ISDIGIT(*s)) {
    195            info->fWidth *= 10;
    196            info->fWidth += *s++ - DIGIT_ZERO;
    197        }
    198    }
    199 
    200    /* Get any modifiers */
    201    if(ISMOD(*s)) {
    202        switch(*s++) {
    203 
    204            /* short */
    205        case MOD_H:
    206            info->fIsShort = true;
    207            break;
    208 
    209            /* long or long long */
    210        case MOD_LOWERL:
    211            if(*s == MOD_LOWERL) {
    212                info->fIsLongLong = true;
    213                /* skip over the next 'l' */
    214                s++;
    215            }
    216            else
    217                info->fIsLong = true;
    218            break;
    219 
    220            /* long double */
    221        case MOD_L:
    222            info->fIsLongDouble = true;
    223            break;
    224        }
    225    }
    226 
    227    /* finally, get the specifier letter */
    228    info->fSpec = *s++;
    229 
    230    /* return # of characters in this specifier */
    231    return static_cast<int32_t>(s - fmt);
    232 }
    233 
    234 #define UP_PERCENT 0x0025
    235 
    236 
    237 /* ANSI style formatting */
    238 /* Use US-ASCII characters only for formatting */
    239 
    240 /* % */
    241 #define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
    242 /* s */
    243 #define UFMT_STRING         {ufmt_string, u_scanf_string_handler}
    244 /* c */
    245 #define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}
    246 /* d, i */
    247 #define UFMT_INT            {ufmt_int, u_scanf_integer_handler}
    248 /* u */
    249 #define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}
    250 /* o */
    251 #define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}
    252 /* x, X */
    253 #define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}
    254 /* f */
    255 #define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}
    256 /* e, E */
    257 #define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}
    258 /* g, G */
    259 #define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}
    260 /* n */
    261 #define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}
    262 /* [ */
    263 #define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}
    264 
    265 /* non-ANSI extensions */
    266 /* Use US-ASCII characters only for formatting */
    267 
    268 /* p */
    269 #define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}
    270 /* V */
    271 #define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}
    272 /* P */
    273 #define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}
    274 /* C  K is old format */
    275 #define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}
    276 /* S  U is old format */
    277 #define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}
    278 
    279 
    280 #define UFMT_EMPTY {ufmt_empty, nullptr}
    281 
    282 /**
    283 * A u_scanf handler function.  
    284 * A u_scanf handler is responsible for handling a single u_scanf 
    285 * format specification, for example 'd' or 's'.
    286 * @param stream The UFILE to which to write output.
    287 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
    288 * information on the format specification.
    289 * @param args A pointer to the argument data
    290 * @param fmt A pointer to the first character in the format string
    291 * following the spec.
    292 * @param fmtConsumed On output, set to the number of characters consumed
    293 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
    294 * @param argConverted The number of arguments converted and assigned, or -1 if an
    295 * error occurred.
    296 * @return The number of code points consumed during reading.
    297 */
    298 typedef int32_t (*u_scanf_handler) (UFILE   *stream,
    299                   u_scanf_spec_info  *info,
    300                   ufmt_args                *args,
    301                   const char16_t           *fmt,
    302                   int32_t                  *fmtConsumed,
    303                   int32_t                  *argConverted);
    304 
    305 typedef struct u_scanf_info {
    306    ufmt_type_info info;
    307    u_scanf_handler handler;
    308 } u_scanf_info;
    309 
    310 #define USCANF_NUM_FMT_HANDLERS 108
    311 #define USCANF_SYMBOL_BUFFER_SIZE 8
    312 
    313 /* We do not use handlers for 0-0x1f */
    314 #define USCANF_BASE_FMT_HANDLERS 0x20
    315 
    316 
    317 static int32_t
    318 u_scanf_skip_leading_ws(UFILE   *input,
    319                        char16_t   pad)
    320 {
    321    char16_t   c;
    322    int32_t count = 0;
    323    UBool isNotEOF;
    324 
    325    /* skip all leading ws in the input */
    326    while (((isNotEOF = ufile_getch(input, &c)) == static_cast<UBool>(true)) && (c == pad || u_isWhitespace(c)))
    327    {
    328        count++;
    329    }
    330 
    331    /* put the final character back on the input */
    332    if(isNotEOF)
    333        u_fungetc(c, input);
    334 
    335    return count;
    336 }
    337 
    338 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
    339 static int32_t
    340 u_scanf_skip_leading_positive_sign(UFILE   *input,
    341                                   UNumberFormat *format,
    342                                   UErrorCode *status)
    343 {
    344    char16_t   c;
    345    int32_t count = 0;
    346    UBool isNotEOF;
    347    char16_t plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
    348    int32_t symbolLen;
    349    UErrorCode localStatus = U_ZERO_ERROR;
    350 
    351    if (U_SUCCESS(*status)) {
    352        symbolLen = unum_getSymbol(format,
    353            UNUM_PLUS_SIGN_SYMBOL,
    354            plusSymbol,
    355            UPRV_LENGTHOF(plusSymbol),
    356            &localStatus);
    357 
    358        if (U_SUCCESS(localStatus)) {
    359            /* skip all leading ws in the input */
    360            while (((isNotEOF = ufile_getch(input, &c)) == static_cast<UBool>(true)) && (count < symbolLen && c == plusSymbol[count]))
    361            {
    362                count++;
    363            }
    364 
    365            /* put the final character back on the input */
    366            if(isNotEOF) {
    367                u_fungetc(c, input);
    368            }
    369        }
    370    }
    371 
    372    return count;
    373 }
    374 
    375 static int32_t 
    376 u_scanf_simple_percent_handler(UFILE        *input,
    377                               u_scanf_spec_info *info,
    378                               ufmt_args    *args,
    379                               const char16_t  *fmt,
    380                               int32_t      *fmtConsumed,
    381                               int32_t      *argConverted)
    382 {
    383    (void)info;
    384    (void)args;
    385    (void)fmt;
    386    (void)fmtConsumed;
    387 
    388    /* make sure the next character in the input is a percent */
    389    *argConverted = 0;
    390    if(u_fgetc(input) != 0x0025) {
    391        *argConverted = -1;
    392    }
    393    return 1;
    394 }
    395 
    396 static int32_t
    397 u_scanf_count_handler(UFILE         *input,
    398                      u_scanf_spec_info *info,
    399                      ufmt_args     *args,
    400                      const char16_t   *fmt,
    401                      int32_t       *fmtConsumed,
    402                      int32_t       *argConverted)
    403 {
    404    (void)input;
    405    (void)fmt;
    406    (void)fmtConsumed;
    407 
    408    /* in the special case of count, the u_scanf_spec_info's width */
    409    /* will contain the # of items converted thus far */
    410    if (!info->fSkipArg) {
    411        if (info->fIsShort)
    412            *static_cast<int16_t*>(args[0].ptrValue) = static_cast<int16_t>(UINT16_MAX & info->fWidth);
    413        else if (info->fIsLongLong)
    414            *static_cast<int64_t*>(args[0].ptrValue) = info->fWidth;
    415        else
    416            *static_cast<int32_t*>(args[0].ptrValue) = static_cast<int32_t>(UINT32_MAX & info->fWidth);
    417    }
    418    *argConverted = 0;
    419 
    420    /* we converted 0 args */
    421    return 0;
    422 }
    423 
    424 static int32_t
    425 u_scanf_double_handler(UFILE        *input,
    426                       u_scanf_spec_info *info,
    427                       ufmt_args    *args,
    428                       const char16_t  *fmt,
    429                       int32_t      *fmtConsumed,
    430                       int32_t      *argConverted)
    431 {
    432    (void)fmt;
    433    (void)fmtConsumed;
    434 
    435    int32_t         len;
    436    double          num;
    437    UNumberFormat   *format;
    438    int32_t         parsePos    = 0;
    439    int32_t         skipped;
    440    UErrorCode      status      = U_ZERO_ERROR;
    441 
    442 
    443    /* skip all ws in the input */
    444    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
    445 
    446    /* fill the input's internal buffer */
    447    ufile_fill_uchar_buffer(input);
    448 
    449    /* determine the size of the input's buffer */
    450    len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
    451 
    452    /* truncate to the width, if specified */
    453    if(info->fWidth != -1)
    454        len = ufmt_min(len, info->fWidth);
    455 
    456    /* get the formatter */
    457    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
    458 
    459    /* handle error */
    460    if (format == nullptr)
    461        return 0;
    462 
    463    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
    464    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
    465 
    466    /* parse the number */
    467    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
    468 
    469    if (!info->fSkipArg) {
    470        if (info->fIsLong)
    471            *static_cast<double*>(args[0].ptrValue) = num;
    472        else if (info->fIsLongDouble)
    473            *static_cast<long double*>(args[0].ptrValue) = num;
    474        else
    475            *static_cast<float*>(args[0].ptrValue) = static_cast<float>(num);
    476    }
    477 
    478    /* mask off any necessary bits */
    479    /*  if(! info->fIsLong_double)
    480    num &= DBL_MAX;*/
    481 
    482    /* update the input's position to reflect consumed data */
    483    input->str.fPos += parsePos;
    484 
    485    /* we converted 1 arg */
    486    *argConverted = !info->fSkipArg;
    487    return parsePos + skipped;
    488 }
    489 
    490 #define UPRINTF_SYMBOL_BUFFER_SIZE 8
    491 
    492 static int32_t
    493 u_scanf_scientific_handler(UFILE        *input,
    494                           u_scanf_spec_info *info,
    495                           ufmt_args    *args,
    496                           const char16_t  *fmt,
    497                           int32_t      *fmtConsumed,
    498                           int32_t      *argConverted)
    499 {
    500    (void)fmt;
    501    (void)fmtConsumed;
    502 
    503    int32_t         len;
    504    double          num;
    505    UNumberFormat   *format;
    506    int32_t         parsePos    = 0;
    507    int32_t         skipped;
    508    UErrorCode      status      = U_ZERO_ERROR;
    509    char16_t srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
    510    int32_t srcLen, expLen;
    511    char16_t expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
    512 
    513 
    514    /* skip all ws in the input */
    515    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
    516 
    517    /* fill the input's internal buffer */
    518    ufile_fill_uchar_buffer(input);
    519 
    520    /* determine the size of the input's buffer */
    521    len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
    522 
    523    /* truncate to the width, if specified */
    524    if(info->fWidth != -1)
    525        len = ufmt_min(len, info->fWidth);
    526 
    527    /* get the formatter */
    528    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
    529 
    530    /* handle error */
    531    if (format == nullptr)
    532        return 0;
    533 
    534    /* set the appropriate flags on the formatter */
    535 
    536    srcLen = unum_getSymbol(format,
    537        UNUM_EXPONENTIAL_SYMBOL,
    538        srcExpBuf,
    539        sizeof(srcExpBuf),
    540        &status);
    541 
    542    /* Upper/lower case the e */
    543    if (info->fSpec == static_cast<char16_t>(0x65) /* e */) {
    544        expLen = u_strToLower(expBuf, static_cast<int32_t>(sizeof(expBuf)),
    545            srcExpBuf, srcLen,
    546            input->str.fBundle.fLocale,
    547            &status);
    548    }
    549    else {
    550        expLen = u_strToUpper(expBuf, static_cast<int32_t>(sizeof(expBuf)),
    551            srcExpBuf, srcLen,
    552            input->str.fBundle.fLocale,
    553            &status);
    554    }
    555 
    556    unum_setSymbol(format,
    557        UNUM_EXPONENTIAL_SYMBOL,
    558        expBuf,
    559        expLen,
    560        &status);
    561 
    562 
    563 
    564 
    565    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
    566    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
    567 
    568    /* parse the number */
    569    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
    570 
    571    if (!info->fSkipArg) {
    572        if (info->fIsLong)
    573            *static_cast<double*>(args[0].ptrValue) = num;
    574        else if (info->fIsLongDouble)
    575            *static_cast<long double*>(args[0].ptrValue) = num;
    576        else
    577            *static_cast<float*>(args[0].ptrValue) = static_cast<float>(num);
    578    }
    579 
    580    /* mask off any necessary bits */
    581    /*  if(! info->fIsLong_double)
    582    num &= DBL_MAX;*/
    583 
    584    /* update the input's position to reflect consumed data */
    585    input->str.fPos += parsePos;
    586 
    587    /* we converted 1 arg */
    588    *argConverted = !info->fSkipArg;
    589    return parsePos + skipped;
    590 }
    591 
    592 static int32_t
    593 u_scanf_scidbl_handler(UFILE        *input,
    594                       u_scanf_spec_info *info,
    595                       ufmt_args    *args,
    596                       const char16_t  *fmt,
    597                       int32_t      *fmtConsumed,
    598                       int32_t      *argConverted)
    599 {
    600    (void)fmt;
    601    (void)fmtConsumed;
    602 
    603    int32_t       len;
    604    double        num;
    605    UNumberFormat *scientificFormat, *genericFormat;
    606    /*int32_t       scientificResult, genericResult;*/
    607    double        scientificResult, genericResult;
    608    int32_t       scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
    609    int32_t       skipped;
    610    UErrorCode    scientificStatus = U_ZERO_ERROR;
    611    UErrorCode    genericStatus = U_ZERO_ERROR;
    612 
    613 
    614    /* since we can't determine by scanning the characters whether */
    615    /* a number was formatted in the 'f' or 'g' styles, parse the */
    616    /* string with both formatters, and assume whichever one */
    617    /* parsed the most is the correct formatter to use */
    618 
    619 
    620    /* skip all ws in the input */
    621    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
    622 
    623    /* fill the input's internal buffer */
    624    ufile_fill_uchar_buffer(input);
    625 
    626    /* determine the size of the input's buffer */
    627    len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
    628 
    629    /* truncate to the width, if specified */
    630    if(info->fWidth != -1)
    631        len = ufmt_min(len, info->fWidth);
    632 
    633    /* get the formatters */
    634    scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
    635    genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
    636 
    637    /* handle error */
    638    if (scientificFormat == nullptr || genericFormat == nullptr)
    639        return 0;
    640 
    641    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
    642    skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
    643 
    644    /* parse the number using each format*/
    645 
    646    scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
    647        &scientificParsePos, &scientificStatus);
    648 
    649    genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
    650        &genericParsePos, &genericStatus);
    651 
    652    /* determine which parse made it farther */
    653    if(scientificParsePos > genericParsePos) {
    654        /* stash the result in num */
    655        num = scientificResult;
    656        /* update the input's position to reflect consumed data */
    657        parsePos += scientificParsePos;
    658    }
    659    else {
    660        /* stash the result in num */
    661        num = genericResult;
    662        /* update the input's position to reflect consumed data */
    663        parsePos += genericParsePos;
    664    }
    665    input->str.fPos += parsePos;
    666 
    667    if (!info->fSkipArg) {
    668        if (info->fIsLong)
    669            *static_cast<double*>(args[0].ptrValue) = num;
    670        else if (info->fIsLongDouble)
    671            *static_cast<long double*>(args[0].ptrValue) = num;
    672        else
    673            *static_cast<float*>(args[0].ptrValue) = static_cast<float>(num);
    674    }
    675 
    676    /* mask off any necessary bits */
    677    /*  if(! info->fIsLong_double)
    678    num &= DBL_MAX;*/
    679 
    680    /* we converted 1 arg */
    681    *argConverted = !info->fSkipArg;
    682    return parsePos + skipped;
    683 }
    684 
    685 static int32_t
    686 u_scanf_integer_handler(UFILE       *input,
    687                        u_scanf_spec_info *info,
    688                        ufmt_args   *args,
    689                        const char16_t *fmt,
    690                        int32_t     *fmtConsumed,
    691                        int32_t     *argConverted)
    692 {
    693    (void)fmt;
    694    (void)fmtConsumed;
    695 
    696    int32_t         len;
    697    void* num = args[0].ptrValue;
    698    UNumberFormat   *format, *localFormat;
    699    int32_t         parsePos    = 0;
    700    int32_t         skipped;
    701    int32_t         parseIntOnly = 0;
    702    UErrorCode      status      = U_ZERO_ERROR;
    703    int64_t         result;
    704 
    705 
    706    /* skip all ws in the input */
    707    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
    708 
    709    /* fill the input's internal buffer */
    710    ufile_fill_uchar_buffer(input);
    711 
    712    /* determine the size of the input's buffer */
    713    len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
    714 
    715    /* truncate to the width, if specified */
    716    if(info->fWidth != -1)
    717        len = ufmt_min(len, info->fWidth);
    718 
    719    /* get the formatter */
    720    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
    721 
    722    /* handle error */
    723    if (format == nullptr)
    724        return 0;
    725 
    726    /* for integer types, do not attempt to parse fractions */
    727    localFormat = unum_clone(format, &status);
    728    if(U_FAILURE(status))
    729        return 0;
    730 
    731    if(info->fSpec == 'd' || info->fSpec == 'i' || info->fSpec == 'u')
    732        parseIntOnly = 1;
    733    unum_setAttribute(localFormat, UNUM_PARSE_INT_ONLY, parseIntOnly);
    734 
    735    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
    736    skipped += u_scanf_skip_leading_positive_sign(input, localFormat, &status);
    737 
    738    /* parse the number */
    739    result = unum_parseInt64(localFormat, input->str.fPos, len, &parsePos, &status);
    740 
    741    /* mask off any necessary bits */
    742    if (!info->fSkipArg) {
    743        if (info->fIsShort)
    744            *static_cast<int16_t*>(num) = static_cast<int16_t>(UINT16_MAX & result);
    745        else if (info->fIsLongLong)
    746            *static_cast<int64_t*>(num) = result;
    747        else
    748            *static_cast<int32_t*>(num) = static_cast<int32_t>(UINT32_MAX & result);
    749    }
    750 
    751    /* update the input's position to reflect consumed data */
    752    input->str.fPos += parsePos;
    753 
    754    /* cleanup cloned formatter */
    755    unum_close(localFormat);
    756 
    757    /* we converted 1 arg */
    758    *argConverted = !info->fSkipArg;
    759    return parsePos + skipped;
    760 }
    761 
    762 static int32_t
    763 u_scanf_uinteger_handler(UFILE          *input,
    764                         u_scanf_spec_info *info,
    765                         ufmt_args      *args,
    766                         const char16_t *fmt,
    767                         int32_t        *fmtConsumed,
    768                         int32_t        *argConverted)
    769 {
    770    /* TODO Fix this when Numberformat handles uint64_t */
    771    return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
    772 }
    773 
    774 static int32_t
    775 u_scanf_percent_handler(UFILE       *input,
    776                        u_scanf_spec_info *info,
    777                        ufmt_args   *args,
    778                        const char16_t *fmt,
    779                        int32_t     *fmtConsumed,
    780                        int32_t     *argConverted)
    781 {
    782    (void)fmt;
    783    (void)fmtConsumed;
    784 
    785    int32_t         len;
    786    double          num;
    787    UNumberFormat   *format;
    788    int32_t         parsePos    = 0;
    789    UErrorCode      status      = U_ZERO_ERROR;
    790 
    791 
    792    /* skip all ws in the input */
    793    u_scanf_skip_leading_ws(input, info->fPadChar);
    794 
    795    /* fill the input's internal buffer */
    796    ufile_fill_uchar_buffer(input);
    797 
    798    /* determine the size of the input's buffer */
    799    len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
    800 
    801    /* truncate to the width, if specified */
    802    if(info->fWidth != -1)
    803        len = ufmt_min(len, info->fWidth);
    804 
    805    /* get the formatter */
    806    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
    807 
    808    /* handle error */
    809    if (format == nullptr)
    810        return 0;
    811 
    812    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
    813    u_scanf_skip_leading_positive_sign(input, format, &status);
    814 
    815    /* parse the number */
    816    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
    817 
    818    if (!info->fSkipArg) {
    819        *static_cast<double*>(args[0].ptrValue) = num;
    820    }
    821 
    822    /* mask off any necessary bits */
    823    /*  if(! info->fIsLong_double)
    824    num &= DBL_MAX;*/
    825 
    826    /* update the input's position to reflect consumed data */
    827    input->str.fPos += parsePos;
    828 
    829    /* we converted 1 arg */
    830    *argConverted = !info->fSkipArg;
    831    return parsePos;
    832 }
    833 
    834 static int32_t
    835 u_scanf_string_handler(UFILE        *input,
    836                       u_scanf_spec_info *info,
    837                       ufmt_args    *args,
    838                       const char16_t  *fmt,
    839                       int32_t      *fmtConsumed,
    840                       int32_t      *argConverted)
    841 {
    842    (void)fmt;
    843    (void)fmtConsumed;
    844 
    845    const char16_t *source;
    846    UConverter  *conv;
    847    char* arg = static_cast<char*>(args[0].ptrValue);
    848    char        *alias  = arg;
    849    char        *limit;
    850    UErrorCode  status  = U_ZERO_ERROR;
    851    int32_t     count;
    852    int32_t     skipped = 0;
    853    char16_t    c;
    854    UBool       isNotEOF = false;
    855 
    856    /* skip all ws in the input */
    857    if (info->fIsString) {
    858        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
    859    }
    860 
    861    /* get the string one character at a time, truncating to the width */
    862    count = 0;
    863 
    864    /* open the default converter */
    865    conv = u_getDefaultConverter(&status);
    866 
    867    if(U_FAILURE(status))
    868        return -1;
    869 
    870    while( (info->fWidth == -1 || count < info->fWidth) 
    871        && ((isNotEOF = ufile_getch(input, &c)) == static_cast<UBool>(true))
    872        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
    873    {
    874 
    875        if (!info->fSkipArg) {
    876            /* put the character from the input onto the target */
    877            source = &c;
    878            /* Since we do this one character at a time, do it this way. */
    879            if (info->fWidth > 0) {
    880                limit = alias + info->fWidth - count;
    881            }
    882            else {
    883                limit = alias + ucnv_getMaxCharSize(conv);
    884            }
    885 
    886            /* convert the character to the default codepage */
    887            ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
    888                nullptr, true, &status);
    889 
    890            if(U_FAILURE(status)) {
    891                /* clean up */
    892                u_releaseDefaultConverter(conv);
    893                return -1;
    894            }
    895        }
    896 
    897        /* increment the count */
    898        ++count;
    899    }
    900 
    901    /* put the final character we read back on the input */
    902    if (!info->fSkipArg) {
    903        if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
    904            u_fungetc(c, input);
    905 
    906        /* add the terminator */
    907        if (info->fIsString) {
    908            *alias = 0x00;
    909        }
    910    }
    911 
    912    /* clean up */
    913    u_releaseDefaultConverter(conv);
    914 
    915    /* we converted 1 arg */
    916    *argConverted = !info->fSkipArg;
    917    return count + skipped;
    918 }
    919 
    920 static int32_t
    921 u_scanf_char_handler(UFILE          *input,
    922                     u_scanf_spec_info *info,
    923                     ufmt_args      *args,
    924                     const char16_t *fmt,
    925                     int32_t        *fmtConsumed,
    926                     int32_t        *argConverted)
    927 {
    928    if (info->fWidth < 0) {
    929        info->fWidth = 1;
    930    }
    931    info->fIsString = false;
    932    return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
    933 }
    934 
    935 static int32_t
    936 u_scanf_ustring_handler(UFILE       *input,
    937                        u_scanf_spec_info *info,
    938                        ufmt_args   *args,
    939                        const char16_t *fmt,
    940                        int32_t     *fmtConsumed,
    941                        int32_t     *argConverted)
    942 {
    943    (void)fmt;
    944    (void)fmtConsumed;
    945 
    946    char16_t* arg = static_cast<char16_t*>(args[0].ptrValue);
    947    char16_t   *alias     = arg;
    948    int32_t count;
    949    int32_t skipped = 0;
    950    char16_t   c;
    951    UBool   isNotEOF = false;
    952 
    953    /* skip all ws in the input */
    954    if (info->fIsString) {
    955        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
    956    }
    957 
    958    /* get the string one character at a time, truncating to the width */
    959    count = 0;
    960 
    961    while( (info->fWidth == -1 || count < info->fWidth)
    962        && ((isNotEOF = ufile_getch(input, &c)) == static_cast<UBool>(true))
    963        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
    964    {
    965 
    966        /* put the character from the input onto the target */
    967        if (!info->fSkipArg) {
    968            *alias++ = c;
    969        }
    970 
    971        /* increment the count */
    972        ++count;
    973    }
    974 
    975    /* put the final character we read back on the input */
    976    if (!info->fSkipArg) {
    977        if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
    978            u_fungetc(c, input);
    979        }
    980 
    981        /* add the terminator */
    982        if (info->fIsString) {
    983            *alias = 0x0000;
    984        }
    985    }
    986 
    987    /* we converted 1 arg */
    988    *argConverted = !info->fSkipArg;
    989    return count + skipped;
    990 }
    991 
    992 static int32_t
    993 u_scanf_uchar_handler(UFILE         *input,
    994                      u_scanf_spec_info *info,
    995                      ufmt_args     *args,
    996                      const char16_t   *fmt,
    997                      int32_t       *fmtConsumed,
    998                      int32_t       *argConverted)
    999 {
   1000    if (info->fWidth < 0) {
   1001        info->fWidth = 1;
   1002    }
   1003    info->fIsString = false;
   1004    return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
   1005 }
   1006 
   1007 static int32_t
   1008 u_scanf_spellout_handler(UFILE          *input,
   1009                         u_scanf_spec_info *info,
   1010                         ufmt_args      *args,
   1011                         const char16_t *fmt,
   1012                         int32_t        *fmtConsumed,
   1013                         int32_t        *argConverted)
   1014 {
   1015    (void)fmt;
   1016    (void)fmtConsumed;
   1017 
   1018    int32_t         len;
   1019    double          num;
   1020    UNumberFormat   *format;
   1021    int32_t         parsePos    = 0;
   1022    int32_t         skipped;
   1023    UErrorCode      status      = U_ZERO_ERROR;
   1024 
   1025 
   1026    /* skip all ws in the input */
   1027    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1028 
   1029    /* fill the input's internal buffer */
   1030    ufile_fill_uchar_buffer(input);
   1031 
   1032    /* determine the size of the input's buffer */
   1033    len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
   1034 
   1035    /* truncate to the width, if specified */
   1036    if(info->fWidth != -1)
   1037        len = ufmt_min(len, info->fWidth);
   1038 
   1039    /* get the formatter */
   1040    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
   1041 
   1042    /* handle error */
   1043    if (format == nullptr)
   1044        return 0;
   1045 
   1046    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
   1047    /* This is not applicable to RBNF. */
   1048    /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
   1049 
   1050    /* parse the number */
   1051    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
   1052 
   1053    if (!info->fSkipArg) {
   1054        *static_cast<double*>(args[0].ptrValue) = num;
   1055    }
   1056 
   1057    /* mask off any necessary bits */
   1058    /*  if(! info->fIsLong_double)
   1059    num &= DBL_MAX;*/
   1060 
   1061    /* update the input's position to reflect consumed data */
   1062    input->str.fPos += parsePos;
   1063 
   1064    /* we converted 1 arg */
   1065    *argConverted = !info->fSkipArg;
   1066    return parsePos + skipped;
   1067 }
   1068 
   1069 static int32_t
   1070 u_scanf_hex_handler(UFILE       *input,
   1071                    u_scanf_spec_info *info,
   1072                    ufmt_args   *args,
   1073                    const char16_t *fmt,
   1074                    int32_t     *fmtConsumed,
   1075                    int32_t     *argConverted)
   1076 {
   1077    (void)fmt;
   1078    (void)fmtConsumed;
   1079 
   1080    int32_t     len;
   1081    int32_t     skipped;
   1082    void* num = args[0].ptrValue;
   1083    int64_t     result;
   1084 
   1085    /* skip all ws in the input */
   1086    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1087 
   1088    /* fill the input's internal buffer */
   1089    ufile_fill_uchar_buffer(input);
   1090 
   1091    /* determine the size of the input's buffer */
   1092    len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
   1093 
   1094    /* truncate to the width, if specified */
   1095    if(info->fWidth != -1)
   1096        len = ufmt_min(len, info->fWidth);
   1097 
   1098    /* check for alternate form */
   1099    if( *(input->str.fPos) == 0x0030 &&
   1100        (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
   1101 
   1102        /* skip the '0' and 'x' or 'X' if present */
   1103        input->str.fPos += 2;
   1104        len -= 2;
   1105    }
   1106 
   1107    /* parse the number */
   1108    result = ufmt_uto64(input->str.fPos, &len, 16);
   1109 
   1110    /* update the input's position to reflect consumed data */
   1111    input->str.fPos += len;
   1112 
   1113    /* mask off any necessary bits */
   1114    if (!info->fSkipArg) {
   1115        if (info->fIsShort)
   1116            *static_cast<int16_t*>(num) = static_cast<int16_t>(UINT16_MAX & result);
   1117        else if (info->fIsLongLong)
   1118            *static_cast<int64_t*>(num) = result;
   1119        else
   1120            *static_cast<int32_t*>(num) = static_cast<int32_t>(UINT32_MAX & result);
   1121    }
   1122 
   1123    /* we converted 1 arg */
   1124    *argConverted = !info->fSkipArg;
   1125    return len + skipped;
   1126 }
   1127 
   1128 static int32_t
   1129 u_scanf_octal_handler(UFILE         *input,
   1130                      u_scanf_spec_info *info,
   1131                      ufmt_args     *args,
   1132                      const char16_t   *fmt,
   1133                      int32_t       *fmtConsumed,
   1134                      int32_t       *argConverted)
   1135 {
   1136    (void)fmt;
   1137    (void)fmtConsumed;
   1138 
   1139    int32_t     len;
   1140    int32_t     skipped;
   1141    void* num = args[0].ptrValue;
   1142    int64_t     result;
   1143 
   1144    /* skip all ws in the input */
   1145    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1146 
   1147    /* fill the input's internal buffer */
   1148    ufile_fill_uchar_buffer(input);
   1149 
   1150    /* determine the size of the input's buffer */
   1151    len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
   1152 
   1153    /* truncate to the width, if specified */
   1154    if(info->fWidth != -1)
   1155        len = ufmt_min(len, info->fWidth);
   1156 
   1157    /* parse the number */
   1158    result = ufmt_uto64(input->str.fPos, &len, 8);
   1159 
   1160    /* update the input's position to reflect consumed data */
   1161    input->str.fPos += len;
   1162 
   1163    /* mask off any necessary bits */
   1164    if (!info->fSkipArg) {
   1165        if (info->fIsShort)
   1166            *static_cast<int16_t*>(num) = static_cast<int16_t>(UINT16_MAX & result);
   1167        else if (info->fIsLongLong)
   1168            *static_cast<int64_t*>(num) = result;
   1169        else
   1170            *static_cast<int32_t*>(num) = static_cast<int32_t>(UINT32_MAX & result);
   1171    }
   1172 
   1173    /* we converted 1 arg */
   1174    *argConverted = !info->fSkipArg;
   1175    return len + skipped;
   1176 }
   1177 
   1178 static int32_t
   1179 u_scanf_pointer_handler(UFILE       *input,
   1180                        u_scanf_spec_info *info,
   1181                        ufmt_args   *args,
   1182                        const char16_t *fmt,
   1183                        int32_t     *fmtConsumed,
   1184                        int32_t     *argConverted)
   1185 {
   1186    (void)fmt;
   1187    (void)fmtConsumed;
   1188 
   1189    int32_t len;
   1190    int32_t skipped;
   1191    void    *result;
   1192    void** p = static_cast<void**>(args[0].ptrValue);
   1193 
   1194 
   1195    /* skip all ws in the input */
   1196    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1197 
   1198    /* fill the input's internal buffer */
   1199    ufile_fill_uchar_buffer(input);
   1200 
   1201    /* determine the size of the input's buffer */
   1202    len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
   1203 
   1204    /* truncate to the width, if specified */
   1205    if(info->fWidth != -1) {
   1206        len = ufmt_min(len, info->fWidth);
   1207    }
   1208 
   1209    /* Make sure that we don't consume too much */
   1210    if (len > static_cast<int32_t>(sizeof(void*) * 2)) {
   1211        len = static_cast<int32_t>(sizeof(void*) * 2);
   1212    }
   1213 
   1214    /* parse the pointer - assign to temporary value */
   1215    result = ufmt_utop(input->str.fPos, &len);
   1216 
   1217    if (!info->fSkipArg) {
   1218        *p = result;
   1219    }
   1220 
   1221    /* update the input's position to reflect consumed data */
   1222    input->str.fPos += len;
   1223 
   1224    /* we converted 1 arg */
   1225    *argConverted = !info->fSkipArg;
   1226    return len + skipped;
   1227 }
   1228 
   1229 static int32_t
   1230 u_scanf_scanset_handler(UFILE       *input,
   1231                        u_scanf_spec_info *info,
   1232                        ufmt_args   *args,
   1233                        const char16_t *fmt,
   1234                        int32_t     *fmtConsumed,
   1235                        int32_t     *argConverted)
   1236 {
   1237    USet        *scanset;
   1238    UErrorCode  status = U_ZERO_ERROR;
   1239    int32_t     chLeft = INT32_MAX;
   1240    UChar32     c;
   1241    char16_t* alias = static_cast<char16_t*>(args[0].ptrValue);
   1242    UBool       isNotEOF = false;
   1243    UBool       readCharacter = false;
   1244 
   1245    /* Create an empty set */
   1246    scanset = uset_open(0, -1);
   1247 
   1248    /* Back up one to get the [ */
   1249    fmt--;
   1250 
   1251    /* truncate to the width, if specified and alias the target */
   1252    if(info->fWidth >= 0) {
   1253        chLeft = info->fWidth;
   1254    }
   1255 
   1256    /* parse the scanset from the fmt string */
   1257    *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
   1258 
   1259    /* verify that the parse was successful */
   1260    if (U_SUCCESS(status)) {
   1261        c=0;
   1262 
   1263        /* grab characters one at a time and make sure they are in the scanset */
   1264        while(chLeft > 0) {
   1265            if (((isNotEOF = ufile_getch32(input, &c)) == static_cast<UBool>(true)) && uset_contains(scanset, c)) {
   1266                readCharacter = true;
   1267                if (!info->fSkipArg) {
   1268                    int32_t idx = 0;
   1269                    UBool isError = false;
   1270 
   1271                    U16_APPEND(alias, idx, chLeft, c, isError);
   1272                    if (isError) {
   1273                        break;
   1274                    }
   1275                    alias += idx;
   1276                }
   1277                chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
   1278            }
   1279            else {
   1280                /* if the character's not in the scanset, break out */
   1281                break;
   1282            }
   1283        }
   1284 
   1285        /* put the final character we read back on the input */
   1286        if(isNotEOF && chLeft > 0) {
   1287            u_fungetc(c, input);
   1288        }
   1289    }
   1290 
   1291    uset_close(scanset);
   1292 
   1293    /* if we didn't match at least 1 character, fail */
   1294    if(!readCharacter)
   1295        return -1;
   1296    /* otherwise, add the terminator */
   1297    else if (!info->fSkipArg) {
   1298        *alias = 0x00;
   1299    }
   1300 
   1301    /* we converted 1 arg */
   1302    *argConverted = !info->fSkipArg;
   1303    return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
   1304 }
   1305 
   1306 /* Use US-ASCII characters only for formatting. Most codepages have
   1307 characters 20-7F from Unicode. Using any other codepage specific
   1308 characters will make it very difficult to format the string on
   1309 non-Unicode machines */
   1310 static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
   1311 /* 0x20 */
   1312    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1313    UFMT_EMPTY,         UFMT_SIMPLE_PERCENT,UFMT_EMPTY,         UFMT_EMPTY,
   1314    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1315    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1316 
   1317 /* 0x30 */
   1318    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1319    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1320    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1321    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1322 
   1323 /* 0x40 */
   1324    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR,
   1325    UFMT_EMPTY,         UFMT_SCIENTIFIC,    UFMT_EMPTY,         UFMT_SCIDBL,
   1326 #ifdef U_USE_OBSOLETE_IO_FORMATTING
   1327    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR/*deprecated*/,
   1328 #else
   1329    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1330 #endif
   1331    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1332 
   1333 /* 0x50 */
   1334    UFMT_PERCENT,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_USTRING,
   1335 #ifdef U_USE_OBSOLETE_IO_FORMATTING
   1336    UFMT_EMPTY,         UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT,      UFMT_EMPTY,
   1337 #else
   1338    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SPELLOUT,      UFMT_EMPTY,
   1339 #endif
   1340    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SCANSET,
   1341    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1342 
   1343 /* 0x60 */
   1344    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_CHAR,
   1345    UFMT_INT,           UFMT_SCIENTIFIC,    UFMT_DOUBLE,        UFMT_SCIDBL,
   1346    UFMT_EMPTY,         UFMT_INT,           UFMT_EMPTY,         UFMT_EMPTY,
   1347    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_COUNT,         UFMT_OCTAL,
   1348 
   1349 /* 0x70 */
   1350    UFMT_POINTER,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_STRING,
   1351    UFMT_EMPTY,         UFMT_UINT,          UFMT_EMPTY,         UFMT_EMPTY,
   1352    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1353    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
   1354 };
   1355 
   1356 U_CFUNC int32_t
   1357 u_scanf_parse(UFILE     *f,
   1358            const char16_t *patternSpecification,
   1359            va_list     ap)
   1360 {
   1361    const char16_t  *alias;
   1362    int32_t         count, converted, argConsumed, cpConsumed;
   1363    uint16_t        handlerNum;
   1364 
   1365    ufmt_args       args;
   1366    u_scanf_spec    spec;
   1367    ufmt_type_info  info;
   1368    u_scanf_handler handler;
   1369 
   1370    /* alias the pattern */
   1371    alias = patternSpecification;
   1372 
   1373    /* haven't converted anything yet */
   1374    argConsumed = 0;
   1375    converted = 0;
   1376    cpConsumed = 0;
   1377 
   1378    /* iterate through the pattern */
   1379    for(;;) {
   1380 
   1381        /* match any characters up to the next '%' */
   1382        while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
   1383            alias++;
   1384        }
   1385 
   1386        /* if we aren't at a '%', or if we're at end of string, break*/
   1387        if(*alias != UP_PERCENT || *alias == 0x0000)
   1388            break;
   1389 
   1390        /* parse the specifier */
   1391        count = u_scanf_parse_spec(alias, &spec);
   1392 
   1393        /* update the pointer in pattern */
   1394        alias += count;
   1395 
   1396        handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
   1397        if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
   1398            /* skip the argument, if necessary */
   1399            /* query the info function for argument information */
   1400            info = g_u_scanf_infos[ handlerNum ].info;
   1401            if (info != ufmt_count && u_feof(f)) {
   1402                break;
   1403            }
   1404            else if(spec.fInfo.fSkipArg) {
   1405                args.ptrValue = nullptr;
   1406            }
   1407            else {
   1408                switch(info) {
   1409                case ufmt_count:
   1410                    /* set the spec's width to the # of items converted */
   1411                    spec.fInfo.fWidth = cpConsumed;
   1412                    U_FALLTHROUGH;
   1413                case ufmt_char:
   1414                case ufmt_uchar:
   1415                case ufmt_int:
   1416                case ufmt_string:
   1417                case ufmt_ustring:
   1418                case ufmt_pointer:
   1419                case ufmt_float:
   1420                case ufmt_double:
   1421                    args.ptrValue = va_arg(ap, void*);
   1422                    break;
   1423 
   1424                default:
   1425                    /* else args is ignored */
   1426                    args.ptrValue = nullptr;
   1427                    break;
   1428                }
   1429            }
   1430 
   1431            /* call the handler function */
   1432            handler = g_u_scanf_infos[ handlerNum ].handler;
   1433            if (handler != nullptr) {
   1434                /* reset count to 1 so that += for alias works. */
   1435                count = 1;
   1436 
   1437                cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
   1438 
   1439                /* if the handler encountered an error condition, break */
   1440                if(argConsumed < 0) {
   1441                    converted = -1;
   1442                    break;
   1443                }
   1444 
   1445                /* add to the # of items converted */
   1446                converted += argConsumed;
   1447 
   1448                /* update the pointer in pattern */
   1449                alias += count-1;
   1450            }
   1451            /* else do nothing */
   1452        }
   1453        /* else do nothing */
   1454 
   1455        /* just ignore unknown tags */
   1456    }
   1457 
   1458    /* return # of items converted */
   1459    return converted;
   1460 }
   1461 
   1462 #endif /* #if !UCONFIG_NO_FORMATTING */