tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

prscanf.c (15221B)


      1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 /*
      7 * Scan functions for NSPR types
      8 *
      9 * Author: Wan-Teh Chang
     10 *
     11 * Acknowledgment: The implementation is inspired by the source code
     12 * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992.
     13 */
     14 
     15 #include <limits.h>
     16 #include <ctype.h>
     17 #include <string.h>
     18 #include <stdlib.h>
     19 #include "prprf.h"
     20 #include "prdtoa.h"
     21 #include "prlog.h"
     22 #include "prerror.h"
     23 
/*
 * Callback type for reading one character from an input source.
 *
 * 'stream' is the opaque argument kept in ScanfState.stream; the scan
 * code passes it through unchanged.  The callback returns the character
 * read, or EOF if the end of the stream is reached.
 */
typedef int (*_PRGetCharFN)(void* stream);
     29 
/*
 * Callback type for pushing the character 'ch' back to 'stream'.
 *
 * The scan code pushes back whatever its last read returned, which may
 * be EOF, so an implementation has to cope with ch == EOF.
 */
typedef void (*_PRUngetCharFN)(void* stream, int ch);
     34 
/*
 * The size specifier for the integer and floating point number
 * conversions in format control strings.
 *
 * The specifier selects the type of the destination the converted value
 * is stored through:
 *   - integer conversions: none -> PRIntn/PRUintn, h -> PRInt16/PRUint16,
 *     l -> PRInt32/PRUint32, ll -> PRInt64/PRUint64;
 *   - floating point conversions: l -> PRFloat64, L -> long double,
 *     anything else -> float.
 */
typedef enum {
  _PR_size_none, /* No size specifier is given */
  _PR_size_h,    /* The 'h' specifier, suggesting "short" */
  _PR_size_l,    /* The 'l' specifier, suggesting "long" */
  _PR_size_L,    /* The 'L' specifier, meaning a 'long double' */
  _PR_size_ll    /* The 'll' specifier, suggesting "long long" */
} _PRSizeSpec;
     46 
/*
 * The collection of data that is passed between the scan function
 * and its subordinate functions.  The fields of this structure
 * serve as the input or output arguments for these functions.
 *
 * The first group of fields describes the input source and the caller's
 * argument list and lives for the whole scan.  The second group is
 * re-initialized by DoScanf for every conversion specification it
 * parses.  'converted' is reset by Convert and reports back whether
 * that conversion stored a value.
 */
typedef struct {
  _PRGetCharFN get;     /* get a character from input stream */
  _PRUngetCharFN unget; /* unget (push back) a character */
  void* stream;         /* argument for get and unget */
  va_list ap;           /* the variable argument list */
  int nChar;            /* number of characters read from 'stream';
                         * maintained by the GET and UNGET macros */

  PRBool assign;        /* assign, or suppress assignment ('*')? */
  int width;            /* field width; 0 means none was given.  The
                         * conversion routines count it down as they
                         * consume input. */
  _PRSizeSpec sizeSpec; /* 'h', 'l', 'L', or 'll' */

  PRBool converted; /* is the value actually converted? */
} ScanfState;
     65 
/*
 * GET reads the next character through the 'get' callback of 'state';
 * UNGET pushes 'ch' back through its 'unget' callback.
 *
 * Both keep state->nChar in step with the input position: GET
 * increments it before the read (also when the read yields EOF) and
 * UNGET decrements it again, so a GET followed by an UNGET leaves the
 * count unchanged.
 *
 * 'state' is evaluated more than once; pass an expression without side
 * effects.
 */
#define GET(state) ((state)->nChar++, (state)->get((state)->stream))
#define UNGET(state, ch) ((state)->nChar--, (state)->unget((state)->stream, ch))
     68 
     69 /*
     70 * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH,
     71 * are always used together.
     72 *
     73 * GET_IF_WITHIN_WIDTH calls the GET macro and assigns its return
     74 * value to 'ch' only if we have not exceeded the field width of
     75 * 'state'.  Therefore, after GET_IF_WITHIN_WIDTH, the value of
     76 * 'ch' is valid only if the macro WITHIN_WIDTH evaluates to true.
     77 */
     78 
     79 #define GET_IF_WITHIN_WIDTH(state, ch) \
     80  if (--(state)->width >= 0) {         \
     81    (ch) = GET(state);                 \
     82  }
     83 #define WITHIN_WIDTH(state) ((state)->width >= 0)
     84 
     85 /*
     86 * _pr_strtoull:
     87 *     Convert a string to an unsigned 64-bit integer.  The string
     88 *     'str' is assumed to be a representation of the integer in
     89 *     base 'base'.
     90 *
     91 * Warning:
     92 *     - Only handle base 8, 10, and 16.
     93 *     - No overflow checking.
     94 */
     95 
     96 static PRUint64 _pr_strtoull(const char* str, char** endptr, int base) {
     97  static const int BASE_MAX = 16;
     98  static const char digits[] = "0123456789abcdef";
     99  char* digitPtr;
    100  PRUint64 x; /* return value */
    101  PRInt64 base64;
    102  const char* cPtr;
    103  PRBool negative;
    104  const char* digitStart;
    105 
    106  PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16);
    107  if (base < 0 || base == 1 || base > BASE_MAX) {
    108    if (endptr) {
    109      *endptr = (char*)str;
    110      return LL_ZERO;
    111    }
    112  }
    113 
    114  cPtr = str;
    115  while (isspace(*cPtr)) {
    116    ++cPtr;
    117  }
    118 
    119  negative = PR_FALSE;
    120  if (*cPtr == '-') {
    121    negative = PR_TRUE;
    122    cPtr++;
    123  } else if (*cPtr == '+') {
    124    cPtr++;
    125  }
    126 
    127  if (base == 16) {
    128    if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) {
    129      cPtr += 2;
    130    }
    131  } else if (base == 0) {
    132    if (*cPtr != '0') {
    133      base = 10;
    134    } else if (cPtr[1] == 'x' || cPtr[1] == 'X') {
    135      base = 16;
    136      cPtr += 2;
    137    } else {
    138      base = 8;
    139    }
    140  }
    141  PR_ASSERT(base != 0);
    142  LL_I2L(base64, base);
    143  digitStart = cPtr;
    144 
    145  /* Skip leading zeros */
    146  while (*cPtr == '0') {
    147    cPtr++;
    148  }
    149 
    150  LL_I2L(x, 0);
    151  while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) {
    152    PRUint64 d;
    153 
    154    LL_I2L(d, (digitPtr - digits));
    155    LL_MUL(x, x, base64);
    156    LL_ADD(x, x, d);
    157    cPtr++;
    158  }
    159 
    160  if (cPtr == digitStart) {
    161    if (endptr) {
    162      *endptr = (char*)str;
    163    }
    164    return LL_ZERO;
    165  }
    166 
    167  if (negative) {
    168 #ifdef HAVE_LONG_LONG
    169    /* The cast to a signed type is to avoid a compiler warning */
    170    x = -(PRInt64)x;
    171 #else
    172    LL_NEG(x, x);
    173 #endif
    174  }
    175 
    176  if (endptr) {
    177    *endptr = (char*)cPtr;
    178  }
    179  return x;
    180 }
    181 
/*
 * The maximum field width (in number of characters) that is enough
 * (may be more than necessary) to represent a 64-bit integer or
 * floating point number.
 *
 * GetInt and GetFloat clamp the field width to FMAX and collect the
 * text of the field in a buffer of FMAX + 1 bytes.
 */
#define FMAX 31
/* The character GetFloat accepts as the decimal point. */
#define DECIMAL_POINT '.'
    189 
    190 static PRStatus GetInt(ScanfState* state, int code) {
    191  char buf[FMAX + 1], *p;
    192  int ch = 0;
    193  static const char digits[] = "0123456789abcdefABCDEF";
    194  PRBool seenDigit = PR_FALSE;
    195  int base;
    196  int dlen;
    197 
    198  switch (code) {
    199    case 'd':
    200    case 'u':
    201      base = 10;
    202      break;
    203    case 'i':
    204      base = 0;
    205      break;
    206    case 'x':
    207    case 'X':
    208    case 'p':
    209      base = 16;
    210      break;
    211    case 'o':
    212      base = 8;
    213      break;
    214    default:
    215      return PR_FAILURE;
    216  }
    217  if (state->width == 0 || state->width > FMAX) {
    218    state->width = FMAX;
    219  }
    220  p = buf;
    221  GET_IF_WITHIN_WIDTH(state, ch);
    222  if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
    223    *p++ = ch;
    224    GET_IF_WITHIN_WIDTH(state, ch);
    225  }
    226  if (WITHIN_WIDTH(state) && ch == '0') {
    227    seenDigit = PR_TRUE;
    228    *p++ = ch;
    229    GET_IF_WITHIN_WIDTH(state, ch);
    230    if (WITHIN_WIDTH(state) && (ch == 'x' || ch == 'X') &&
    231        (base == 0 || base == 16)) {
    232      base = 16;
    233      *p++ = ch;
    234      GET_IF_WITHIN_WIDTH(state, ch);
    235    } else if (base == 0) {
    236      base = 8;
    237    }
    238  }
    239  if (base == 0 || base == 10) {
    240    dlen = 10;
    241  } else if (base == 8) {
    242    dlen = 8;
    243  } else {
    244    PR_ASSERT(base == 16);
    245    dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */
    246  }
    247  while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) {
    248    *p++ = ch;
    249    GET_IF_WITHIN_WIDTH(state, ch);
    250    seenDigit = PR_TRUE;
    251  }
    252  if (WITHIN_WIDTH(state)) {
    253    UNGET(state, ch);
    254  }
    255  if (!seenDigit) {
    256    return PR_FAILURE;
    257  }
    258  *p = '\0';
    259  if (state->assign) {
    260    if (code == 'd' || code == 'i') {
    261      if (state->sizeSpec == _PR_size_ll) {
    262        PRInt64 llval = _pr_strtoull(buf, NULL, base);
    263        *va_arg(state->ap, PRInt64*) = llval;
    264      } else {
    265        long lval = strtol(buf, NULL, base);
    266 
    267        if (state->sizeSpec == _PR_size_none) {
    268          *va_arg(state->ap, PRIntn*) = lval;
    269        } else if (state->sizeSpec == _PR_size_h) {
    270          *va_arg(state->ap, PRInt16*) = (PRInt16)lval;
    271        } else if (state->sizeSpec == _PR_size_l) {
    272          *va_arg(state->ap, PRInt32*) = lval;
    273        } else {
    274          return PR_FAILURE;
    275        }
    276      }
    277    } else {
    278      if (state->sizeSpec == _PR_size_ll) {
    279        PRUint64 llval = _pr_strtoull(buf, NULL, base);
    280        *va_arg(state->ap, PRUint64*) = llval;
    281      } else {
    282        unsigned long lval = strtoul(buf, NULL, base);
    283 
    284        if (state->sizeSpec == _PR_size_none) {
    285          *va_arg(state->ap, PRUintn*) = lval;
    286        } else if (state->sizeSpec == _PR_size_h) {
    287          *va_arg(state->ap, PRUint16*) = (PRUint16)lval;
    288        } else if (state->sizeSpec == _PR_size_l) {
    289          *va_arg(state->ap, PRUint32*) = lval;
    290        } else {
    291          return PR_FAILURE;
    292        }
    293      }
    294    }
    295    state->converted = PR_TRUE;
    296  }
    297  return PR_SUCCESS;
    298 }
    299 
    300 static PRStatus GetFloat(ScanfState* state) {
    301  char buf[FMAX + 1], *p;
    302  int ch = 0;
    303  PRBool seenDigit = PR_FALSE;
    304 
    305  if (state->width == 0 || state->width > FMAX) {
    306    state->width = FMAX;
    307  }
    308  p = buf;
    309  GET_IF_WITHIN_WIDTH(state, ch);
    310  if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
    311    *p++ = ch;
    312    GET_IF_WITHIN_WIDTH(state, ch);
    313  }
    314  while (WITHIN_WIDTH(state) && isdigit(ch)) {
    315    *p++ = ch;
    316    GET_IF_WITHIN_WIDTH(state, ch);
    317    seenDigit = PR_TRUE;
    318  }
    319  if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) {
    320    *p++ = ch;
    321    GET_IF_WITHIN_WIDTH(state, ch);
    322    while (WITHIN_WIDTH(state) && isdigit(ch)) {
    323      *p++ = ch;
    324      GET_IF_WITHIN_WIDTH(state, ch);
    325      seenDigit = PR_TRUE;
    326    }
    327  }
    328 
    329  /*
    330   * This is not robust.  For example, "1.2e+" would confuse
    331   * the code below to read 'e' and '+', only to realize that
    332   * it should have stopped at "1.2".  But we can't push back
    333   * more than one character, so there is nothing I can do.
    334   */
    335 
    336  /* Parse exponent */
    337  if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) {
    338    *p++ = ch;
    339    GET_IF_WITHIN_WIDTH(state, ch);
    340    if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
    341      *p++ = ch;
    342      GET_IF_WITHIN_WIDTH(state, ch);
    343    }
    344    while (WITHIN_WIDTH(state) && isdigit(ch)) {
    345      *p++ = ch;
    346      GET_IF_WITHIN_WIDTH(state, ch);
    347    }
    348  }
    349  if (WITHIN_WIDTH(state)) {
    350    UNGET(state, ch);
    351  }
    352  if (!seenDigit) {
    353    return PR_FAILURE;
    354  }
    355  *p = '\0';
    356  if (state->assign) {
    357    PRFloat64 dval = PR_strtod(buf, NULL);
    358 
    359    state->converted = PR_TRUE;
    360    if (state->sizeSpec == _PR_size_l) {
    361      *va_arg(state->ap, PRFloat64*) = dval;
    362    } else if (state->sizeSpec == _PR_size_L) {
    363      *va_arg(state->ap, long double*) = dval;
    364    } else {
    365      *va_arg(state->ap, float*) = (float)dval;
    366    }
    367  }
    368  return PR_SUCCESS;
    369 }
    370 
    371 /*
    372 * Convert, and return the end of the conversion spec.
    373 * Return NULL on error.
    374 */
    375 
    376 static const char* Convert(ScanfState* state, const char* fmt) {
    377  const char* cPtr;
    378  int ch;
    379  char* cArg = NULL;
    380 
    381  state->converted = PR_FALSE;
    382  cPtr = fmt;
    383  if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') {
    384    do {
    385      ch = GET(state);
    386    } while (isspace(ch));
    387    UNGET(state, ch);
    388  }
    389  switch (*cPtr) {
    390    case 'c':
    391      if (state->assign) {
    392        cArg = va_arg(state->ap, char*);
    393      }
    394      if (state->width == 0) {
    395        state->width = 1;
    396      }
    397      for (; state->width > 0; state->width--) {
    398        ch = GET(state);
    399        if (ch == EOF) {
    400          return NULL;
    401        }
    402        if (state->assign) {
    403          *cArg++ = ch;
    404        }
    405      }
    406      if (state->assign) {
    407        state->converted = PR_TRUE;
    408      }
    409      break;
    410    case 'p':
    411    case 'd':
    412    case 'i':
    413    case 'o':
    414    case 'u':
    415    case 'x':
    416    case 'X':
    417      if (GetInt(state, *cPtr) == PR_FAILURE) {
    418        return NULL;
    419      }
    420      break;
    421    case 'e':
    422    case 'E':
    423    case 'f':
    424    case 'g':
    425    case 'G':
    426      if (GetFloat(state) == PR_FAILURE) {
    427        return NULL;
    428      }
    429      break;
    430    case 'n':
    431      /* do not consume any input */
    432      if (state->assign) {
    433        switch (state->sizeSpec) {
    434          case _PR_size_none:
    435            *va_arg(state->ap, PRIntn*) = state->nChar;
    436            break;
    437          case _PR_size_h:
    438            *va_arg(state->ap, PRInt16*) = state->nChar;
    439            break;
    440          case _PR_size_l:
    441            *va_arg(state->ap, PRInt32*) = state->nChar;
    442            break;
    443          case _PR_size_ll:
    444            LL_I2L(*va_arg(state->ap, PRInt64*), state->nChar);
    445            break;
    446          default:
    447            PR_ASSERT(0);
    448        }
    449      }
    450      break;
    451    case 's':
    452      if (state->width == 0) {
    453        state->width = INT_MAX;
    454      }
    455      if (state->assign) {
    456        cArg = va_arg(state->ap, char*);
    457      }
    458      for (; state->width > 0; state->width--) {
    459        ch = GET(state);
    460        if ((ch == EOF) || isspace(ch)) {
    461          UNGET(state, ch);
    462          break;
    463        }
    464        if (state->assign) {
    465          *cArg++ = ch;
    466        }
    467      }
    468      if (state->assign) {
    469        *cArg = '\0';
    470        state->converted = PR_TRUE;
    471      }
    472      break;
    473    case '%':
    474      ch = GET(state);
    475      if (ch != '%') {
    476        UNGET(state, ch);
    477        return NULL;
    478      }
    479      break;
    480    case '[': {
    481      PRBool complement = PR_FALSE;
    482      const char* closeBracket;
    483      size_t n;
    484 
    485      if (*++cPtr == '^') {
    486        complement = PR_TRUE;
    487        cPtr++;
    488      }
    489      closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']');
    490      if (closeBracket == NULL) {
    491        return NULL;
    492      }
    493      n = closeBracket - cPtr;
    494      if (state->width == 0) {
    495        state->width = INT_MAX;
    496      }
    497      if (state->assign) {
    498        cArg = va_arg(state->ap, char*);
    499      }
    500      for (; state->width > 0; state->width--) {
    501        ch = GET(state);
    502        if ((ch == EOF) || (!complement && !memchr(cPtr, ch, n)) ||
    503            (complement && memchr(cPtr, ch, n))) {
    504          UNGET(state, ch);
    505          break;
    506        }
    507        if (state->assign) {
    508          *cArg++ = ch;
    509        }
    510      }
    511      if (state->assign) {
    512        *cArg = '\0';
    513        state->converted = PR_TRUE;
    514      }
    515      cPtr = closeBracket;
    516    } break;
    517    default:
    518      return NULL;
    519  }
    520  return cPtr;
    521 }
    522 
    523 static PRInt32 DoScanf(ScanfState* state, const char* fmt) {
    524  PRInt32 nConverted = 0;
    525  const char* cPtr;
    526  int ch;
    527 
    528  state->nChar = 0;
    529  cPtr = fmt;
    530  while (1) {
    531    if (isspace(*cPtr)) {
    532      /* white space: skip */
    533      do {
    534        cPtr++;
    535      } while (isspace(*cPtr));
    536      do {
    537        ch = GET(state);
    538      } while (isspace(ch));
    539      UNGET(state, ch);
    540    } else if (*cPtr == '%') {
    541      /* format spec: convert */
    542      cPtr++;
    543      state->assign = PR_TRUE;
    544      if (*cPtr == '*') {
    545        cPtr++;
    546        state->assign = PR_FALSE;
    547      }
    548      for (state->width = 0; isdigit(*cPtr); cPtr++) {
    549        state->width = state->width * 10 + *cPtr - '0';
    550      }
    551      state->sizeSpec = _PR_size_none;
    552      if (*cPtr == 'h') {
    553        cPtr++;
    554        state->sizeSpec = _PR_size_h;
    555      } else if (*cPtr == 'l') {
    556        cPtr++;
    557        if (*cPtr == 'l') {
    558          cPtr++;
    559          state->sizeSpec = _PR_size_ll;
    560        } else {
    561          state->sizeSpec = _PR_size_l;
    562        }
    563      } else if (*cPtr == 'L') {
    564        cPtr++;
    565        state->sizeSpec = _PR_size_L;
    566      }
    567      cPtr = Convert(state, cPtr);
    568      if (cPtr == NULL) {
    569        return (nConverted > 0 ? nConverted : EOF);
    570      }
    571      if (state->converted) {
    572        nConverted++;
    573      }
    574      cPtr++;
    575    } else {
    576      /* others: must match */
    577      if (*cPtr == '\0') {
    578        return nConverted;
    579      }
    580      ch = GET(state);
    581      if (ch != *cPtr) {
    582        UNGET(state, ch);
    583        return nConverted;
    584      }
    585      cPtr++;
    586    }
    587  }
    588 }
    589 
    590 static int StringGetChar(void* stream) {
    591  char* cPtr = *((char**)stream);
    592 
    593  if (*cPtr == '\0') {
    594    return EOF;
    595  }
    596  *((char**)stream) = cPtr + 1;
    597  return (unsigned char)*cPtr;
    598 }
    599 
    600 static void StringUngetChar(void* stream, int ch) {
    601  char* cPtr = *((char**)stream);
    602 
    603  if (ch != EOF) {
    604    *((char**)stream) = cPtr - 1;
    605  }
    606 }
    607 
    608 PR_IMPLEMENT(PRInt32)
    609 PR_sscanf(const char* buf, const char* fmt, ...) {
    610  PRInt32 rv;
    611  ScanfState state;
    612 
    613  state.get = &StringGetChar;
    614  state.unget = &StringUngetChar;
    615  state.stream = (void*)&buf;
    616  va_start(state.ap, fmt);
    617  rv = DoScanf(&state, fmt);
    618  va_end(state.ap);
    619  return rv;
    620 }