tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

scanf.c (8938B)


      1 /* Copyright (c) 2003-2004, Roger Dingledine
      2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
      3 * Copyright (c) 2007-2021, The Tor Project, Inc. */
      4 /* See LICENSE for licensing information */
      5 
      6 /**
      7 * \file scanf.c
      8 * \brief Locale-independent minimal implementation of sscanf().
      9 **/
     10 
     11 #include "lib/string/scanf.h"
     12 #include "lib/string/compat_ctype.h"
     13 #include "lib/cc/torint.h"
     14 #include "lib/err/torerr.h"
     15 
     16 #include <stdlib.h>
     17 
     18 #define MAX_SCANF_WIDTH 9999
     19 
     20 /** Helper: given an ASCII-encoded decimal digit, return its numeric value.
     21 * NOTE: requires that its input be in-bounds. */
     22 static int
     23 digit_to_num(char d)
     24 {
     25  int num = ((int)d) - (int)'0';
     26  raw_assert(num <= 9 && num >= 0);
     27  return num;
     28 }
     29 
     30 /** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b>
     31 * characters.  (Handle arbitrary width if <b>width</b> is less than 0.)  On
     32 * success, store the result in <b>out</b>, advance bufp to the next
     33 * character, and return 0.  On failure, return -1. */
     34 static int
     35 scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
     36 {
     37  unsigned long result = 0;
     38  int scanned_so_far = 0;
     39  const int hex = base==16;
     40  raw_assert(base == 10 || base == 16);
     41  if (!bufp || !*bufp || !out)
     42    return -1;
     43  if (width<0)
     44    width=MAX_SCANF_WIDTH;
     45 
     46  while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
     47         && scanned_so_far < width) {
     48    unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
     49    // Check for overflow beforehand, without actually causing any overflow
     50    // This preserves functionality on compilers that don't wrap overflow
     51    // (i.e. that trap or optimise away overflow)
     52    // result * base + digit > ULONG_MAX
     53    // result * base > ULONG_MAX - digit
     54    if (result > (ULONG_MAX - digit)/base)
     55      return -1; /* Processing this digit would overflow */
     56    result = result * base + digit;
     57    ++scanned_so_far;
     58  }
     59 
     60  if (!scanned_so_far) /* No actual digits scanned */
     61    return -1;
     62 
     63  *out = result;
     64  return 0;
     65 }
     66 
     67 /** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b>
     68 * characters.  (Handle arbitrary width if <b>width</b> is less than 0.)  On
     69 * success, store the result in <b>out</b>, advance bufp to the next
     70 * character, and return 0.  On failure, return -1. */
     71 static int
     72 scan_signed(const char **bufp, long *out, int width)
     73 {
     74  int neg = 0;
     75  unsigned long result = 0;
     76 
     77  if (!bufp || !*bufp || !out)
     78    return -1;
     79  if (width<0)
     80    width=MAX_SCANF_WIDTH;
     81 
     82  if (**bufp == '-') {
     83    neg = 1;
     84    ++*bufp;
     85    --width;
     86  }
     87 
     88  if (scan_unsigned(bufp, &result, width, 10) < 0)
     89    return -1;
     90 
     91  if (neg && result > 0) {
     92    if (result > ((unsigned long)LONG_MAX) + 1)
     93      return -1; /* Underflow */
     94    else if (result == ((unsigned long)LONG_MAX) + 1)
     95      *out = LONG_MIN;
     96    else {
     97      /* We once had a far more clever no-overflow conversion here, but
     98       * some versions of GCC apparently ran it into the ground.  Now
     99       * we just check for LONG_MIN explicitly.
    100       */
    101      *out = -(long)result;
    102    }
    103  } else {
    104    if (result > LONG_MAX)
    105      return -1; /* Overflow */
    106    *out = (long)result;
    107  }
    108 
    109  return 0;
    110 }
    111 
    112 /** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
    113 * <b>width</b> characters.  (Handle arbitrary width if <b>width</b> is less
    114 * than 0.)  On success, store the result in <b>out</b>, advance bufp to the
    115 * next character, and return 0.  On failure, return -1. */
    116 static int
    117 scan_double(const char **bufp, double *out, int width)
    118 {
    119  int neg = 0;
    120  double result = 0;
    121  int scanned_so_far = 0;
    122 
    123  if (!bufp || !*bufp || !out)
    124    return -1;
    125  if (width<0)
    126    width=MAX_SCANF_WIDTH;
    127 
    128  if (**bufp == '-') {
    129    neg = 1;
    130    ++*bufp;
    131  }
    132 
    133  while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
    134    const int digit = digit_to_num(*(*bufp)++);
    135    result = result * 10 + digit;
    136    ++scanned_so_far;
    137  }
    138  if (**bufp == '.') {
    139    double fracval = 0, denominator = 1;
    140    ++*bufp;
    141    ++scanned_so_far;
    142    while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
    143      const int digit = digit_to_num(*(*bufp)++);
    144      fracval = fracval * 10 + digit;
    145      denominator *= 10;
    146      ++scanned_so_far;
    147    }
    148    result += fracval / denominator;
    149  }
    150 
    151  if (!scanned_so_far) /* No actual digits scanned */
    152    return -1;
    153 
    154  *out = neg ? -result : result;
    155  return 0;
    156 }
    157 
    158 /** Helper: copy up to <b>width</b> non-space characters from <b>bufp</b> to
    159 * <b>out</b>.  Make sure <b>out</b> is nul-terminated. Advance <b>bufp</b>
    160 * to the next non-space character or the EOS. */
    161 static int
    162 scan_string(const char **bufp, char *out, int width)
    163 {
    164  int scanned_so_far = 0;
    165  if (!bufp || !out || width < 0)
    166    return -1;
    167  while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
    168    *out++ = *(*bufp)++;
    169    ++scanned_so_far;
    170  }
    171  *out = '\0';
    172  return 0;
    173 }
    174 
    175 /** Locale-independent, minimal, no-surprises scanf variant, accepting only a
    176 * restricted pattern format.  For more info on what it supports, see
    177 * tor_sscanf() documentation.  */
    178 int
    179 tor_vsscanf(const char *buf, const char *pattern, va_list ap)
    180 {
    181  int n_matched = 0;
    182 
    183  while (*pattern) {
    184    if (*pattern != '%') {
    185      if (*buf == *pattern) {
    186        ++buf;
    187        ++pattern;
    188        continue;
    189      } else {
    190        return n_matched;
    191      }
    192    } else {
    193      int width = -1;
    194      int longmod = 0;
    195      ++pattern;
    196      if (TOR_ISDIGIT(*pattern)) {
    197        width = digit_to_num(*pattern++);
    198        while (TOR_ISDIGIT(*pattern)) {
    199          width *= 10;
    200          width += digit_to_num(*pattern++);
    201          if (width > MAX_SCANF_WIDTH)
    202            return -1;
    203        }
    204        if (!width) /* No zero-width things. */
    205          return -1;
    206      }
    207      if (*pattern == 'l') {
    208        longmod = 1;
    209        ++pattern;
    210      }
    211      if (*pattern == 'u' || *pattern == 'x') {
    212        unsigned long u;
    213        const int base = (*pattern == 'u') ? 10 : 16;
    214        if (!*buf)
    215          return n_matched;
    216        if (scan_unsigned(&buf, &u, width, base)<0)
    217          return n_matched;
    218        if (longmod) {
    219          unsigned long *out = va_arg(ap, unsigned long *);
    220          *out = u;
    221        } else {
    222          unsigned *out = va_arg(ap, unsigned *);
    223          if (u > UINT_MAX)
    224            return n_matched;
    225          *out = (unsigned) u;
    226        }
    227        ++pattern;
    228        ++n_matched;
    229      } else if (*pattern == 'f') {
    230        double *d = va_arg(ap, double *);
    231        if (!longmod)
    232          return -1; /* float not supported */
    233        if (!*buf)
    234          return n_matched;
    235        if (scan_double(&buf, d, width)<0)
    236          return n_matched;
    237        ++pattern;
    238        ++n_matched;
    239      } else if (*pattern == 'd') {
    240        long lng=0;
    241        if (scan_signed(&buf, &lng, width)<0)
    242          return n_matched;
    243        if (longmod) {
    244          long *out = va_arg(ap, long *);
    245          *out = lng;
    246        } else {
    247          int *out = va_arg(ap, int *);
    248 #if LONG_MAX > INT_MAX
    249          if (lng < INT_MIN || lng > INT_MAX)
    250            return n_matched;
    251 #endif
    252          *out = (int)lng;
    253        }
    254        ++pattern;
    255        ++n_matched;
    256      } else if (*pattern == 's') {
    257        char *s = va_arg(ap, char *);
    258        if (longmod)
    259          return -1;
    260        if (width < 0)
    261          return -1;
    262        if (scan_string(&buf, s, width)<0)
    263          return n_matched;
    264        ++pattern;
    265        ++n_matched;
    266      } else if (*pattern == 'c') {
    267        char *ch = va_arg(ap, char *);
    268        if (longmod)
    269          return -1;
    270        if (width != -1)
    271          return -1;
    272        if (!*buf)
    273          return n_matched;
    274        *ch = *buf++;
    275        ++pattern;
    276        ++n_matched;
    277      } else if (*pattern == '%') {
    278        if (*buf != '%')
    279          return n_matched;
    280        if (longmod)
    281          return -1;
    282        ++buf;
    283        ++pattern;
    284      } else {
    285        return -1; /* Unrecognized pattern component. */
    286      }
    287    }
    288  }
    289 
    290  return n_matched;
    291 }
    292 
    293 /** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
    294 * and store the results in the corresponding argument fields.  Differs from
    295 * sscanf in that:
    296 * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, and %c.
    297 *     <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1)
    298 *     <li>It does not handle arbitrarily long widths.
    299 *     <li>Numbers do not consume any space characters.
    300 *     <li>It is locale-independent.
    301 *     <li>%u and %x do not consume any space.
    302 *     <li>It returns -1 on malformed patterns.</ul>
    303 *
    304 * (As with other locale-independent functions, we need this to parse data that
    305 * is in ASCII without worrying that the C library's locale-handling will make
    306 * miscellaneous characters look like numbers, spaces, and so on.)
    307 */
    308 int
    309 tor_sscanf(const char *buf, const char *pattern, ...)
    310 {
    311  int r;
    312  va_list ap;
    313  va_start(ap, pattern);
    314  r = tor_vsscanf(buf, pattern, ap);
    315  va_end(ap);
    316  return r;
    317 }