/* scanf.c (8938B) */
1 /* Copyright (c) 2003-2004, Roger Dingledine 2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. 3 * Copyright (c) 2007-2021, The Tor Project, Inc. */ 4 /* See LICENSE for licensing information */ 5 6 /** 7 * \file scanf.c 8 * \brief Locale-independent minimal implementation of sscanf(). 9 **/ 10 11 #include "lib/string/scanf.h" 12 #include "lib/string/compat_ctype.h" 13 #include "lib/cc/torint.h" 14 #include "lib/err/torerr.h" 15 16 #include <stdlib.h> 17 18 #define MAX_SCANF_WIDTH 9999 19 20 /** Helper: given an ASCII-encoded decimal digit, return its numeric value. 21 * NOTE: requires that its input be in-bounds. */ 22 static int 23 digit_to_num(char d) 24 { 25 int num = ((int)d) - (int)'0'; 26 raw_assert(num <= 9 && num >= 0); 27 return num; 28 } 29 30 /** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b> 31 * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On 32 * success, store the result in <b>out</b>, advance bufp to the next 33 * character, and return 0. On failure, return -1. */ 34 static int 35 scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base) 36 { 37 unsigned long result = 0; 38 int scanned_so_far = 0; 39 const int hex = base==16; 40 raw_assert(base == 10 || base == 16); 41 if (!bufp || !*bufp || !out) 42 return -1; 43 if (width<0) 44 width=MAX_SCANF_WIDTH; 45 46 while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp)) 47 && scanned_so_far < width) { 48 unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++); 49 // Check for overflow beforehand, without actually causing any overflow 50 // This preserves functionality on compilers that don't wrap overflow 51 // (i.e. 
that trap or optimise away overflow) 52 // result * base + digit > ULONG_MAX 53 // result * base > ULONG_MAX - digit 54 if (result > (ULONG_MAX - digit)/base) 55 return -1; /* Processing this digit would overflow */ 56 result = result * base + digit; 57 ++scanned_so_far; 58 } 59 60 if (!scanned_so_far) /* No actual digits scanned */ 61 return -1; 62 63 *out = result; 64 return 0; 65 } 66 67 /** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b> 68 * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On 69 * success, store the result in <b>out</b>, advance bufp to the next 70 * character, and return 0. On failure, return -1. */ 71 static int 72 scan_signed(const char **bufp, long *out, int width) 73 { 74 int neg = 0; 75 unsigned long result = 0; 76 77 if (!bufp || !*bufp || !out) 78 return -1; 79 if (width<0) 80 width=MAX_SCANF_WIDTH; 81 82 if (**bufp == '-') { 83 neg = 1; 84 ++*bufp; 85 --width; 86 } 87 88 if (scan_unsigned(bufp, &result, width, 10) < 0) 89 return -1; 90 91 if (neg && result > 0) { 92 if (result > ((unsigned long)LONG_MAX) + 1) 93 return -1; /* Underflow */ 94 else if (result == ((unsigned long)LONG_MAX) + 1) 95 *out = LONG_MIN; 96 else { 97 /* We once had a far more clever no-overflow conversion here, but 98 * some versions of GCC apparently ran it into the ground. Now 99 * we just check for LONG_MIN explicitly. 100 */ 101 *out = -(long)result; 102 } 103 } else { 104 if (result > LONG_MAX) 105 return -1; /* Overflow */ 106 *out = (long)result; 107 } 108 109 return 0; 110 } 111 112 /** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to 113 * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less 114 * than 0.) On success, store the result in <b>out</b>, advance bufp to the 115 * next character, and return 0. On failure, return -1. 
*/ 116 static int 117 scan_double(const char **bufp, double *out, int width) 118 { 119 int neg = 0; 120 double result = 0; 121 int scanned_so_far = 0; 122 123 if (!bufp || !*bufp || !out) 124 return -1; 125 if (width<0) 126 width=MAX_SCANF_WIDTH; 127 128 if (**bufp == '-') { 129 neg = 1; 130 ++*bufp; 131 } 132 133 while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { 134 const int digit = digit_to_num(*(*bufp)++); 135 result = result * 10 + digit; 136 ++scanned_so_far; 137 } 138 if (**bufp == '.') { 139 double fracval = 0, denominator = 1; 140 ++*bufp; 141 ++scanned_so_far; 142 while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) { 143 const int digit = digit_to_num(*(*bufp)++); 144 fracval = fracval * 10 + digit; 145 denominator *= 10; 146 ++scanned_so_far; 147 } 148 result += fracval / denominator; 149 } 150 151 if (!scanned_so_far) /* No actual digits scanned */ 152 return -1; 153 154 *out = neg ? -result : result; 155 return 0; 156 } 157 158 /** Helper: copy up to <b>width</b> non-space characters from <b>bufp</b> to 159 * <b>out</b>. Make sure <b>out</b> is nul-terminated. Advance <b>bufp</b> 160 * to the next non-space character or the EOS. */ 161 static int 162 scan_string(const char **bufp, char *out, int width) 163 { 164 int scanned_so_far = 0; 165 if (!bufp || !out || width < 0) 166 return -1; 167 while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) { 168 *out++ = *(*bufp)++; 169 ++scanned_so_far; 170 } 171 *out = '\0'; 172 return 0; 173 } 174 175 /** Locale-independent, minimal, no-surprises scanf variant, accepting only a 176 * restricted pattern format. For more info on what it supports, see 177 * tor_sscanf() documentation. 
*/ 178 int 179 tor_vsscanf(const char *buf, const char *pattern, va_list ap) 180 { 181 int n_matched = 0; 182 183 while (*pattern) { 184 if (*pattern != '%') { 185 if (*buf == *pattern) { 186 ++buf; 187 ++pattern; 188 continue; 189 } else { 190 return n_matched; 191 } 192 } else { 193 int width = -1; 194 int longmod = 0; 195 ++pattern; 196 if (TOR_ISDIGIT(*pattern)) { 197 width = digit_to_num(*pattern++); 198 while (TOR_ISDIGIT(*pattern)) { 199 width *= 10; 200 width += digit_to_num(*pattern++); 201 if (width > MAX_SCANF_WIDTH) 202 return -1; 203 } 204 if (!width) /* No zero-width things. */ 205 return -1; 206 } 207 if (*pattern == 'l') { 208 longmod = 1; 209 ++pattern; 210 } 211 if (*pattern == 'u' || *pattern == 'x') { 212 unsigned long u; 213 const int base = (*pattern == 'u') ? 10 : 16; 214 if (!*buf) 215 return n_matched; 216 if (scan_unsigned(&buf, &u, width, base)<0) 217 return n_matched; 218 if (longmod) { 219 unsigned long *out = va_arg(ap, unsigned long *); 220 *out = u; 221 } else { 222 unsigned *out = va_arg(ap, unsigned *); 223 if (u > UINT_MAX) 224 return n_matched; 225 *out = (unsigned) u; 226 } 227 ++pattern; 228 ++n_matched; 229 } else if (*pattern == 'f') { 230 double *d = va_arg(ap, double *); 231 if (!longmod) 232 return -1; /* float not supported */ 233 if (!*buf) 234 return n_matched; 235 if (scan_double(&buf, d, width)<0) 236 return n_matched; 237 ++pattern; 238 ++n_matched; 239 } else if (*pattern == 'd') { 240 long lng=0; 241 if (scan_signed(&buf, &lng, width)<0) 242 return n_matched; 243 if (longmod) { 244 long *out = va_arg(ap, long *); 245 *out = lng; 246 } else { 247 int *out = va_arg(ap, int *); 248 #if LONG_MAX > INT_MAX 249 if (lng < INT_MIN || lng > INT_MAX) 250 return n_matched; 251 #endif 252 *out = (int)lng; 253 } 254 ++pattern; 255 ++n_matched; 256 } else if (*pattern == 's') { 257 char *s = va_arg(ap, char *); 258 if (longmod) 259 return -1; 260 if (width < 0) 261 return -1; 262 if (scan_string(&buf, s, width)<0) 263 return 
n_matched; 264 ++pattern; 265 ++n_matched; 266 } else if (*pattern == 'c') { 267 char *ch = va_arg(ap, char *); 268 if (longmod) 269 return -1; 270 if (width != -1) 271 return -1; 272 if (!*buf) 273 return n_matched; 274 *ch = *buf++; 275 ++pattern; 276 ++n_matched; 277 } else if (*pattern == '%') { 278 if (*buf != '%') 279 return n_matched; 280 if (longmod) 281 return -1; 282 ++buf; 283 ++pattern; 284 } else { 285 return -1; /* Unrecognized pattern component. */ 286 } 287 } 288 } 289 290 return n_matched; 291 } 292 293 /** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b> 294 * and store the results in the corresponding argument fields. Differs from 295 * sscanf in that: 296 * <ul><li>It only handles %u, %lu, %x, %lx, %[NUM]s, %d, %ld, %lf, and %c. 297 * <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1) 298 * <li>It does not handle arbitrarily long widths. 299 * <li>Numbers do not consume any space characters. 300 * <li>It is locale-independent. 301 * <li>%u and %x do not consume any space. 302 * <li>It returns -1 on malformed patterns.</ul> 303 * 304 * (As with other locale-independent functions, we need this to parse data that 305 * is in ASCII without worrying that the C library's locale-handling will make 306 * miscellaneous characters look like numbers, spaces, and so on.) 307 */ 308 int 309 tor_sscanf(const char *buf, const char *pattern, ...) 310 { 311 int r; 312 va_list ap; 313 va_start(ap, pattern); 314 r = tor_vsscanf(buf, pattern, ap); 315 va_end(ap); 316 return r; 317 }