prscanf.c (15221B)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 /* 7 * Scan functions for NSPR types 8 * 9 * Author: Wan-Teh Chang 10 * 11 * Acknowledgment: The implementation is inspired by the source code 12 * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992. 13 */ 14 15 #include <limits.h> 16 #include <ctype.h> 17 #include <string.h> 18 #include <stdlib.h> 19 #include "prprf.h" 20 #include "prdtoa.h" 21 #include "prlog.h" 22 #include "prerror.h" 23 24 /* 25 * A function that reads a character from 'stream'. 26 * Returns the character read, or EOF if end of stream is reached. 27 */ 28 typedef int (*_PRGetCharFN)(void* stream); 29 30 /* 31 * A function that pushes the character 'ch' back to 'stream'. 32 */ 33 typedef void (*_PRUngetCharFN)(void* stream, int ch); 34 35 /* 36 * The size specifier for the integer and floating point number 37 * conversions in format control strings. 38 */ 39 typedef enum { 40 _PR_size_none, /* No size specifier is given */ 41 _PR_size_h, /* The 'h' specifier, suggesting "short" */ 42 _PR_size_l, /* The 'l' specifier, suggesting "long" */ 43 _PR_size_L, /* The 'L' specifier, meaning a 'long double' */ 44 _PR_size_ll /* The 'll' specifier, suggesting "long long" */ 45 } _PRSizeSpec; 46 47 /* 48 * The collection of data that is passed between the scan function 49 * and its subordinate functions. The fields of this structure 50 * serve as the input or output arguments for these functions. 51 */ 52 typedef struct { 53 _PRGetCharFN get; /* get a character from input stream */ 54 _PRUngetCharFN unget; /* unget (push back) a character */ 55 void* stream; /* argument for get and unget */ 56 va_list ap; /* the variable argument list */ 57 int nChar; /* number of characters read from 'stream' */ 58 59 PRBool assign; /* assign, or suppress assignment? */ 60 int width; /* field width */ 61 _PRSizeSpec sizeSpec; /* 'h', 'l', 'L', or 'll' */ 62 63 PRBool converted; /* is the value actually converted? */ 64 } ScanfState; 65 66 #define GET(state) ((state)->nChar++, (state)->get((state)->stream)) 67 #define UNGET(state, ch) ((state)->nChar--, (state)->unget((state)->stream, ch)) 68 69 /* 70 * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH, 71 * are always used together. 72 * 73 * GET_IF_WITHIN_WIDTH calls the GET macro and assigns its return 74 * value to 'ch' only if we have not exceeded the field width of 75 * 'state'. Therefore, after GET_IF_WITHIN_WIDTH, the value of 76 * 'ch' is valid only if the macro WITHIN_WIDTH evaluates to true. 77 */ 78 79 #define GET_IF_WITHIN_WIDTH(state, ch) \ 80 if (--(state)->width >= 0) { \ 81 (ch) = GET(state); \ 82 } 83 #define WITHIN_WIDTH(state) ((state)->width >= 0) 84 85 /* 86 * _pr_strtoull: 87 * Convert a string to an unsigned 64-bit integer. The string 88 * 'str' is assumed to be a representation of the integer in 89 * base 'base'. 90 * 91 * Warning: 92 * - Only handle base 8, 10, and 16. 93 * - No overflow checking. 94 */ 95 96 static PRUint64 _pr_strtoull(const char* str, char** endptr, int base) { 97 static const int BASE_MAX = 16; 98 static const char digits[] = "0123456789abcdef"; 99 char* digitPtr; 100 PRUint64 x; /* return value */ 101 PRInt64 base64; 102 const char* cPtr; 103 PRBool negative; 104 const char* digitStart; 105 106 PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16); 107 if (base < 0 || base == 1 || base > BASE_MAX) { 108 if (endptr) { 109 *endptr = (char*)str; 110 return LL_ZERO; 111 } 112 } 113 114 cPtr = str; 115 while (isspace(*cPtr)) { 116 ++cPtr; 117 } 118 119 negative = PR_FALSE; 120 if (*cPtr == '-') { 121 negative = PR_TRUE; 122 cPtr++; 123 } else if (*cPtr == '+') { 124 cPtr++; 125 } 126 127 if (base == 16) { 128 if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) { 129 cPtr += 2; 130 } 131 } else if (base == 0) { 132 if (*cPtr != '0') { 133 base = 10; 134 } else if (cPtr[1] == 'x' || cPtr[1] == 'X') { 135 base = 16; 136 cPtr += 2; 137 } else { 138 base = 8; 139 } 140 } 141 PR_ASSERT(base != 0); 142 LL_I2L(base64, base); 143 digitStart = cPtr; 144 145 /* Skip leading zeros */ 146 while (*cPtr == '0') { 147 cPtr++; 148 } 149 150 LL_I2L(x, 0); 151 while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) { 152 PRUint64 d; 153 154 LL_I2L(d, (digitPtr - digits)); 155 LL_MUL(x, x, base64); 156 LL_ADD(x, x, d); 157 cPtr++; 158 } 159 160 if (cPtr == digitStart) { 161 if (endptr) { 162 *endptr = (char*)str; 163 } 164 return LL_ZERO; 165 } 166 167 if (negative) { 168 #ifdef HAVE_LONG_LONG 169 /* The cast to a signed type is to avoid a compiler warning */ 170 x = -(PRInt64)x; 171 #else 172 LL_NEG(x, x); 173 #endif 174 } 175 176 if (endptr) { 177 *endptr = (char*)cPtr; 178 } 179 return x; 180 } 181 182 /* 183 * The maximum field width (in number of characters) that is enough 184 * (may be more than necessary) to represent a 64-bit integer or 185 * floating point number. 186 */ 187 #define FMAX 31 188 #define DECIMAL_POINT '.' 189 190 static PRStatus GetInt(ScanfState* state, int code) { 191 char buf[FMAX + 1], *p; 192 int ch = 0; 193 static const char digits[] = "0123456789abcdefABCDEF"; 194 PRBool seenDigit = PR_FALSE; 195 int base; 196 int dlen; 197 198 switch (code) { 199 case 'd': 200 case 'u': 201 base = 10; 202 break; 203 case 'i': 204 base = 0; 205 break; 206 case 'x': 207 case 'X': 208 case 'p': 209 base = 16; 210 break; 211 case 'o': 212 base = 8; 213 break; 214 default: 215 return PR_FAILURE; 216 } 217 if (state->width == 0 || state->width > FMAX) { 218 state->width = FMAX; 219 } 220 p = buf; 221 GET_IF_WITHIN_WIDTH(state, ch); 222 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { 223 *p++ = ch; 224 GET_IF_WITHIN_WIDTH(state, ch); 225 } 226 if (WITHIN_WIDTH(state) && ch == '0') { 227 seenDigit = PR_TRUE; 228 *p++ = ch; 229 GET_IF_WITHIN_WIDTH(state, ch); 230 if (WITHIN_WIDTH(state) && (ch == 'x' || ch == 'X') && 231 (base == 0 || base == 16)) { 232 base = 16; 233 *p++ = ch; 234 GET_IF_WITHIN_WIDTH(state, ch); 235 } else if (base == 0) { 236 base = 8; 237 } 238 } 239 if (base == 0 || base == 10) { 240 dlen = 10; 241 } else if (base == 8) { 242 dlen = 8; 243 } else { 244 PR_ASSERT(base == 16); 245 dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */ 246 } 247 while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) { 248 *p++ = ch; 249 GET_IF_WITHIN_WIDTH(state, ch); 250 seenDigit = PR_TRUE; 251 } 252 if (WITHIN_WIDTH(state)) { 253 UNGET(state, ch); 254 } 255 if (!seenDigit) { 256 return PR_FAILURE; 257 } 258 *p = '\0'; 259 if (state->assign) { 260 if (code == 'd' || code == 'i') { 261 if (state->sizeSpec == _PR_size_ll) { 262 PRInt64 llval = _pr_strtoull(buf, NULL, base); 263 *va_arg(state->ap, PRInt64*) = llval; 264 } else { 265 long lval = strtol(buf, NULL, base); 266 267 if (state->sizeSpec == _PR_size_none) { 268 *va_arg(state->ap, PRIntn*) = lval; 269 } else if (state->sizeSpec == _PR_size_h) { 270 *va_arg(state->ap, PRInt16*) = (PRInt16)lval; 271 } else if (state->sizeSpec == _PR_size_l) { 272 *va_arg(state->ap, PRInt32*) = lval; 273 } else { 274 return PR_FAILURE; 275 } 276 } 277 } else { 278 if (state->sizeSpec == _PR_size_ll) { 279 PRUint64 llval = _pr_strtoull(buf, NULL, base); 280 *va_arg(state->ap, PRUint64*) = llval; 281 } else { 282 unsigned long lval = strtoul(buf, NULL, base); 283 284 if (state->sizeSpec == _PR_size_none) { 285 *va_arg(state->ap, PRUintn*) = lval; 286 } else if (state->sizeSpec == _PR_size_h) { 287 *va_arg(state->ap, PRUint16*) = (PRUint16)lval; 288 } else if (state->sizeSpec == _PR_size_l) { 289 *va_arg(state->ap, PRUint32*) = lval; 290 } else { 291 return PR_FAILURE; 292 } 293 } 294 } 295 state->converted = PR_TRUE; 296 } 297 return PR_SUCCESS; 298 } 299 300 static PRStatus GetFloat(ScanfState* state) { 301 char buf[FMAX + 1], *p; 302 int ch = 0; 303 PRBool seenDigit = PR_FALSE; 304 305 if (state->width == 0 || state->width > FMAX) { 306 state->width = FMAX; 307 } 308 p = buf; 309 GET_IF_WITHIN_WIDTH(state, ch); 310 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { 311 *p++ = ch; 312 GET_IF_WITHIN_WIDTH(state, ch); 313 } 314 while (WITHIN_WIDTH(state) && isdigit(ch)) { 315 *p++ = ch; 316 GET_IF_WITHIN_WIDTH(state, ch); 317 seenDigit = PR_TRUE; 318 } 319 if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) { 320 *p++ = ch; 321 GET_IF_WITHIN_WIDTH(state, ch); 322 while (WITHIN_WIDTH(state) && isdigit(ch)) { 323 *p++ = ch; 324 GET_IF_WITHIN_WIDTH(state, ch); 325 seenDigit = PR_TRUE; 326 } 327 } 328 329 /* 330 * This is not robust. For example, "1.2e+" would confuse 331 * the code below to read 'e' and '+', only to realize that 332 * it should have stopped at "1.2". But we can't push back 333 * more than one character, so there is nothing I can do. 334 */ 335 336 /* Parse exponent */ 337 if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) { 338 *p++ = ch; 339 GET_IF_WITHIN_WIDTH(state, ch); 340 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { 341 *p++ = ch; 342 GET_IF_WITHIN_WIDTH(state, ch); 343 } 344 while (WITHIN_WIDTH(state) && isdigit(ch)) { 345 *p++ = ch; 346 GET_IF_WITHIN_WIDTH(state, ch); 347 } 348 } 349 if (WITHIN_WIDTH(state)) { 350 UNGET(state, ch); 351 } 352 if (!seenDigit) { 353 return PR_FAILURE; 354 } 355 *p = '\0'; 356 if (state->assign) { 357 PRFloat64 dval = PR_strtod(buf, NULL); 358 359 state->converted = PR_TRUE; 360 if (state->sizeSpec == _PR_size_l) { 361 *va_arg(state->ap, PRFloat64*) = dval; 362 } else if (state->sizeSpec == _PR_size_L) { 363 *va_arg(state->ap, long double*) = dval; 364 } else { 365 *va_arg(state->ap, float*) = (float)dval; 366 } 367 } 368 return PR_SUCCESS; 369 } 370 371 /* 372 * Convert, and return the end of the conversion spec. 373 * Return NULL on error. 374 */ 375 376 static const char* Convert(ScanfState* state, const char* fmt) { 377 const char* cPtr; 378 int ch; 379 char* cArg = NULL; 380 381 state->converted = PR_FALSE; 382 cPtr = fmt; 383 if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') { 384 do { 385 ch = GET(state); 386 } while (isspace(ch)); 387 UNGET(state, ch); 388 } 389 switch (*cPtr) { 390 case 'c': 391 if (state->assign) { 392 cArg = va_arg(state->ap, char*); 393 } 394 if (state->width == 0) { 395 state->width = 1; 396 } 397 for (; state->width > 0; state->width--) { 398 ch = GET(state); 399 if (ch == EOF) { 400 return NULL; 401 } 402 if (state->assign) { 403 *cArg++ = ch; 404 } 405 } 406 if (state->assign) { 407 state->converted = PR_TRUE; 408 } 409 break; 410 case 'p': 411 case 'd': 412 case 'i': 413 case 'o': 414 case 'u': 415 case 'x': 416 case 'X': 417 if (GetInt(state, *cPtr) == PR_FAILURE) { 418 return NULL; 419 } 420 break; 421 case 'e': 422 case 'E': 423 case 'f': 424 case 'g': 425 case 'G': 426 if (GetFloat(state) == PR_FAILURE) { 427 return NULL; 428 } 429 break; 430 case 'n': 431 /* do not consume any input */ 432 if (state->assign) { 433 switch (state->sizeSpec) { 434 case _PR_size_none: 435 *va_arg(state->ap, PRIntn*) = state->nChar; 436 break; 437 case _PR_size_h: 438 *va_arg(state->ap, PRInt16*) = state->nChar; 439 break; 440 case _PR_size_l: 441 *va_arg(state->ap, PRInt32*) = state->nChar; 442 break; 443 case _PR_size_ll: 444 LL_I2L(*va_arg(state->ap, PRInt64*), state->nChar); 445 break; 446 default: 447 PR_ASSERT(0); 448 } 449 } 450 break; 451 case 's': 452 if (state->width == 0) { 453 state->width = INT_MAX; 454 } 455 if (state->assign) { 456 cArg = va_arg(state->ap, char*); 457 } 458 for (; state->width > 0; state->width--) { 459 ch = GET(state); 460 if ((ch == EOF) || isspace(ch)) { 461 UNGET(state, ch); 462 break; 463 } 464 if (state->assign) { 465 *cArg++ = ch; 466 } 467 } 468 if (state->assign) { 469 *cArg = '\0'; 470 state->converted = PR_TRUE; 471 } 472 break; 473 case '%': 474 ch = GET(state); 475 if (ch != '%') { 476 UNGET(state, ch); 477 return NULL; 478 } 479 break; 480 case '[': { 481 PRBool complement = PR_FALSE; 482 const char* closeBracket; 483 size_t n; 484 485 if (*++cPtr == '^') { 486 complement = PR_TRUE; 487 cPtr++; 488 } 489 closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']'); 490 if (closeBracket == NULL) { 491 return NULL; 492 } 493 n = closeBracket - cPtr; 494 if (state->width == 0) { 495 state->width = INT_MAX; 496 } 497 if (state->assign) { 498 cArg = va_arg(state->ap, char*); 499 } 500 for (; state->width > 0; state->width--) { 501 ch = GET(state); 502 if ((ch == EOF) || (!complement && !memchr(cPtr, ch, n)) || 503 (complement && memchr(cPtr, ch, n))) { 504 UNGET(state, ch); 505 break; 506 } 507 if (state->assign) { 508 *cArg++ = ch; 509 } 510 } 511 if (state->assign) { 512 *cArg = '\0'; 513 state->converted = PR_TRUE; 514 } 515 cPtr = closeBracket; 516 } break; 517 default: 518 return NULL; 519 } 520 return cPtr; 521 } 522 523 static PRInt32 DoScanf(ScanfState* state, const char* fmt) { 524 PRInt32 nConverted = 0; 525 const char* cPtr; 526 int ch; 527 528 state->nChar = 0; 529 cPtr = fmt; 530 while (1) { 531 if (isspace(*cPtr)) { 532 /* white space: skip */ 533 do { 534 cPtr++; 535 } while (isspace(*cPtr)); 536 do { 537 ch = GET(state); 538 } while (isspace(ch)); 539 UNGET(state, ch); 540 } else if (*cPtr == '%') { 541 /* format spec: convert */ 542 cPtr++; 543 state->assign = PR_TRUE; 544 if (*cPtr == '*') { 545 cPtr++; 546 state->assign = PR_FALSE; 547 } 548 for (state->width = 0; isdigit(*cPtr); cPtr++) { 549 state->width = state->width * 10 + *cPtr - '0'; 550 } 551 state->sizeSpec = _PR_size_none; 552 if (*cPtr == 'h') { 553 cPtr++; 554 state->sizeSpec = _PR_size_h; 555 } else if (*cPtr == 'l') { 556 cPtr++; 557 if (*cPtr == 'l') { 558 cPtr++; 559 state->sizeSpec = _PR_size_ll; 560 } else { 561 state->sizeSpec = _PR_size_l; 562 } 563 } else if (*cPtr == 'L') { 564 cPtr++; 565 state->sizeSpec = _PR_size_L; 566 } 567 cPtr = Convert(state, cPtr); 568 if (cPtr == NULL) { 569 return (nConverted > 0 ? nConverted : EOF); 570 } 571 if (state->converted) { 572 nConverted++; 573 } 574 cPtr++; 575 } else { 576 /* others: must match */ 577 if (*cPtr == '\0') { 578 return nConverted; 579 } 580 ch = GET(state); 581 if (ch != *cPtr) { 582 UNGET(state, ch); 583 return nConverted; 584 } 585 cPtr++; 586 } 587 } 588 } 589 590 static int StringGetChar(void* stream) { 591 char* cPtr = *((char**)stream); 592 593 if (*cPtr == '\0') { 594 return EOF; 595 } 596 *((char**)stream) = cPtr + 1; 597 return (unsigned char)*cPtr; 598 } 599 600 static void StringUngetChar(void* stream, int ch) { 601 char* cPtr = *((char**)stream); 602 603 if (ch != EOF) { 604 *((char**)stream) = cPtr - 1; 605 } 606 } 607 608 PR_IMPLEMENT(PRInt32) 609 PR_sscanf(const char* buf, const char* fmt, ...) { 610 PRInt32 rv; 611 ScanfState state; 612 613 state.get = &StringGetChar; 614 state.unget = &StringUngetChar; 615 state.stream = (void*)&buf; 616 va_start(state.ap, fmt); 617 rv = DoScanf(&state, fmt); 618 va_end(state.ap); 619 return rv; 620 }