math_private.h (25442B)
1 /* 2 * ==================================================== 3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 4 * 5 * Developed at SunPro, a Sun Microsystems, Inc. business. 6 * Permission to use, copy, modify, and distribute this 7 * software is freely granted, provided that this notice 8 * is preserved. 9 * ==================================================== 10 */ 11 12 /* 13 * from: @(#)fdlibm.h 5.1 93/09/24 14 * $FreeBSD$ 15 */ 16 17 #ifndef _MATH_PRIVATE_H_ 18 #define _MATH_PRIVATE_H_ 19 20 #include <cfloat> 21 #include <stdint.h> 22 #include <sys/types.h> 23 24 #include "mozilla/EndianUtils.h" 25 26 #include "fdlibm.h" 27 28 /* 29 * Emulate FreeBSD internal double types. 30 * Adapted from https://github.com/freebsd/freebsd-src/search?q=__double_t 31 */ 32 33 typedef double __double_t; 34 typedef __double_t double_t; 35 typedef float __float_t; 36 37 /* 38 * The original fdlibm code used statements like: 39 * n0 = ((*(int*)&one)>>29)^1; * index of high word * 40 * ix0 = *(n0+(int*)&x); * high word of x * 41 * ix1 = *((1-n0)+(int*)&x); * low word of x * 42 * to dig two 32 bit words out of the 64 bit IEEE floating point 43 * value. That is non-ANSI, and, moreover, the gcc instruction 44 * scheduler gets it wrong. We instead use the following macros. 45 * Unlike the original code, we determine the endianness at compile 46 * time, not at run time; I don't see much benefit to selecting 47 * endianness at run time. 48 */ 49 50 #ifndef u_int32_t 51 #define u_int32_t uint32_t 52 #endif 53 #ifndef u_int64_t 54 #define u_int64_t uint64_t 55 #endif 56 57 /* A union which permits us to convert between a long double and 58 four 32 bit ints. */ 59 60 #if MOZ_BIG_ENDIAN() 61 62 typedef union 63 { 64 long double value; 65 struct { 66 u_int32_t mswhi; 67 u_int32_t mswlo; 68 u_int32_t lswhi; 69 u_int32_t lswlo; 70 } parts32; 71 struct { 72 u_int64_t msw; 73 u_int64_t lsw; 74 } parts64; 75 } ieee_quad_shape_type; 76 77 #endif 78 79 #if MOZ_LITTLE_ENDIAN() 80 81 typedef union 82 { 83 long double value; 84 struct { 85 u_int32_t lswlo; 86 u_int32_t lswhi; 87 u_int32_t mswlo; 88 u_int32_t mswhi; 89 } parts32; 90 struct { 91 u_int64_t lsw; 92 u_int64_t msw; 93 } parts64; 94 } ieee_quad_shape_type; 95 96 #endif 97 98 #if MOZ_BIG_ENDIAN() 99 100 typedef union 101 { 102 double value; 103 struct 104 { 105 u_int32_t msw; 106 u_int32_t lsw; 107 } parts; 108 struct 109 { 110 u_int64_t w; 111 } xparts; 112 } ieee_double_shape_type; 113 114 #endif 115 116 #if MOZ_LITTLE_ENDIAN() 117 118 typedef union 119 { 120 double value; 121 struct 122 { 123 u_int32_t lsw; 124 u_int32_t msw; 125 } parts; 126 struct 127 { 128 u_int64_t w; 129 } xparts; 130 } ieee_double_shape_type; 131 132 #endif 133 134 /* Get two 32 bit ints from a double. */ 135 136 #define EXTRACT_WORDS(ix0,ix1,d) \ 137 do { \ 138 ieee_double_shape_type ew_u; \ 139 ew_u.value = (d); \ 140 (ix0) = ew_u.parts.msw; \ 141 (ix1) = ew_u.parts.lsw; \ 142 } while (0) 143 144 /* Get a 64-bit int from a double. */ 145 #define EXTRACT_WORD64(ix,d) \ 146 do { \ 147 ieee_double_shape_type ew_u; \ 148 ew_u.value = (d); \ 149 (ix) = ew_u.xparts.w; \ 150 } while (0) 151 152 /* Get the more significant 32 bit int from a double. */ 153 154 #define GET_HIGH_WORD(i,d) \ 155 do { \ 156 ieee_double_shape_type gh_u; \ 157 gh_u.value = (d); \ 158 (i) = gh_u.parts.msw; \ 159 } while (0) 160 161 /* Get the less significant 32 bit int from a double. */ 162 163 #define GET_LOW_WORD(i,d) \ 164 do { \ 165 ieee_double_shape_type gl_u; \ 166 gl_u.value = (d); \ 167 (i) = gl_u.parts.lsw; \ 168 } while (0) 169 170 /* Set a double from two 32 bit ints. */ 171 172 #define INSERT_WORDS(d,ix0,ix1) \ 173 do { \ 174 ieee_double_shape_type iw_u; \ 175 iw_u.parts.msw = (ix0); \ 176 iw_u.parts.lsw = (ix1); \ 177 (d) = iw_u.value; \ 178 } while (0) 179 180 /* Set a double from a 64-bit int. */ 181 #define INSERT_WORD64(d,ix) \ 182 do { \ 183 ieee_double_shape_type iw_u; \ 184 iw_u.xparts.w = (ix); \ 185 (d) = iw_u.value; \ 186 } while (0) 187 188 /* Set the more significant 32 bits of a double from an int. */ 189 190 #define SET_HIGH_WORD(d,v) \ 191 do { \ 192 ieee_double_shape_type sh_u; \ 193 sh_u.value = (d); \ 194 sh_u.parts.msw = (v); \ 195 (d) = sh_u.value; \ 196 } while (0) 197 198 /* Set the less significant 32 bits of a double from an int. */ 199 200 #define SET_LOW_WORD(d,v) \ 201 do { \ 202 ieee_double_shape_type sl_u; \ 203 sl_u.value = (d); \ 204 sl_u.parts.lsw = (v); \ 205 (d) = sl_u.value; \ 206 } while (0) 207 208 /* 209 * A union which permits us to convert between a float and a 32 bit 210 * int. 211 */ 212 213 typedef union 214 { 215 float value; 216 /* FIXME: Assumes 32 bit int. */ 217 unsigned int word; 218 } ieee_float_shape_type; 219 220 /* Get a 32 bit int from a float. */ 221 222 #define GET_FLOAT_WORD(i,d) \ 223 do { \ 224 ieee_float_shape_type gf_u; \ 225 gf_u.value = (d); \ 226 (i) = gf_u.word; \ 227 } while (0) 228 229 /* Set a float from a 32 bit int. */ 230 231 #define SET_FLOAT_WORD(d,i) \ 232 do { \ 233 ieee_float_shape_type sf_u; \ 234 sf_u.word = (i); \ 235 (d) = sf_u.value; \ 236 } while (0) 237 238 /* 239 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long 240 * double. 241 */ 242 243 #define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \ 244 do { \ 245 union IEEEl2bits ew_u; \ 246 ew_u.e = (d); \ 247 (ix0) = ew_u.xbits.expsign; \ 248 (ix1) = ew_u.xbits.man; \ 249 } while (0) 250 251 /* 252 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit 253 * long double. 254 */ 255 256 #define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \ 257 do { \ 258 union IEEEl2bits ew_u; \ 259 ew_u.e = (d); \ 260 (ix0) = ew_u.xbits.expsign; \ 261 (ix1) = ew_u.xbits.manh; \ 262 (ix2) = ew_u.xbits.manl; \ 263 } while (0) 264 265 /* Get expsign as a 16 bit int from a long double. */ 266 267 #define GET_LDBL_EXPSIGN(i,d) \ 268 do { \ 269 union IEEEl2bits ge_u; \ 270 ge_u.e = (d); \ 271 (i) = ge_u.xbits.expsign; \ 272 } while (0) 273 274 /* 275 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int 276 * mantissa. 277 */ 278 279 #define INSERT_LDBL80_WORDS(d,ix0,ix1) \ 280 do { \ 281 union IEEEl2bits iw_u; \ 282 iw_u.xbits.expsign = (ix0); \ 283 iw_u.xbits.man = (ix1); \ 284 (d) = iw_u.e; \ 285 } while (0) 286 287 /* 288 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints 289 * comprising the mantissa. 290 */ 291 292 #define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \ 293 do { \ 294 union IEEEl2bits iw_u; \ 295 iw_u.xbits.expsign = (ix0); \ 296 iw_u.xbits.manh = (ix1); \ 297 iw_u.xbits.manl = (ix2); \ 298 (d) = iw_u.e; \ 299 } while (0) 300 301 /* Set expsign of a long double from a 16 bit int. */ 302 303 #define SET_LDBL_EXPSIGN(d,v) \ 304 do { \ 305 union IEEEl2bits se_u; \ 306 se_u.e = (d); \ 307 se_u.xbits.expsign = (v); \ 308 (d) = se_u.e; \ 309 } while (0) 310 311 #ifdef __i386__ 312 /* Long double constants are broken on i386. */ 313 #define LD80C(m, ex, v) { \ 314 .xbits.man = __CONCAT(m, ULL), \ 315 .xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0), \ 316 } 317 #else 318 /* The above works on non-i386 too, but we use this to check v. */ 319 #define LD80C(m, ex, v) { .e = (v), } 320 #endif 321 322 #ifdef FLT_EVAL_METHOD 323 /* 324 * Attempt to get strict C99 semantics for assignment with non-C99 compilers. 325 */ 326 #if !defined(_MSC_VER) && (FLT_EVAL_METHOD == 0 || __GNUC__ == 0) 327 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval)) 328 #else 329 #define STRICT_ASSIGN(type, lval, rval) do { \ 330 volatile type __lval; \ 331 \ 332 if (sizeof(type) >= sizeof(long double)) \ 333 (lval) = (rval); \ 334 else { \ 335 __lval = (rval); \ 336 (lval) = __lval; \ 337 } \ 338 } while (0) 339 #endif 340 #else 341 #define STRICT_ASSIGN(type, lval, rval) do { \ 342 volatile type __lval; \ 343 \ 344 if (sizeof(type) >= sizeof(long double)) \ 345 (lval) = (rval); \ 346 else { \ 347 __lval = (rval); \ 348 (lval) = __lval; \ 349 } \ 350 } while (0) 351 #endif /* FLT_EVAL_METHOD */ 352 353 /* Support switching the mode to FP_PE if necessary. */ 354 #if defined(__i386__) && !defined(NO_FPSETPREC) 355 #define ENTERI() ENTERIT(long double) 356 #define ENTERIT(returntype) \ 357 returntype __retval; \ 358 fp_prec_t __oprec; \ 359 \ 360 if ((__oprec = fpgetprec()) != FP_PE) \ 361 fpsetprec(FP_PE) 362 #define RETURNI(x) do { \ 363 __retval = (x); \ 364 if (__oprec != FP_PE) \ 365 fpsetprec(__oprec); \ 366 RETURNF(__retval); \ 367 } while (0) 368 #define ENTERV() \ 369 fp_prec_t __oprec; \ 370 \ 371 if ((__oprec = fpgetprec()) != FP_PE) \ 372 fpsetprec(FP_PE) 373 #define RETURNV() do { \ 374 if (__oprec != FP_PE) \ 375 fpsetprec(__oprec); \ 376 return; \ 377 } while (0) 378 #else 379 #define ENTERI() 380 #define ENTERIT(x) 381 #define RETURNI(x) RETURNF(x) 382 #define ENTERV() 383 #define RETURNV() return 384 #endif 385 386 /* Default return statement if hack*_t() is not used. */ 387 #define RETURNF(v) return (v) 388 389 /* 390 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or 391 * a == 0, but is slower. 392 */ 393 #define _2sum(a, b) do { \ 394 __typeof(a) __s, __w; \ 395 \ 396 __w = (a) + (b); \ 397 __s = __w - (a); \ 398 (b) = ((a) - (__w - __s)) + ((b) - __s); \ 399 (a) = __w; \ 400 } while (0) 401 402 /* 403 * 2sumF algorithm. 404 * 405 * "Normalize" the terms in the infinite-precision expression a + b for 406 * the sum of 2 floating point values so that b is as small as possible 407 * relative to 'a'. (The resulting 'a' is the value of the expression in 408 * the same precision as 'a' and the resulting b is the rounding error.) 409 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and 410 * exponent overflow or underflow must not occur. This uses a Theorem of 411 * Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum" 412 * is apparently due to Skewchuk (1997). 413 * 414 * For this to always work, assignment of a + b to 'a' must not retain any 415 * extra precision in a + b. This is required by C standards but broken 416 * in many compilers. The brokenness cannot be worked around using 417 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this 418 * algorithm would be destroyed by non-null strict assignments. (The 419 * compilers are correct to be broken -- the efficiency of all floating 420 * point code calculations would be destroyed similarly if they forced the 421 * conversions.) 422 * 423 * Fortunately, a case that works well can usually be arranged by building 424 * any extra precision into the type of 'a' -- 'a' should have type float_t, 425 * double_t or long double. b's type should be no larger than 'a's type. 426 * Callers should use these types with scopes as large as possible, to 427 * reduce their own extra-precision and efficiciency problems. In 428 * particular, they shouldn't convert back and forth just to call here. 429 */ 430 #ifdef DEBUG 431 #define _2sumF(a, b) do { \ 432 __typeof(a) __w; \ 433 volatile __typeof(a) __ia, __ib, __r, __vw; \ 434 \ 435 __ia = (a); \ 436 __ib = (b); \ 437 assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \ 438 \ 439 __w = (a) + (b); \ 440 (b) = ((a) - __w) + (b); \ 441 (a) = __w; \ 442 \ 443 /* The next 2 assertions are weak if (a) is already long double. */ \ 444 assert((long double)__ia + __ib == (long double)(a) + (b)); \ 445 __vw = __ia + __ib; \ 446 __r = __ia - __vw; \ 447 __r += __ib; \ 448 assert(__vw == (a) && __r == (b)); \ 449 } while (0) 450 #else /* !DEBUG */ 451 #define _2sumF(a, b) do { \ 452 __typeof(a) __w; \ 453 \ 454 __w = (a) + (b); \ 455 (b) = ((a) - __w) + (b); \ 456 (a) = __w; \ 457 } while (0) 458 #endif /* DEBUG */ 459 460 /* 461 * Set x += c, where x is represented in extra precision as a + b. 462 * x must be sufficiently normalized and sufficiently larger than c, 463 * and the result is then sufficiently normalized. 464 * 465 * The details of ordering are that |a| must be >= |c| (so that (a, c) 466 * can be normalized without extra work to swap 'a' with c). The details of 467 * the normalization are that b must be small relative to the normalized 'a'. 468 * Normalization of (a, c) makes the normalized c tiny relative to the 469 * normalized a, so b remains small relative to 'a' in the result. However, 470 * b need not ever be tiny relative to 'a'. For example, b might be about 471 * 2**20 times smaller than 'a' to give about 20 extra bits of precision. 472 * That is usually enough, and adding c (which by normalization is about 473 * 2**53 times smaller than a) cannot change b significantly. However, 474 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a' 475 * significantly relative to b. The caller must ensure that significant 476 * cancellation doesn't occur, either by having c of the same sign as 'a', 477 * or by having |c| a few percent smaller than |a|. Pre-normalization of 478 * (a, b) may help. 479 * 480 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2 481 * exercise 19). We gain considerable efficiency by requiring the terms to 482 * be sufficiently normalized and sufficiently increasing. 483 */ 484 #define _3sumF(a, b, c) do { \ 485 __typeof(a) __tmp; \ 486 \ 487 __tmp = (c); \ 488 _2sumF(__tmp, (a)); \ 489 (b) += (a); \ 490 (a) = __tmp; \ 491 } while (0) 492 493 /* 494 * Common routine to process the arguments to nan(), nanf(), and nanl(). 495 */ 496 void _scan_nan(uint32_t *__words, int __num_words, const char *__s); 497 498 /* 499 * Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns 500 * signaling NaNs into quiet NaNs by setting a quiet bit. We do this 501 * because we want to never return a signaling NaN, and also because we 502 * don't want the quiet bit to affect the result. Then mix the converted 503 * args using the specified operation. 504 * 505 * When one arg is NaN, the result is typically that arg quieted. When both 506 * args are NaNs, the result is typically the quietening of the arg whose 507 * mantissa is largest after quietening. When neither arg is NaN, the 508 * result may be NaN because it is indeterminate, or finite for subsequent 509 * construction of a NaN as the indeterminate 0.0L/0.0L. 510 * 511 * Technical complications: the result in bits after rounding to the final 512 * precision might depend on the runtime precision and/or on compiler 513 * optimizations, especially when different register sets are used for 514 * different precisions. Try to make the result not depend on at least the 515 * runtime precision by always doing the main mixing step in long double 516 * precision. Try to reduce dependencies on optimizations by adding the 517 * the 0's in different precisions (unless everything is in long double 518 * precision). 519 */ 520 #define nan_mix(x, y) (nan_mix_op((x), (y), +)) 521 #define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0)) 522 523 #ifdef _COMPLEX_H 524 525 /* 526 * C99 specifies that complex numbers have the same representation as 527 * an array of two elements, where the first element is the real part 528 * and the second element is the imaginary part. 529 */ 530 typedef union { 531 float complex f; 532 float a[2]; 533 } float_complex; 534 typedef union { 535 double complex f; 536 double a[2]; 537 } double_complex; 538 typedef union { 539 long double complex f; 540 long double a[2]; 541 } long_double_complex; 542 #define REALPART(z) ((z).a[0]) 543 #define IMAGPART(z) ((z).a[1]) 544 545 /* 546 * Inline functions that can be used to construct complex values. 547 * 548 * The C99 standard intends x+I*y to be used for this, but x+I*y is 549 * currently unusable in general since gcc introduces many overflow, 550 * underflow, sign and efficiency bugs by rewriting I*y as 551 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product. 552 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted 553 * to -0.0+I*0.0. 554 * 555 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL() 556 * to construct complex values. Compilers that conform to the C99 557 * standard require the following functions to avoid the above issues. 558 */ 559 560 #ifndef CMPLXF 561 static __inline float complex 562 CMPLXF(float x, float y) 563 { 564 float_complex z; 565 566 REALPART(z) = x; 567 IMAGPART(z) = y; 568 return (z.f); 569 } 570 #endif 571 572 #ifndef CMPLX 573 static __inline double complex 574 CMPLX(double x, double y) 575 { 576 double_complex z; 577 578 REALPART(z) = x; 579 IMAGPART(z) = y; 580 return (z.f); 581 } 582 #endif 583 584 #ifndef CMPLXL 585 static __inline long double complex 586 CMPLXL(long double x, long double y) 587 { 588 long_double_complex z; 589 590 REALPART(z) = x; 591 IMAGPART(z) = y; 592 return (z.f); 593 } 594 #endif 595 596 #endif /* _COMPLEX_H */ 597 598 /* 599 * The rnint() family rounds to the nearest integer for a restricted range 600 * range of args (up to about 2**MANT_DIG). We assume that the current 601 * rounding mode is FE_TONEAREST so that this can be done efficiently. 602 * Extra precision causes more problems in practice, and we only centralize 603 * this here to reduce those problems, and have not solved the efficiency 604 * problems. The exp2() family uses a more delicate version of this that 605 * requires extracting bits from the intermediate value, so it is not 606 * centralized here and should copy any solution of the efficiency problems. 607 */ 608 609 static inline double 610 rnint(__double_t x) 611 { 612 /* 613 * This casts to double to kill any extra precision. This depends 614 * on the cast being applied to a double_t to avoid compiler bugs 615 * (this is a cleaner version of STRICT_ASSIGN()). This is 616 * inefficient if there actually is extra precision, but is hard 617 * to improve on. We use double_t in the API to minimise conversions 618 * for just calling here. Note that we cannot easily change the 619 * magic number to the one that works directly with double_t, since 620 * the rounding precision is variable at runtime on x86 so the 621 * magic number would need to be variable. Assuming that the 622 * rounding precision is always the default is too fragile. This 623 * and many other complications will move when the default is 624 * changed to FP_PE. 625 */ 626 return ((double)(x + 0x1.8p52) - 0x1.8p52); 627 } 628 629 /* 630 * irint() and i64rint() give the same result as casting to their integer 631 * return type provided their arg is a floating point integer. They can 632 * sometimes be more efficient because no rounding is required. 633 */ 634 #if defined(amd64) || defined(__i386__) 635 #define irint(x) \ 636 (sizeof(x) == sizeof(float) && \ 637 sizeof(__float_t) == sizeof(long double) ? irintf(x) : \ 638 sizeof(x) == sizeof(double) && \ 639 sizeof(__double_t) == sizeof(long double) ? irintd(x) : \ 640 sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x)) 641 #else 642 #define irint(x) ((int)(x)) 643 #endif 644 645 #define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */ 646 647 #if defined(__i386__) 648 static __inline int 649 irintf(float x) 650 { 651 int n; 652 653 __asm("fistl %0" : "=m" (n) : "t" (x)); 654 return (n); 655 } 656 657 static __inline int 658 irintd(double x) 659 { 660 int n; 661 662 __asm("fistl %0" : "=m" (n) : "t" (x)); 663 return (n); 664 } 665 #endif 666 667 #if defined(__amd64__) || defined(__i386__) 668 static __inline int 669 irintl(long double x) 670 { 671 int n; 672 673 __asm("fistl %0" : "=m" (n) : "t" (x)); 674 return (n); 675 } 676 #endif 677 678 #ifdef DEBUG 679 #if defined(__amd64__) || defined(__i386__) 680 #define breakpoint() asm("int $3") 681 #elif defined(__wasm__) 682 #define breakpoint() __builtin_trap() 683 #else 684 #include <signal.h> 685 686 #define breakpoint() raise(SIGTRAP) 687 #endif 688 #endif 689 690 /* Write a pari script to test things externally. */ 691 #ifdef DOPRINT 692 #include <stdio.h> 693 694 #ifndef DOPRINT_SWIZZLE 695 #define DOPRINT_SWIZZLE 0 696 #endif 697 698 #ifdef DOPRINT_LD80 699 700 #define DOPRINT_START(xp) do { \ 701 uint64_t __lx; \ 702 uint16_t __hx; \ 703 \ 704 /* Hack to give more-problematic args. */ \ 705 EXTRACT_LDBL80_WORDS(__hx, __lx, *xp); \ 706 __lx ^= DOPRINT_SWIZZLE; \ 707 INSERT_LDBL80_WORDS(*xp, __hx, __lx); \ 708 printf("x = %.21Lg; ", (long double)*xp); \ 709 } while (0) 710 #define DOPRINT_END1(v) \ 711 printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) 712 #define DOPRINT_END2(hi, lo) \ 713 printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \ 714 (long double)(hi), (long double)(lo)) 715 716 #elif defined(DOPRINT_D64) 717 718 #define DOPRINT_START(xp) do { \ 719 uint32_t __hx, __lx; \ 720 \ 721 EXTRACT_WORDS(__hx, __lx, *xp); \ 722 __lx ^= DOPRINT_SWIZZLE; \ 723 INSERT_WORDS(*xp, __hx, __lx); \ 724 printf("x = %.21Lg; ", (long double)*xp); \ 725 } while (0) 726 #define DOPRINT_END1(v) \ 727 printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) 728 #define DOPRINT_END2(hi, lo) \ 729 printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \ 730 (long double)(hi), (long double)(lo)) 731 732 #elif defined(DOPRINT_F32) 733 734 #define DOPRINT_START(xp) do { \ 735 uint32_t __hx; \ 736 \ 737 GET_FLOAT_WORD(__hx, *xp); \ 738 __hx ^= DOPRINT_SWIZZLE; \ 739 SET_FLOAT_WORD(*xp, __hx); \ 740 printf("x = %.21Lg; ", (long double)*xp); \ 741 } while (0) 742 #define DOPRINT_END1(v) \ 743 printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) 744 #define DOPRINT_END2(hi, lo) \ 745 printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \ 746 (long double)(hi), (long double)(lo)) 747 748 #else /* !DOPRINT_LD80 && !DOPRINT_D64 (LD128 only) */ 749 750 #ifndef DOPRINT_SWIZZLE_HIGH 751 #define DOPRINT_SWIZZLE_HIGH 0 752 #endif 753 754 #define DOPRINT_START(xp) do { \ 755 uint64_t __lx, __llx; \ 756 uint16_t __hx; \ 757 \ 758 EXTRACT_LDBL128_WORDS(__hx, __lx, __llx, *xp); \ 759 __llx ^= DOPRINT_SWIZZLE; \ 760 __lx ^= DOPRINT_SWIZZLE_HIGH; \ 761 INSERT_LDBL128_WORDS(*xp, __hx, __lx, __llx); \ 762 printf("x = %.36Lg; ", (long double)*xp); \ 763 } while (0) 764 #define DOPRINT_END1(v) \ 765 printf("y = %.36Lg; z = 0; show(x, y, z);\n", (long double)(v)) 766 #define DOPRINT_END2(hi, lo) \ 767 printf("y = %.36Lg; z = %.36Lg; show(x, y, z);\n", \ 768 (long double)(hi), (long double)(lo)) 769 770 #endif /* DOPRINT_LD80 */ 771 772 #else /* !DOPRINT */ 773 #define DOPRINT_START(xp) 774 #define DOPRINT_END1(v) 775 #define DOPRINT_END2(hi, lo) 776 #endif /* DOPRINT */ 777 778 #define RETURNP(x) do { \ 779 DOPRINT_END1(x); \ 780 RETURNF(x); \ 781 } while (0) 782 #define RETURNPI(x) do { \ 783 DOPRINT_END1(x); \ 784 RETURNI(x); \ 785 } while (0) 786 #define RETURN2P(x, y) do { \ 787 DOPRINT_END2((x), (y)); \ 788 RETURNF((x) + (y)); \ 789 } while (0) 790 #define RETURN2PI(x, y) do { \ 791 DOPRINT_END2((x), (y)); \ 792 RETURNI((x) + (y)); \ 793 } while (0) 794 #ifdef STRUCT_RETURN 795 #define RETURNSP(rp) do { \ 796 if (!(rp)->lo_set) \ 797 RETURNP((rp)->hi); \ 798 RETURN2P((rp)->hi, (rp)->lo); \ 799 } while (0) 800 #define RETURNSPI(rp) do { \ 801 if (!(rp)->lo_set) \ 802 RETURNPI((rp)->hi); \ 803 RETURN2PI((rp)->hi, (rp)->lo); \ 804 } while (0) 805 #endif 806 #define SUM2P(x, y) ({ \ 807 const __typeof (x) __x = (x); \ 808 const __typeof (y) __y = (y); \ 809 \ 810 DOPRINT_END2(__x, __y); \ 811 __x + __y; \ 812 }) 813 814 /* 815 * ieee style elementary functions 816 * 817 * We rename functions here to improve other sources' diffability 818 * against fdlibm. 819 */ 820 #define __ieee754_sqrt sqrt 821 #define __ieee754_acos acos 822 #define __ieee754_acosh acosh 823 #define __ieee754_log log 824 #define __ieee754_log2 log2 825 #define __ieee754_atanh atanh 826 #define __ieee754_asin asin 827 #define __ieee754_atan2 atan2 828 #define __ieee754_exp exp 829 #define __ieee754_cosh cosh 830 #define __ieee754_fmod fmod 831 #define __ieee754_pow pow 832 #define __ieee754_lgamma lgamma 833 #define __ieee754_gamma gamma 834 #define __ieee754_lgamma_r lgamma_r 835 #define __ieee754_gamma_r gamma_r 836 #define __ieee754_log10 log10 837 #define __ieee754_sinh sinh 838 #define __ieee754_hypot hypot 839 #define __ieee754_j0 j0 840 #define __ieee754_j1 j1 841 #define __ieee754_y0 y0 842 #define __ieee754_y1 y1 843 #define __ieee754_jn jn 844 #define __ieee754_yn yn 845 #define __ieee754_remainder remainder 846 #define __ieee754_scalb scalb 847 #define __ieee754_sqrtf sqrtf 848 #define __ieee754_acosf acosf 849 #define __ieee754_acoshf acoshf 850 #define __ieee754_logf logf 851 #define __ieee754_atanhf atanhf 852 #define __ieee754_asinf asinf 853 #define __ieee754_atan2f atan2f 854 #define __ieee754_expf expf 855 #define __ieee754_coshf coshf 856 #define __ieee754_fmodf fmodf 857 #define __ieee754_powf powf 858 #define __ieee754_lgammaf lgammaf 859 #define __ieee754_gammaf gammaf 860 #define __ieee754_lgammaf_r lgammaf_r 861 #define __ieee754_gammaf_r gammaf_r 862 #define __ieee754_log10f log10f 863 #define __ieee754_log2f log2f 864 #define __ieee754_sinhf sinhf 865 #define __ieee754_hypotf hypotf 866 #define __ieee754_j0f j0f 867 #define __ieee754_j1f j1f 868 #define __ieee754_y0f y0f 869 #define __ieee754_y1f y1f 870 #define __ieee754_jnf jnf 871 #define __ieee754_ynf ynf 872 #define __ieee754_remainderf remainderf 873 #define __ieee754_scalbf scalbf 874 875 #define acos fdlibm_acos 876 #define acosf fdlibm_acosf 877 #define asin fdlibm_asin 878 #define asinf fdlibm_asinf 879 #define atan fdlibm_atan 880 #define atanf fdlibm_atanf 881 #define atan2 fdlibm_atan2 882 #define cos fdlibm_cos 883 #define cosf fdlibm_cosf 884 #define sin fdlibm_sin 885 #define sinf fdlibm_sinf 886 #define tan fdlibm_tan 887 #define tanf fdlibm_tanf 888 #define cosh fdlibm_cosh 889 #define sinh fdlibm_sinh 890 #define tanh fdlibm_tanh 891 #define exp fdlibm_exp 892 #define expf fdlibm_expf 893 #define exp2 fdlibm_exp2 894 #define exp2f fdlibm_exp2f 895 #define log fdlibm_log 896 #define logf fdlibm_logf 897 #define log10 fdlibm_log10 898 #define log10f fdlibm_log10f 899 #define pow fdlibm_pow 900 #define powf fdlibm_powf 901 #define acosh fdlibm_acosh 902 #define asinh fdlibm_asinh 903 #define atanh fdlibm_atanh 904 #define cbrt fdlibm_cbrt 905 #define expm1 fdlibm_expm1 906 #define hypot fdlibm_hypot 907 #define hypotf fdlibm_hypotf 908 #define log1p fdlibm_log1p 909 #define log2 fdlibm_log2 910 911 /* fdlibm kernel function */ 912 int __kernel_rem_pio2(double*,double*,int,int,int); 913 914 /* double precision kernel functions */ 915 #ifndef INLINE_REM_PIO2 916 int __ieee754_rem_pio2(double,double*); 917 #endif 918 double __kernel_sin(double,double,int); 919 double __kernel_cos(double,double); 920 double __kernel_tan(double,double,int); 921 double __ldexp_exp(double,int); 922 #ifdef _COMPLEX_H 923 double complex __ldexp_cexp(double complex,int); 924 #endif 925 926 /* float precision kernel functions */ 927 #ifndef INLINE_REM_PIO2F 928 int __ieee754_rem_pio2f(float,double*); 929 #endif 930 #ifndef INLINE_KERNEL_SINDF 931 float __kernel_sindf(double); 932 #endif 933 #ifndef INLINE_KERNEL_COSDF 934 float __kernel_cosdf(double); 935 #endif 936 #ifndef INLINE_KERNEL_TANDF 937 float __kernel_tandf(double,int); 938 #endif 939 float __ldexp_expf(float,int); 940 #ifdef _COMPLEX_H 941 float complex __ldexp_cexpf(float complex,int); 942 #endif 943 944 /* long double precision kernel functions */ 945 long double __kernel_sinl(long double, long double, int); 946 long double __kernel_cosl(long double, long double); 947 long double __kernel_tanl(long double, long double, int); 948 949 #endif /* !_MATH_PRIVATE_H_ */