tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

math_private.h (25442B)


      1 /*
      2 * ====================================================
      3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
      4 *
      5 * Developed at SunPro, a Sun Microsystems, Inc. business.
      6 * Permission to use, copy, modify, and distribute this
      7 * software is freely granted, provided that this notice
      8 * is preserved.
      9 * ====================================================
     10 */
     11 
     12 /*
     13 * from: @(#)fdlibm.h 5.1 93/09/24
     14 * $FreeBSD$
     15 */
     16 
     17 #ifndef _MATH_PRIVATE_H_
     18 #define	_MATH_PRIVATE_H_
     19 
     20 #include <cfloat>
     21 #include <stdint.h>
     22 #include <sys/types.h>
     23 
     24 #include "mozilla/EndianUtils.h"
     25 
     26 #include "fdlibm.h"
     27 
     28 /*
     29 * Emulate FreeBSD internal double types.
     30 * Adapted from https://github.com/freebsd/freebsd-src/search?q=__double_t
     31 */
     32 
     33 typedef double      __double_t;
     34 typedef __double_t  double_t;
     35 typedef float       __float_t;
     36 
     37 /*
     38 * The original fdlibm code used statements like:
     39 *	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
     40 *	ix0 = *(n0+(int*)&x);			* high word of x *
     41 *	ix1 = *((1-n0)+(int*)&x);		* low word of x *
     42 * to dig two 32 bit words out of the 64 bit IEEE floating point
     43 * value.  That is non-ANSI, and, moreover, the gcc instruction
     44 * scheduler gets it wrong.  We instead use the following macros.
     45 * Unlike the original code, we determine the endianness at compile
     46 * time, not at run time; I don't see much benefit to selecting
     47 * endianness at run time.
     48 */
     49 
     50 #ifndef u_int32_t
     51 #define u_int32_t uint32_t
     52 #endif
     53 #ifndef u_int64_t
     54 #define u_int64_t uint64_t
     55 #endif
     56 
     57 /* A union which permits us to convert between a long double and
     58   four 32 bit ints.  */
     59 
     60 #if MOZ_BIG_ENDIAN()
     61 
     62 typedef union
     63 {
     64  long double value;
     65  struct {
     66    u_int32_t mswhi;
     67    u_int32_t mswlo;
     68    u_int32_t lswhi;
     69    u_int32_t lswlo;
     70  } parts32;
     71  struct {
     72    u_int64_t msw;
     73    u_int64_t lsw;
     74  } parts64;
     75 } ieee_quad_shape_type;
     76 
     77 #endif
     78 
     79 #if MOZ_LITTLE_ENDIAN()
     80 
     81 typedef union
     82 {
     83  long double value;
     84  struct {
     85    u_int32_t lswlo;
     86    u_int32_t lswhi;
     87    u_int32_t mswlo;
     88    u_int32_t mswhi;
     89  } parts32;
     90  struct {
     91    u_int64_t lsw;
     92    u_int64_t msw;
     93  } parts64;
     94 } ieee_quad_shape_type;
     95 
     96 #endif
     97 
     98 #if MOZ_BIG_ENDIAN()
     99 
    100 typedef union
    101 {
    102  double value;
    103  struct
    104  {
    105    u_int32_t msw;
    106    u_int32_t lsw;
    107  } parts;
    108  struct
    109  {
    110    u_int64_t w;
    111  } xparts;
    112 } ieee_double_shape_type;
    113 
    114 #endif
    115 
    116 #if MOZ_LITTLE_ENDIAN()
    117 
    118 typedef union
    119 {
    120  double value;
    121  struct
    122  {
    123    u_int32_t lsw;
    124    u_int32_t msw;
    125  } parts;
    126  struct
    127  {
    128    u_int64_t w;
    129  } xparts;
    130 } ieee_double_shape_type;
    131 
    132 #endif
    133 
    134 /* Get two 32 bit ints from a double.  */
    135 
    136 #define EXTRACT_WORDS(ix0,ix1,d)				\
    137 do {								\
    138  ieee_double_shape_type ew_u;					\
    139  ew_u.value = (d);						\
    140  (ix0) = ew_u.parts.msw;					\
    141  (ix1) = ew_u.parts.lsw;					\
    142 } while (0)
    143 
    144 /* Get a 64-bit int from a double. */
    145 #define EXTRACT_WORD64(ix,d)					\
    146 do {								\
    147  ieee_double_shape_type ew_u;					\
    148  ew_u.value = (d);						\
    149  (ix) = ew_u.xparts.w;						\
    150 } while (0)
    151 
    152 /* Get the more significant 32 bit int from a double.  */
    153 
    154 #define GET_HIGH_WORD(i,d)					\
    155 do {								\
    156  ieee_double_shape_type gh_u;					\
    157  gh_u.value = (d);						\
    158  (i) = gh_u.parts.msw;						\
    159 } while (0)
    160 
    161 /* Get the less significant 32 bit int from a double.  */
    162 
    163 #define GET_LOW_WORD(i,d)					\
    164 do {								\
    165  ieee_double_shape_type gl_u;					\
    166  gl_u.value = (d);						\
    167  (i) = gl_u.parts.lsw;						\
    168 } while (0)
    169 
    170 /* Set a double from two 32 bit ints.  */
    171 
    172 #define INSERT_WORDS(d,ix0,ix1)					\
    173 do {								\
    174  ieee_double_shape_type iw_u;					\
    175  iw_u.parts.msw = (ix0);					\
    176  iw_u.parts.lsw = (ix1);					\
    177  (d) = iw_u.value;						\
    178 } while (0)
    179 
    180 /* Set a double from a 64-bit int. */
    181 #define INSERT_WORD64(d,ix)					\
    182 do {								\
    183  ieee_double_shape_type iw_u;					\
    184  iw_u.xparts.w = (ix);						\
    185  (d) = iw_u.value;						\
    186 } while (0)
    187 
    188 /* Set the more significant 32 bits of a double from an int.  */
    189 
    190 #define SET_HIGH_WORD(d,v)					\
    191 do {								\
    192  ieee_double_shape_type sh_u;					\
    193  sh_u.value = (d);						\
    194  sh_u.parts.msw = (v);						\
    195  (d) = sh_u.value;						\
    196 } while (0)
    197 
    198 /* Set the less significant 32 bits of a double from an int.  */
    199 
    200 #define SET_LOW_WORD(d,v)					\
    201 do {								\
    202  ieee_double_shape_type sl_u;					\
    203  sl_u.value = (d);						\
    204  sl_u.parts.lsw = (v);						\
    205  (d) = sl_u.value;						\
    206 } while (0)
    207 
    208 /*
    209 * A union which permits us to convert between a float and a 32 bit
    210 * int.
    211 */
    212 
    213 typedef union
    214 {
    215  float value;
    216  /* FIXME: Assumes 32 bit int.  */
    217  unsigned int word;
    218 } ieee_float_shape_type;
    219 
    220 /* Get a 32 bit int from a float.  */
    221 
    222 #define GET_FLOAT_WORD(i,d)					\
    223 do {								\
    224  ieee_float_shape_type gf_u;					\
    225  gf_u.value = (d);						\
    226  (i) = gf_u.word;						\
    227 } while (0)
    228 
    229 /* Set a float from a 32 bit int.  */
    230 
    231 #define SET_FLOAT_WORD(d,i)					\
    232 do {								\
    233  ieee_float_shape_type sf_u;					\
    234  sf_u.word = (i);						\
    235  (d) = sf_u.value;						\
    236 } while (0)
    237 
    238 /*
    239 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
    240 * double.
    241 */
    242 
    243 #define	EXTRACT_LDBL80_WORDS(ix0,ix1,d)				\
    244 do {								\
    245  union IEEEl2bits ew_u;					\
    246  ew_u.e = (d);							\
    247  (ix0) = ew_u.xbits.expsign;					\
    248  (ix1) = ew_u.xbits.man;					\
    249 } while (0)
    250 
    251 /*
    252 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
    253 * long double.
    254 */
    255 
    256 #define	EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d)			\
    257 do {								\
    258  union IEEEl2bits ew_u;					\
    259  ew_u.e = (d);							\
    260  (ix0) = ew_u.xbits.expsign;					\
    261  (ix1) = ew_u.xbits.manh;					\
    262  (ix2) = ew_u.xbits.manl;					\
    263 } while (0)
    264 
    265 /* Get expsign as a 16 bit int from a long double.  */
    266 
    267 #define	GET_LDBL_EXPSIGN(i,d)					\
    268 do {								\
    269  union IEEEl2bits ge_u;					\
    270  ge_u.e = (d);							\
    271  (i) = ge_u.xbits.expsign;					\
    272 } while (0)
    273 
    274 /*
    275 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
    276 * mantissa.
    277 */
    278 
    279 #define	INSERT_LDBL80_WORDS(d,ix0,ix1)				\
    280 do {								\
    281  union IEEEl2bits iw_u;					\
    282  iw_u.xbits.expsign = (ix0);					\
    283  iw_u.xbits.man = (ix1);					\
    284  (d) = iw_u.e;							\
    285 } while (0)
    286 
    287 /*
    288 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
    289 * comprising the mantissa.
    290 */
    291 
    292 #define	INSERT_LDBL128_WORDS(d,ix0,ix1,ix2)			\
    293 do {								\
    294  union IEEEl2bits iw_u;					\
    295  iw_u.xbits.expsign = (ix0);					\
    296  iw_u.xbits.manh = (ix1);					\
    297  iw_u.xbits.manl = (ix2);					\
    298  (d) = iw_u.e;							\
    299 } while (0)
    300 
    301 /* Set expsign of a long double from a 16 bit int.  */
    302 
    303 #define	SET_LDBL_EXPSIGN(d,v)					\
    304 do {								\
    305  union IEEEl2bits se_u;					\
    306  se_u.e = (d);							\
    307  se_u.xbits.expsign = (v);					\
    308  (d) = se_u.e;							\
    309 } while (0)
    310 
    311 #ifdef __i386__
    312 /* Long double constants are broken on i386. */
    313 #define	LD80C(m, ex, v) {						\
    314 .xbits.man = __CONCAT(m, ULL),					\
    315 .xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0),	\
    316 }
    317 #else
    318 /* The above works on non-i386 too, but we use this to check v. */
    319 #define	LD80C(m, ex, v)	{ .e = (v), }
    320 #endif
    321 
    322 #ifdef FLT_EVAL_METHOD
    323 /*
    324 * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
    325 */
    326 #if !defined(_MSC_VER) && (FLT_EVAL_METHOD == 0 || __GNUC__ == 0)
    327 #define	STRICT_ASSIGN(type, lval, rval)	((lval) = (rval))
    328 #else
    329 #define	STRICT_ASSIGN(type, lval, rval) do {	\
    330 volatile type __lval;			\
    331 					\
    332 if (sizeof(type) >= sizeof(long double))	\
    333 	(lval) = (rval);		\
    334 else {					\
    335 	__lval = (rval);		\
    336 	(lval) = __lval;		\
    337 }					\
    338 } while (0)
    339 #endif
    340 #else
    341 #define	STRICT_ASSIGN(type, lval, rval) do {	\
    342 volatile type __lval;			\
    343 					\
    344 if (sizeof(type) >= sizeof(long double))	\
    345 	(lval) = (rval);		\
    346 else {					\
    347 	__lval = (rval);		\
    348 	(lval) = __lval;		\
    349 }					\
    350 } while (0)
    351 #endif /* FLT_EVAL_METHOD */
    352 
    353 /* Support switching the mode to FP_PE if necessary. */
    354 #if defined(__i386__) && !defined(NO_FPSETPREC)
    355 #define	ENTERI() ENTERIT(long double)
    356 #define	ENTERIT(returntype)			\
    357 returntype __retval;			\
    358 fp_prec_t __oprec;			\
    359 					\
    360 if ((__oprec = fpgetprec()) != FP_PE)	\
    361 	fpsetprec(FP_PE)
    362 #define	RETURNI(x) do {				\
    363 __retval = (x);				\
    364 if (__oprec != FP_PE)			\
    365 	fpsetprec(__oprec);		\
    366 RETURNF(__retval);			\
    367 } while (0)
    368 #define	ENTERV()				\
    369 fp_prec_t __oprec;			\
    370 					\
    371 if ((__oprec = fpgetprec()) != FP_PE)	\
    372 	fpsetprec(FP_PE)
    373 #define	RETURNV() do {				\
    374 if (__oprec != FP_PE)			\
    375 	fpsetprec(__oprec);		\
    376 return;			\
    377 } while (0)
    378 #else
    379 #define	ENTERI()
    380 #define	ENTERIT(x)
    381 #define	RETURNI(x)	RETURNF(x)
    382 #define	ENTERV()
    383 #define	RETURNV()	return
    384 #endif
    385 
    386 /* Default return statement if hack*_t() is not used. */
    387 #define      RETURNF(v)      return (v)
    388 
    389 /*
    390 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
    391 * a == 0, but is slower.
    392 */
    393 #define	_2sum(a, b) do {	\
    394 __typeof(a) __s, __w;	\
    395 			\
    396 __w = (a) + (b);	\
    397 __s = __w - (a);	\
    398 (b) = ((a) - (__w - __s)) + ((b) - __s); \
    399 (a) = __w;		\
    400 } while (0)
    401 
    402 /*
    403 * 2sumF algorithm.
    404 *
    405 * "Normalize" the terms in the infinite-precision expression a + b for
    406 * the sum of 2 floating point values so that b is as small as possible
    407 * relative to 'a'.  (The resulting 'a' is the value of the expression in
    408 * the same precision as 'a' and the resulting b is the rounding error.)
    409 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
    410 * exponent overflow or underflow must not occur.  This uses a Theorem of
    411 * Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.  The name "TwoSum"
    412 * is apparently due to Skewchuk (1997).
    413 *
    414 * For this to always work, assignment of a + b to 'a' must not retain any
    415 * extra precision in a + b.  This is required by C standards but broken
    416 * in many compilers.  The brokenness cannot be worked around using
    417 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
    418 * algorithm would be destroyed by non-null strict assignments.  (The
    419 * compilers are correct to be broken -- the efficiency of all floating
    420 * point code calculations would be destroyed similarly if they forced the
    421 * conversions.)
    422 *
    423 * Fortunately, a case that works well can usually be arranged by building
    424 * any extra precision into the type of 'a' -- 'a' should have type float_t,
    425 * double_t or long double.  b's type should be no larger than 'a's type.
    426 * Callers should use these types with scopes as large as possible, to
    427 * reduce their own extra-precision and efficiciency problems.  In
    428 * particular, they shouldn't convert back and forth just to call here.
    429 */
    430 #ifdef DEBUG
    431 #define	_2sumF(a, b) do {				\
    432 __typeof(a) __w;				\
    433 volatile __typeof(a) __ia, __ib, __r, __vw;	\
    434 						\
    435 __ia = (a);					\
    436 __ib = (b);					\
    437 assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib));	\
    438 						\
    439 __w = (a) + (b);				\
    440 (b) = ((a) - __w) + (b);			\
    441 (a) = __w;					\
    442 						\
    443 /* The next 2 assertions are weak if (a) is already long double. */ \
    444 assert((long double)__ia + __ib == (long double)(a) + (b));	\
    445 __vw = __ia + __ib;				\
    446 __r = __ia - __vw;				\
    447 __r += __ib;					\
    448 assert(__vw == (a) && __r == (b));		\
    449 } while (0)
    450 #else /* !DEBUG */
    451 #define	_2sumF(a, b) do {	\
    452 __typeof(a) __w;	\
    453 			\
    454 __w = (a) + (b);	\
    455 (b) = ((a) - __w) + (b); \
    456 (a) = __w;		\
    457 } while (0)
    458 #endif /* DEBUG */
    459 
    460 /*
    461 * Set x += c, where x is represented in extra precision as a + b.
    462 * x must be sufficiently normalized and sufficiently larger than c,
    463 * and the result is then sufficiently normalized.
    464 *
    465 * The details of ordering are that |a| must be >= |c| (so that (a, c)
    466 * can be normalized without extra work to swap 'a' with c).  The details of
    467 * the normalization are that b must be small relative to the normalized 'a'.
    468 * Normalization of (a, c) makes the normalized c tiny relative to the
    469 * normalized a, so b remains small relative to 'a' in the result.  However,
    470 * b need not ever be tiny relative to 'a'.  For example, b might be about
    471 * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
    472 * That is usually enough, and adding c (which by normalization is about
    473 * 2**53 times smaller than a) cannot change b significantly.  However,
    474 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
    475 * significantly relative to b.  The caller must ensure that significant
    476 * cancellation doesn't occur, either by having c of the same sign as 'a',
    477 * or by having |c| a few percent smaller than |a|.  Pre-normalization of
    478 * (a, b) may help.
    479 *
    480 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
    481 * exercise 19).  We gain considerable efficiency by requiring the terms to
    482 * be sufficiently normalized and sufficiently increasing.
    483 */
    484 #define	_3sumF(a, b, c) do {	\
    485 __typeof(a) __tmp;	\
    486 			\
    487 __tmp = (c);		\
    488 _2sumF(__tmp, (a));	\
    489 (b) += (a);		\
    490 (a) = __tmp;		\
    491 } while (0)
    492 
    493 /*
    494 * Common routine to process the arguments to nan(), nanf(), and nanl().
    495 */
    496 void _scan_nan(uint32_t *__words, int __num_words, const char *__s);
    497 
    498 /*
    499 * Mix 0, 1 or 2 NaNs.  First add 0 to each arg.  This normally just turns
    500 * signaling NaNs into quiet NaNs by setting a quiet bit.  We do this
    501 * because we want to never return a signaling NaN, and also because we
    502 * don't want the quiet bit to affect the result.  Then mix the converted
    503 * args using the specified operation.
    504 *
    505 * When one arg is NaN, the result is typically that arg quieted.  When both
    506 * args are NaNs, the result is typically the quietening of the arg whose
    507 * mantissa is largest after quietening.  When neither arg is NaN, the
    508 * result may be NaN because it is indeterminate, or finite for subsequent
    509 * construction of a NaN as the indeterminate 0.0L/0.0L.
    510 *
    511 * Technical complications: the result in bits after rounding to the final
    512 * precision might depend on the runtime precision and/or on compiler
    513 * optimizations, especially when different register sets are used for
    514 * different precisions.  Try to make the result not depend on at least the
    515 * runtime precision by always doing the main mixing step in long double
    516 * precision.  Try to reduce dependencies on optimizations by adding the
    517 * the 0's in different precisions (unless everything is in long double
    518 * precision).
    519 */
    520 #define	nan_mix(x, y)		(nan_mix_op((x), (y), +))
    521 #define	nan_mix_op(x, y, op)	(((x) + 0.0L) op ((y) + 0))
    522 
    523 #ifdef _COMPLEX_H
    524 
    525 /*
    526 * C99 specifies that complex numbers have the same representation as
    527 * an array of two elements, where the first element is the real part
    528 * and the second element is the imaginary part.
    529 */
    530 typedef union {
    531 float complex f;
    532 float a[2];
    533 } float_complex;
    534 typedef union {
    535 double complex f;
    536 double a[2];
    537 } double_complex;
    538 typedef union {
    539 long double complex f;
    540 long double a[2];
    541 } long_double_complex;
    542 #define	REALPART(z)	((z).a[0])
    543 #define	IMAGPART(z)	((z).a[1])
    544 
    545 /*
    546 * Inline functions that can be used to construct complex values.
    547 *
    548 * The C99 standard intends x+I*y to be used for this, but x+I*y is
    549 * currently unusable in general since gcc introduces many overflow,
    550 * underflow, sign and efficiency bugs by rewriting I*y as
    551 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
    552 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
    553 * to -0.0+I*0.0.
    554 *
    555 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
    556 * to construct complex values.  Compilers that conform to the C99
    557 * standard require the following functions to avoid the above issues.
    558 */
    559 
    560 #ifndef CMPLXF
    561 static __inline float complex
    562 CMPLXF(float x, float y)
    563 {
    564 float_complex z;
    565 
    566 REALPART(z) = x;
    567 IMAGPART(z) = y;
    568 return (z.f);
    569 }
    570 #endif
    571 
    572 #ifndef CMPLX
    573 static __inline double complex
    574 CMPLX(double x, double y)
    575 {
    576 double_complex z;
    577 
    578 REALPART(z) = x;
    579 IMAGPART(z) = y;
    580 return (z.f);
    581 }
    582 #endif
    583 
    584 #ifndef CMPLXL
    585 static __inline long double complex
    586 CMPLXL(long double x, long double y)
    587 {
    588 long_double_complex z;
    589 
    590 REALPART(z) = x;
    591 IMAGPART(z) = y;
    592 return (z.f);
    593 }
    594 #endif
    595 
    596 #endif /* _COMPLEX_H */
    597 
    598 /*
    599 * The rnint() family rounds to the nearest integer for a restricted range
    600 * range of args (up to about 2**MANT_DIG).  We assume that the current
    601 * rounding mode is FE_TONEAREST so that this can be done efficiently.
    602 * Extra precision causes more problems in practice, and we only centralize
    603 * this here to reduce those problems, and have not solved the efficiency
    604 * problems.  The exp2() family uses a more delicate version of this that
    605 * requires extracting bits from the intermediate value, so it is not
    606 * centralized here and should copy any solution of the efficiency problems.
    607 */
    608 
    609 static inline double
    610 rnint(__double_t x)
    611 {
    612 /*
    613  * This casts to double to kill any extra precision.  This depends
    614  * on the cast being applied to a double_t to avoid compiler bugs
    615  * (this is a cleaner version of STRICT_ASSIGN()).  This is
    616  * inefficient if there actually is extra precision, but is hard
    617  * to improve on.  We use double_t in the API to minimise conversions
    618  * for just calling here.  Note that we cannot easily change the
    619  * magic number to the one that works directly with double_t, since
    620  * the rounding precision is variable at runtime on x86 so the
    621  * magic number would need to be variable.  Assuming that the
    622  * rounding precision is always the default is too fragile.  This
    623  * and many other complications will move when the default is
    624  * changed to FP_PE.
    625  */
    626 return ((double)(x + 0x1.8p52) - 0x1.8p52);
    627 }
    628 
    629 /*
    630 * irint() and i64rint() give the same result as casting to their integer
    631 * return type provided their arg is a floating point integer.  They can
    632 * sometimes be more efficient because no rounding is required.
    633 */
    634 #if defined(amd64) || defined(__i386__)
    635 #define	irint(x)						\
    636    (sizeof(x) == sizeof(float) &&				\
    637    sizeof(__float_t) == sizeof(long double) ? irintf(x) :	\
    638    sizeof(x) == sizeof(double) &&				\
    639    sizeof(__double_t) == sizeof(long double) ? irintd(x) :	\
    640    sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
    641 #else
    642 #define	irint(x)	((int)(x))
    643 #endif
    644 
    645 #define	i64rint(x)	((int64_t)(x))	/* only needed for ld128 so not opt. */
    646 
    647 #if defined(__i386__)
    648 static __inline int
    649 irintf(float x)
    650 {
    651 int n;
    652 
    653 __asm("fistl %0" : "=m" (n) : "t" (x));
    654 return (n);
    655 }
    656 
    657 static __inline int
    658 irintd(double x)
    659 {
    660 int n;
    661 
    662 __asm("fistl %0" : "=m" (n) : "t" (x));
    663 return (n);
    664 }
    665 #endif
    666 
    667 #if defined(__amd64__) || defined(__i386__)
    668 static __inline int
    669 irintl(long double x)
    670 {
    671 int n;
    672 
    673 __asm("fistl %0" : "=m" (n) : "t" (x));
    674 return (n);
    675 }
    676 #endif
    677 
    678 #ifdef DEBUG
    679 #if defined(__amd64__) || defined(__i386__)
    680 #define	breakpoint()	asm("int $3")
    681 #elif defined(__wasm__)
    682 #define breakpoint()    __builtin_trap()
    683 #else
    684 #include <signal.h>
    685 
    686 #define	breakpoint()	raise(SIGTRAP)
    687 #endif
    688 #endif
    689 
    690 /* Write a pari script to test things externally. */
    691 #ifdef DOPRINT
    692 #include <stdio.h>
    693 
    694 #ifndef DOPRINT_SWIZZLE
    695 #define	DOPRINT_SWIZZLE		0
    696 #endif
    697 
    698 #ifdef DOPRINT_LD80
    699 
    700 #define	DOPRINT_START(xp) do {						\
    701 uint64_t __lx;							\
    702 uint16_t __hx;							\
    703 								\
    704 /* Hack to give more-problematic args. */			\
    705 EXTRACT_LDBL80_WORDS(__hx, __lx, *xp);				\
    706 __lx ^= DOPRINT_SWIZZLE;					\
    707 INSERT_LDBL80_WORDS(*xp, __hx, __lx);				\
    708 printf("x = %.21Lg; ", (long double)*xp);			\
    709 } while (0)
    710 #define	DOPRINT_END1(v)							\
    711 printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
    712 #define	DOPRINT_END2(hi, lo)						\
    713 printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\
    714     (long double)(hi), (long double)(lo))
    715 
    716 #elif defined(DOPRINT_D64)
    717 
    718 #define	DOPRINT_START(xp) do {						\
    719 uint32_t __hx, __lx;						\
    720 								\
    721 EXTRACT_WORDS(__hx, __lx, *xp);					\
    722 __lx ^= DOPRINT_SWIZZLE;					\
    723 INSERT_WORDS(*xp, __hx, __lx);					\
    724 printf("x = %.21Lg; ", (long double)*xp);			\
    725 } while (0)
    726 #define	DOPRINT_END1(v)							\
    727 printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
    728 #define	DOPRINT_END2(hi, lo)						\
    729 printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\
    730     (long double)(hi), (long double)(lo))
    731 
    732 #elif defined(DOPRINT_F32)
    733 
    734 #define	DOPRINT_START(xp) do {						\
    735 uint32_t __hx;							\
    736 								\
    737 GET_FLOAT_WORD(__hx, *xp);					\
    738 __hx ^= DOPRINT_SWIZZLE;					\
    739 SET_FLOAT_WORD(*xp, __hx);					\
    740 printf("x = %.21Lg; ", (long double)*xp);			\
    741 } while (0)
    742 #define	DOPRINT_END1(v)							\
    743 printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
    744 #define	DOPRINT_END2(hi, lo)						\
    745 printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\
    746     (long double)(hi), (long double)(lo))
    747 
    748 #else /* !DOPRINT_LD80 && !DOPRINT_D64 (LD128 only) */
    749 
    750 #ifndef DOPRINT_SWIZZLE_HIGH
    751 #define	DOPRINT_SWIZZLE_HIGH	0
    752 #endif
    753 
    754 #define	DOPRINT_START(xp) do {						\
    755 uint64_t __lx, __llx;						\
    756 uint16_t __hx;							\
    757 								\
    758 EXTRACT_LDBL128_WORDS(__hx, __lx, __llx, *xp);			\
    759 __llx ^= DOPRINT_SWIZZLE;					\
    760 __lx ^= DOPRINT_SWIZZLE_HIGH;					\
    761 INSERT_LDBL128_WORDS(*xp, __hx, __lx, __llx);			\
    762 printf("x = %.36Lg; ", (long double)*xp);					\
    763 } while (0)
    764 #define	DOPRINT_END1(v)							\
    765 printf("y = %.36Lg; z = 0; show(x, y, z);\n", (long double)(v))
    766 #define	DOPRINT_END2(hi, lo)						\
    767 printf("y = %.36Lg; z = %.36Lg; show(x, y, z);\n",		\
    768     (long double)(hi), (long double)(lo))
    769 
    770 #endif /* DOPRINT_LD80 */
    771 
    772 #else /* !DOPRINT */
    773 #define	DOPRINT_START(xp)
    774 #define	DOPRINT_END1(v)
    775 #define	DOPRINT_END2(hi, lo)
    776 #endif /* DOPRINT */
    777 
    778 #define	RETURNP(x) do {			\
    779 DOPRINT_END1(x);		\
    780 RETURNF(x);			\
    781 } while (0)
    782 #define	RETURNPI(x) do {		\
    783 DOPRINT_END1(x);		\
    784 RETURNI(x);			\
    785 } while (0)
    786 #define	RETURN2P(x, y) do {		\
    787 DOPRINT_END2((x), (y));		\
    788 RETURNF((x) + (y));		\
    789 } while (0)
    790 #define	RETURN2PI(x, y) do {		\
    791 DOPRINT_END2((x), (y));		\
    792 RETURNI((x) + (y));		\
    793 } while (0)
    794 #ifdef STRUCT_RETURN
    795 #define	RETURNSP(rp) do {		\
    796 if (!(rp)->lo_set)		\
    797 	RETURNP((rp)->hi);	\
    798 RETURN2P((rp)->hi, (rp)->lo);	\
    799 } while (0)
    800 #define	RETURNSPI(rp) do {		\
    801 if (!(rp)->lo_set)		\
    802 	RETURNPI((rp)->hi);	\
    803 RETURN2PI((rp)->hi, (rp)->lo);	\
    804 } while (0)
    805 #endif
    806 #define	SUM2P(x, y) ({			\
    807 const __typeof (x) __x = (x);	\
    808 const __typeof (y) __y = (y);	\
    809 				\
    810 DOPRINT_END2(__x, __y);		\
    811 __x + __y;			\
    812 })
    813 
    814 /*
    815 * ieee style elementary functions
    816 *
    817 * We rename functions here to improve other sources' diffability
    818 * against fdlibm.
    819 */
    820 #define	__ieee754_sqrt	sqrt
    821 #define	__ieee754_acos	acos
    822 #define	__ieee754_acosh	acosh
    823 #define	__ieee754_log	log
    824 #define	__ieee754_log2	log2
    825 #define	__ieee754_atanh	atanh
    826 #define	__ieee754_asin	asin
    827 #define	__ieee754_atan2	atan2
    828 #define	__ieee754_exp	exp
    829 #define	__ieee754_cosh	cosh
    830 #define	__ieee754_fmod	fmod
    831 #define	__ieee754_pow	pow
    832 #define	__ieee754_lgamma lgamma
    833 #define	__ieee754_gamma	gamma
    834 #define	__ieee754_lgamma_r lgamma_r
    835 #define	__ieee754_gamma_r gamma_r
    836 #define	__ieee754_log10	log10
    837 #define	__ieee754_sinh	sinh
    838 #define	__ieee754_hypot	hypot
    839 #define	__ieee754_j0	j0
    840 #define	__ieee754_j1	j1
    841 #define	__ieee754_y0	y0
    842 #define	__ieee754_y1	y1
    843 #define	__ieee754_jn	jn
    844 #define	__ieee754_yn	yn
    845 #define	__ieee754_remainder remainder
    846 #define	__ieee754_scalb	scalb
    847 #define	__ieee754_sqrtf	sqrtf
    848 #define	__ieee754_acosf	acosf
    849 #define	__ieee754_acoshf acoshf
    850 #define	__ieee754_logf	logf
    851 #define	__ieee754_atanhf atanhf
    852 #define	__ieee754_asinf	asinf
    853 #define	__ieee754_atan2f atan2f
    854 #define	__ieee754_expf	expf
    855 #define	__ieee754_coshf	coshf
    856 #define	__ieee754_fmodf	fmodf
    857 #define	__ieee754_powf	powf
    858 #define	__ieee754_lgammaf lgammaf
    859 #define	__ieee754_gammaf gammaf
    860 #define	__ieee754_lgammaf_r lgammaf_r
    861 #define	__ieee754_gammaf_r gammaf_r
    862 #define	__ieee754_log10f log10f
    863 #define	__ieee754_log2f log2f
    864 #define	__ieee754_sinhf	sinhf
    865 #define	__ieee754_hypotf hypotf
    866 #define	__ieee754_j0f	j0f
    867 #define	__ieee754_j1f	j1f
    868 #define	__ieee754_y0f	y0f
    869 #define	__ieee754_y1f	y1f
    870 #define	__ieee754_jnf	jnf
    871 #define	__ieee754_ynf	ynf
    872 #define	__ieee754_remainderf remainderf
    873 #define	__ieee754_scalbf scalbf
    874 
    875 #define acos fdlibm_acos
    876 #define acosf fdlibm_acosf
    877 #define asin fdlibm_asin
    878 #define asinf fdlibm_asinf
    879 #define atan fdlibm_atan
    880 #define atanf fdlibm_atanf
    881 #define atan2 fdlibm_atan2
    882 #define cos fdlibm_cos
    883 #define cosf fdlibm_cosf
    884 #define sin fdlibm_sin
    885 #define sinf fdlibm_sinf
    886 #define tan fdlibm_tan
    887 #define tanf fdlibm_tanf
    888 #define cosh fdlibm_cosh
    889 #define sinh fdlibm_sinh
    890 #define tanh fdlibm_tanh
    891 #define exp fdlibm_exp
    892 #define expf fdlibm_expf
    893 #define exp2 fdlibm_exp2
    894 #define exp2f fdlibm_exp2f
    895 #define log fdlibm_log
    896 #define logf fdlibm_logf
    897 #define log10 fdlibm_log10
    898 #define log10f fdlibm_log10f
    899 #define pow fdlibm_pow
    900 #define powf fdlibm_powf
    901 #define acosh fdlibm_acosh
    902 #define asinh fdlibm_asinh
    903 #define atanh fdlibm_atanh
    904 #define cbrt fdlibm_cbrt
    905 #define expm1 fdlibm_expm1
    906 #define hypot fdlibm_hypot
    907 #define hypotf fdlibm_hypotf
    908 #define log1p fdlibm_log1p
    909 #define log2 fdlibm_log2
    910 
    911 /* fdlibm kernel function */
    912 int	__kernel_rem_pio2(double*,double*,int,int,int);
    913 
    914 /* double precision kernel functions */
    915 #ifndef INLINE_REM_PIO2
    916 int	__ieee754_rem_pio2(double,double*);
    917 #endif
    918 double	__kernel_sin(double,double,int);
    919 double	__kernel_cos(double,double);
    920 double	__kernel_tan(double,double,int);
    921 double	__ldexp_exp(double,int);
    922 #ifdef _COMPLEX_H
    923 double complex __ldexp_cexp(double complex,int);
    924 #endif
    925 
    926 /* float precision kernel functions */
    927 #ifndef INLINE_REM_PIO2F
    928 int	__ieee754_rem_pio2f(float,double*);
    929 #endif
    930 #ifndef INLINE_KERNEL_SINDF
    931 float	__kernel_sindf(double);
    932 #endif
    933 #ifndef INLINE_KERNEL_COSDF
    934 float	__kernel_cosdf(double);
    935 #endif
    936 #ifndef INLINE_KERNEL_TANDF
    937 float	__kernel_tandf(double,int);
    938 #endif
    939 float	__ldexp_expf(float,int);
    940 #ifdef _COMPLEX_H
    941 float complex __ldexp_cexpf(float complex,int);
    942 #endif
    943 
    944 /* long double precision kernel functions */
    945 long double __kernel_sinl(long double, long double, int);
    946 long double __kernel_cosl(long double, long double);
    947 long double __kernel_tanl(long double, long double, int);
    948 
    949 #endif /* !_MATH_PRIVATE_H_ */