xsum.h (5618B)
1 /* INTERFACE TO FUNCTIONS FOR EXACT SUMMATION. */ 2 3 /* Copyright 2015, 2018, 2021 Radford M. Neal 4 5 Permission is hereby granted, free of charge, to any person obtaining 6 a copy of this software and associated documentation files (the 7 "Software"), to deal in the Software without restriction, including 8 without limitation the rights to use, copy, modify, merge, publish, 9 distribute, sublicense, and/or sell copies of the Software, and to 10 permit persons to whom the Software is furnished to do so, subject to 11 the following conditions: 12 13 The above copyright notice and this permission notice shall be 14 included in all copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25 #ifndef XSUM_H 26 #define XSUM_H 27 28 #include <stdlib.h> 29 #include <stdint.h> 30 #include <stddef.h> 31 32 /* CONSTANTS DEFINING THE FLOATING POINT FORMAT. */ 33 34 typedef double xsum_flt; /* C floating point type sums are done for */ 35 36 typedef int64_t xsum_int; /* Signed integer type for a fp value */ 37 typedef uint64_t xsum_uint; /* Unsigned integer type for a fp value */ 38 typedef int_fast16_t xsum_expint; /* Integer type for holding an exponent */ 39 40 #define XSUM_MANTISSA_BITS 52 /* Bits in fp mantissa, excludes implict 1 */ 41 #define XSUM_EXP_BITS 11 /* Bits in fp exponent */ 42 43 #define XSUM_MANTISSA_MASK \ 44 (((xsum_int)1 << XSUM_MANTISSA_BITS) - 1) /* Mask for mantissa bits */ 45 46 #define XSUM_EXP_MASK ((1 << XSUM_EXP_BITS) - 1) /* Mask for exponent */ 47 48 #define XSUM_EXP_BIAS \ 49 ((1 << (XSUM_EXP_BITS - 1)) - 1) /* Bias added to signed exponent */ 50 51 #define XSUM_SIGN_BIT \ 52 (XSUM_MANTISSA_BITS + XSUM_EXP_BITS) /* Position of sign bit */ 53 54 #define XSUM_SIGN_MASK ((xsum_uint)1 << XSUM_SIGN_BIT) /* Mask for sign bit */ 55 56 /* CONSTANTS DEFINING THE SMALL ACCUMULATOR FORMAT. */ 57 58 #define XSUM_SCHUNK_BITS 64 /* Bits in chunk of the small accumulator */ 59 typedef int64_t xsum_schunk; /* Integer type of small accumulator chunk */ 60 61 #define XSUM_LOW_EXP_BITS 5 /* # of low bits of exponent, in one chunk */ 62 63 #define XSUM_LOW_EXP_MASK \ 64 ((1 << XSUM_LOW_EXP_BITS) - 1) /* Mask for low-order exponent bits */ 65 66 #define XSUM_HIGH_EXP_BITS \ 67 (XSUM_EXP_BITS - XSUM_LOW_EXP_BITS) /* # of high exponent bits for index */ 68 69 #define XSUM_HIGH_EXP_MASK \ 70 ((1 << HIGH_EXP_BITS) - 1) /* Mask for high-order exponent bits */ 71 72 #define XSUM_SCHUNKS \ 73 ((1 << XSUM_HIGH_EXP_BITS) + 3) /* # of chunks in small accumulator */ 74 75 #define XSUM_LOW_MANTISSA_BITS \ 76 (1 << XSUM_LOW_EXP_BITS) /* Bits in low part of mantissa */ 77 78 #define XSUM_HIGH_MANTISSA_BITS \ 79 (XSUM_MANTISSA_BITS - XSUM_LOW_MANTISSA_BITS) /* Bits in high part */ 80 81 #define XSUM_LOW_MANTISSA_MASK \ 82 (((xsum_int)1 << XSUM_LOW_MANTISSA_BITS) - 1) /* Mask for low bits */ 83 84 #define XSUM_SMALL_CARRY_BITS \ 85 ((XSUM_SCHUNK_BITS - 1) - XSUM_MANTISSA_BITS) /* Bits sums can carry into */ 86 87 #define XSUM_SMALL_CARRY_TERMS \ 88 ((1 << XSUM_SMALL_CARRY_BITS) - 1) /* # terms can add before need prop. */ 89 90 typedef struct { 91 xsum_schunk chunk[XSUM_SCHUNKS]; /* Chunks making up small accumulator */ 92 xsum_int Inf; /* If non-zero, +Inf, -Inf, or NaN */ 93 xsum_int NaN; /* If non-zero, a NaN value with payload */ 94 int adds_until_propagate; /* Number of remaining adds before carry */ 95 } xsum_small_accumulator; /* propagation must be done again */ 96 97 /* CONSTANTS DEFINING THE LARGE ACCUMULATOR FORMAT. */ 98 99 #define XSUM_LCHUNK_BITS 64 /* Bits in chunk of the large accumulator */ 100 typedef uint64_t xsum_lchunk; /* Integer type of large accumulator chunk, 101 must be EXACTLY 64 bits in size */ 102 103 #define XSUM_LCOUNT_BITS (64 - XSUM_MANTISSA_BITS) /* # of bits in count */ 104 typedef int_least16_t xsum_lcount; /* Signed int type of counts for large acc.*/ 105 106 #define XSUM_LCHUNKS \ 107 (1 << (XSUM_EXP_BITS + 1)) /* # of chunks in large accumulator */ 108 109 typedef uint64_t xsum_used; /* Unsigned type for holding used flags */ 110 111 typedef struct { 112 xsum_lchunk chunk[XSUM_LCHUNKS]; /* Chunks making up large accumulator */ 113 xsum_lcount count[XSUM_LCHUNKS]; /* Counts of # adds remaining for chunks, 114 or -1 if not used yet or special. */ 115 xsum_used chunks_used[XSUM_LCHUNKS / 64]; /* Bits indicate chunks in use */ 116 xsum_used used_used; /* Bits indicate chunk_used entries not 0 */ 117 xsum_small_accumulator sacc; /* The small accumulator to condense into */ 118 } xsum_large_accumulator; 119 120 /* TYPE FOR LENGTHS OF ARRAYS. Must be a signed integer type. Set to 121 ptrdiff_t here on the assumption that this will be big enough, but 122 not unnecessarily big, which seems to be true. */ 123 124 typedef ptrdiff_t xsum_length; 125 126 /* FUNCTIONS FOR EXACT SUMMATION, WITH POSSIBLE DIVISION BY AN INTEGER. */ 127 128 void xsum_small_init(xsum_small_accumulator*); 129 void xsum_small_add1(xsum_small_accumulator*, xsum_flt); 130 xsum_flt xsum_small_round(xsum_small_accumulator*); 131 132 /* FUNCTIONS USEFUL FOR TESTING AND DEBUGGING. */ 133 134 void xsum_small_display(xsum_small_accumulator*); 135 136 /* DEBUG FLAG. Set to non-zero for debug ouptut. Ignored unless xsum.c 137 is compiled with -DDEBUG. */ 138 139 extern int xsum_debug; 140 141 #endif