main.cpp (10174B)
/**
 * Code used to generate manual values for 'correctly rounded' AbstractFloat
 * tests in the CTS.
 *
 * These are generated in a C++ program, because it allows for easy access to
 * higher than 64-bit floating point numbers (specifically 128-bit), which
 * allows for calculating roundings when infinitely precise calculations are not
 * precisely representable in 64-bit floats. This gets around the fact that
 * numbers in Typescript are internally 64-bits, thus making it difficult to
 * detect when rounding occurs for AbstractFloats without importing a higher
 * precision floating point library.
 *
 * This code is not meant to be automatically built/used by the CTS, but
 * instead is a reference for how the values in af_data.ts were generated.
 */
#include <bit>
#include <cassert>
#include <cfenv>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <format>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <tuple>
#include <vector>

/** The 'magic' that allows for calculating both roundings */
// #pragma STDC FENV_ACCESS ON

/** Magic constants that should match the entries in constants.ts's kBit.f64 */
constexpr double kF64NegativeMin = std::bit_cast<double>(0xFFEFFFFFFFFFFFFFull);
constexpr double kF64NegativeMax = std::bit_cast<double>(0x8010000000000000ull);
constexpr double kF64NegativeSubnormalMin = std::bit_cast<double>(0x800FFFFFFFFFFFFFull);
constexpr double kF64NegativeSubnormalMax = std::bit_cast<double>(0x8000000000000001ull);
constexpr double kF64PositiveSubnormalMin = std::bit_cast<double>(0x0000000000000001ull);
constexpr double kF64PositiveSubnormalMax = std::bit_cast<double>(0x000FFFFFFFFFFFFFull);
constexpr double kF64PositiveMin = std::bit_cast<double>(0x0010000000000000ull);
constexpr double kF64PositiveMax = std::bit_cast<double>(0x7FEFFFFFFFFFFFFFull);

/**
 * Mapping from Numeric value -> TS representation, should include all the
 * values that appear in kInterestingF64Values in math.ts
 */
const std::map<double, std::string> kInterestingF64s = {
    { kF64NegativeMin, "kValue.f64.negative.min" },
    { -10.0, "-10.0" },
    { -1.0, "-1.0" },
    { -0.125, "-0.125" },
    { kF64NegativeMax, "kValue.f64.negative.max"},
    { kF64NegativeSubnormalMin, "kValue.f64.negative.subnormal.min" },
    { kF64NegativeSubnormalMax, "kValue.f64.negative.subnormal.max" },
    { 0.0, "0.0" },
    { kF64PositiveSubnormalMin, "kValue.f64.positive.subnormal.min" },
    { kF64PositiveSubnormalMax, "kValue.f64.positive.subnormal.max" },
    { kF64PositiveMin, "kValue.f64.positive.min" },
    { 0.125, "0.125" },
    { 1.0, "1.0" },
    { 10.0, "10.0" },
    { kF64PositiveMax, "kValue.f64.positive.max"}
};

/** Additional values to use for testing 'fract' */
const std::map<double, std::string> kFractF64s = {
    { 0.5, "0.5" }, // 0.5 -> 0.5
    { 1, "1" }, // 1 -> 0
    { 2, "2" }, // 2 -> 0
    { -0.5, "-0.5" }, // -0.5 -> 0.5
    { -1, "-1" }, // -1 -> 0
    { -2, "-2" }, // -2 -> 0
    { 10.0000999999999997669, "10.0000999999999997669" }, // ~10.0001 -> ~0.0001
    { -10.0000999999999997669, "-10.0000999999999997669" }, // -10.0001 -> ~0.9999
    { 3937509.87755102012306, "3937509.87755102012306" }, // 3937509.87755102012306 -> ~0.877551..., not [0, 0.75], https://github.com/gpuweb/gpuweb/issues/4523
};

/**
 * Print out a string representation of a specific value that can be copied in
 * a CTS test.
 *
 * Infinities and the entries of kInterestingF64s are printed as their named
 * constants, small integral values as plain literals, and everything else as
 * a reinterpretU64AsF64(...) expression carrying the exact bit pattern.
 *
 * A NaN is a generator bug: it trips an assertion in debug builds. With
 * assertions disabled it is dumped as its raw bit pattern.
 */
std::string printAbstractFloat(const double val) {
    if (std::isinf(val)) {
        return val > 0 ? "kValue.f64.positive.infinity" : "kValue.f64.negative.infinity";
    }

    const bool is_nan = std::isnan(val);
    // `false &&` is required: a bare string literal is always truthy and
    // would never fire.
    assert(!is_nan && "Generated a NaN");

    // NaN must not reach the map lookup: it is unordered against every key,
    // so std::map::find would treat it as equivalent to the first entry.
    if (!is_nan) {
        if (const auto iter = kInterestingF64s.find(val); iter != kInterestingF64s.end()) {
            return iter->second;
        }
    }

    std::stringstream ss;
    // Print 'easy' to read integers as literals, otherwise dump the hex value
    if (val == std::round(val) && std::fabs(val) < 100000) {
        ss << val;
    } else {
        ss << "reinterpretU64AsF64(0x" << std::hex << std::setfill('0') << std::setw(16)
           << std::bit_cast<uint64_t>(val) << "n) /* " << val << " */";
    }
    return ss.str();
}

/** Could this value potentially be affected by FTZ behaviour */
bool couldBeFlushed(const double val) {
    return std::fpclassify(val) == FP_SUBNORMAL;
}

/**
 * Generate the 64-bit float interval that a higher precision value will be
 * quantized down to.
 *
 * If the value is exactly representable in 64-bit floating point this will be
 * a singular value, otherwise it will be the two 64-bit values nearest to the
 * value.
 *
 * This is done via manipulating the global process rounding mode, thus this
 * code is non-reentrant, so should not be used in concurrent/asynchronous
 * processes.
 *
 * @param val higher precision value to quantize
 * @return { value rounded toward -infinity, value rounded toward +infinity }
 */
std::tuple<double, double> quantizeToAbstractFloat(const long double val) {
    const int round_mode = std::fegetround();

    // The FENV_ACCESS pragma above is disabled, so the optimizer is not told
    // that the rounding mode changes between the two conversions. Reading the
    // input through a volatile keeps each conversion a distinct runtime
    // operation performed after its fesetround call.
    volatile long double source = val;

    // The fesetround calls are kept outside of assert(): the assert argument
    // is not evaluated at all when NDEBUG is defined.
    [[maybe_unused]] int status = std::fesetround(FE_DOWNWARD);
    assert(status == 0 && "Unable to switch to FE_DOWNWARD");
    const double downward = static_cast<double>(source);

    status = std::fesetround(FE_UPWARD);
    assert(status == 0 && "Unable to switch to FE_UPWARD");
    const double upward = static_cast<double>(source);

    status = std::fesetround(round_mode);
    assert(status == 0 && "Unable to restore the original rounding mode");

    return { downward, upward };
}

/**
 * Joins the printed form of each expected value with ", ". Yields an empty
 * string for an empty vector.
 */
static std::string printExpectedValues(const std::vector<double> &result) {
    std::stringstream ss;
    for (std::size_t i = 0; i < result.size(); ++i) {
        if (i != 0) {
            ss << ", ";
        }
        ss << printAbstractFloat(result[i]);
    }
    return ss.str();
}

/**
 * Generates a string for a unary operation result that can be copied into a
 * CTS test file.
 *
 * (Named printBinaryCase for historical reasons; it is the single-input
 * overload of the binary printer below.)
 *
 * @param input TS expression for the operation's input
 * @param result acceptable results, must not be empty
 */
std::string printBinaryCase(const std::string &input, const std::vector<double> &result) {
    assert(!result.empty());
    std::stringstream ss;
    ss << "{ input: " << input << ", "
       << "expected: [ " << printExpectedValues(result) << " ] }";
    return ss.str();
}

/**
 * Generates a string for a binary operation result that can be copied into a
 * CTS test file.
 *
 * @param lhs TS expression for the left-hand operand
 * @param rhs TS expression for the right-hand operand
 * @param result acceptable results, must not be empty
 */
std::string printBinaryCase(const std::string &lhs, const std::string &rhs, const std::vector<double> &result) {
    assert(!result.empty());
    std::stringstream ss;
    ss << "{ lhs: " << lhs << ", rhs: " << rhs << ", "
       << "expected: [ " << printExpectedValues(result) << " ] }";
    return ss.str();
}

/** Function that performs a binary operation, i.e. addition, etc */
using BinaryOp = long double (*)(long double, long double);

const BinaryOp kAdditionOp = [](const long double lhs, const long double rhs) {
    return lhs + rhs;
};

const BinaryOp kSubtractionOp = [](const long double lhs, const long double rhs) {
    return lhs - rhs;
};

const BinaryOp kMultiplicationOp = [](const long double lhs, const long double rhs) {
    return lhs * rhs;
};

/**
 * Returns the values an operand may take once potential flush-to-zero is
 * considered: the operand itself, preceded by zero when it is subnormal.
 */
static std::vector<long double> flushCandidates(const long double operand) {
    // The element type is spelled out rather than deduced from a mixed
    // int / long double initializer list.
    if (couldBeFlushed(operand)) {
        return std::vector<long double>{ 0.0L, operand };
    }
    return std::vector<long double>{ operand };
}

/**
 * Calculates all of the possible results for a binary operation given the
 * provided inputs. This handles both quantization and flushing behaviours.
 *
 * @return the distinct possible results in ascending order
 */
std::vector<double> calculateBinaryResults(const BinaryOp op, long double lhs, long double rhs) {
    // CTS needs to consider that subnormals may be flushed to zero at
    // any point, so applying potential flushings to get additional
    // results.
    std::set<double> results;
    for (const long double l : flushCandidates(lhs)) {
        for (const long double r : flushCandidates(rhs)) {
            const auto [downward, upward] = quantizeToAbstractFloat(op(l, r));
            results.insert(downward);
            results.insert(upward);
        }
    }

    return { results.begin(), results.end() };
}

/**
 * Generates a string, that can be copied into a CTS test file, for all of the
 * tests cases for a binary operation.
 *
 * Every ordered pair of entries from kInterestingF64s is emitted as one case.
 */
std::string printBinaryOpCases(const BinaryOp op, const std::string& name) {
    std::stringstream ss;
    ss << "BEGIN " << name << " CASES" << '\n';
    for (const auto& [lhs, lhs_str] : kInterestingF64s) {
        for (const auto& [rhs, rhs_str] : kInterestingF64s) {
            ss << printBinaryCase(lhs_str, rhs_str, calculateBinaryResults(op, lhs, rhs)) << "," << '\n';
        }
    }
    ss << "END " << name << " CASES" << '\n';
    return ss.str();
}

/**
 * Generates a string, that can be copied into a CTS test file, for all of the
 * tests cases for `fract`. WGSL defines fract(x) = x - floor(x).
 */
std::string printFractCases() {
    std::stringstream ss;
    ss << "BEGIN FRACT CASES" << '\n';
    // Do not have to calculate quantization/roundings for floor(input),
    // because floor of a double is guaranteed to be a double, and all of
    // the values in kInterestingF64s and kFractF64s are doubles.
    for (const auto* inputs : { &kInterestingF64s, &kFractF64s }) {
        for (const auto& [input, input_str] : *inputs) {
            ss << printBinaryCase(input_str, calculateBinaryResults(kSubtractionOp, input, std::floor(input))) << "," << '\n';
        }
    }
    ss << "END FRACT CASES" << '\n';
    return ss.str();
}

int main() {
    assert(sizeof(double) < sizeof(long double) && "Need higher precision long double");
    // NOTE(review): a size of 16 bytes alone may not distinguish IEEE
    // binary128 from a padded extended-precision format; checking
    // std::numeric_limits<long double>::digits would be stricter - confirm
    // against the platform this is run on.
    assert(sizeof(long double) == 16 && "Code assume 'proper' quad support, not some other higher precision floating point implementation");

    {
        // Confirms that calculating f64 imprecise results generates two possible
        // roundings.
        [[maybe_unused]] const auto [begin, end] =
            quantizeToAbstractFloat(static_cast<long double>(0.1) * static_cast<long double>(0.1));
        assert(std::bit_cast<uint64_t>(begin) == 0x3F847AE147AE147bull &&
               std::bit_cast<uint64_t>(end) == 0x3F847AE147AE147Cull &&
               "0.1 * 0.1 returned unexpected values");
    }

    std::cout << printBinaryOpCases(kAdditionOp, "ADDITION") << std::endl;
    std::cout << printBinaryOpCases(kSubtractionOp, "SUBTRACTION") << std::endl;
    std::cout << printBinaryOpCases(kMultiplicationOp, "MULTIPLICATION") << std::endl;
    std::cout << printFractCases() << std::endl;

    return 0;
}