float16-rounding.js (3219B)
// Rounding vectors. Each entry holds a double `value`, its exact binary64 bit
// pattern (`f64`), and the bit patterns expected after storing that value into
// a Float32Array (`f32`) and a Float16Array (`f16`).
const tests = [
  // Float16 subnormal numbers.
  { value: 2.9802322387695312e-8, f64: 0x3e60_0000_0000_0000n, f32: 0x3300_0000, f16: 0x0 },
  { value: 2.980232238769532e-8, f64: 0x3e60_0000_0000_0001n, f32: 0x3300_0000, f16: 0x1 },

  { value: 8.940696716308592e-8, f64: 0x3e77_ffff_ffff_ffffn, f32: 0x33c0_0000, f16: 0x1 },
  { value: 8.940696716308594e-8, f64: 0x3e78_0000_0000_0000n, f32: 0x33c0_0000, f16: 0x2 },

  { value: 0.000060945749282836914, f64: 0x3f0f_f400_0000_0000n, f32: 0x387f_a000, f16: 0x3fe },
  { value: 0.00006094574928283692, f64: 0x3f0f_f400_0000_0001n, f32: 0x387f_a000, f16: 0x3ff },

  { value: 0.0000610053539276123, f64: 0x3f0f_fbff_ffff_ffffn, f32: 0x387f_e000, f16: 0x3ff },
  { value: 0.000061005353927612305, f64: 0x3f0f_fc00_0000_0000n, f32: 0x387f_e000, f16: 0x400 },

  // Float16 normal numbers.
  { value: 0.000061035154431010596, f64: 0x3f0f_ffff_f000_0000n, f32: 0x3880_0000, f16: 0x400 },
  { value: 0.00006103515625, f64: 0x3f10_0000_0000_0000n, f32: 0x3880_0000, f16: 0x400 },
  { value: 0.0000610649585723877, f64: 0x3f10_0200_0000_0000n, f32: 0x3880_1000, f16: 0x400 },
  { value: 0.00006106495857238771, f64: 0x3f10_0200_0000_0001n, f32: 0x3880_1000, f16: 0x401 },
  { value: 0.00006112456321716307, f64: 0x3f10_05ff_ffff_ffffn, f32: 0x3880_3000, f16: 0x401 },

  // Underflow to zero.
  { value: 2.980232594040899e-8, f64: 0x3e60_0000_2000_0000n, f32: 0x3300_0001, f16: 0x1 },
  { value: 2.9802322387695312e-8, f64: 0x3e60_0000_0000_0000n, f32: 0x3300_0000, f16: 0x0 },
  { value: 2.9802320611338473e-8, f64: 0x3e5f_ffff_e000_0000n, f32: 0x32ff_ffff, f16: 0x0 },

  // Overflow to infinity.
  { value: 65536, f64: 0x40f0_0000_0000_0000n, f32: 0x4780_0000, f16: 0x7c00 },
  { value: 65520, f64: 0x40ef_fe00_0000_0000n, f32: 0x477f_f000, f16: 0x7c00 },
  { value: 65504, f64: 0x40ef_fc00_0000_0000n, f32: 0x477f_e000, f16: 0x7bff },
];

// One-element float arrays used as conversion scratch space.
const f64View = new Float64Array(1);
const f32View = new Float32Array(1);
const f16View = new Float16Array(1);

// Unsigned-integer views over the same buffers, used to read back the raw bits
// of whatever was last stored through the float views.
const f64Bits = new BigUint64Array(f64View.buffer);
const f32Bits = new Uint32Array(f32View.buffer);
const f16Bits = new Uint16Array(f16View.buffer);

// Each pass walks the vector table cyclically this many times.
// NOTE(review): the repetition is presumably there so the JIT-compiled path is
// exercised as well as the interpreter - confirm against the harness setup.
const ITERATIONS = 1000;

/**
 * Stores `value` into each float view and asserts the resulting raw bits, then
 * asserts that Math.f16round agrees with the Float16Array store.
 *
 * `assertEq` is not defined in this file; it is expected to be supplied by the
 * host test shell.
 *
 * @param {number} value - Double to convert.
 * @param {bigint} f64 - Expected binary64 bit pattern.
 * @param {number} f32 - Expected binary32 bit pattern.
 * @param {number} f16 - Expected binary16 bit pattern.
 */
function checkRounding(value, f64, f32, f16) {
  f64View[0] = value;
  assertEq(f64Bits[0], f64);

  f32View[0] = value;
  assertEq(f32Bits[0], f32);

  f16View[0] = value;
  assertEq(f16Bits[0], f16);

  // Read the value back through the float16 view so the comparison is made
  // against what the typed array actually stored.
  assertEq(Math.f16round(value), f16View[0]);
}

// Positive inputs, exactly as listed in the table.
for (let iter = 0; iter < ITERATIONS; ++iter) {
  const {value, f64, f32, f16} = tests[iter % tests.length];
  checkRounding(value, f64, f32, f16);
}

// Test negative case: negate the value and set the sign bit in every expected
// bit pattern.
for (let iter = 0; iter < ITERATIONS; ++iter) {
  const {value, f64, f32, f16} = tests[iter % tests.length];
  checkRounding(
    -value,
    f64 | 0x8000_0000_0000_0000n,
    // `|` yields a signed 32-bit result; `>>> 0` converts it back to the
    // unsigned value that a Uint32Array element reads as.
    (f32 | 0x8000_0000) >>> 0,
    f16 | 0x8000,
  );
}