ReciprocalMulConstants.cpp (5888B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "jit/ReciprocalMulConstants.h" 8 9 #include "mozilla/Assertions.h" 10 11 #include <limits> 12 13 using namespace js::jit; 14 15 template <typename UintT> 16 struct TwiceLarger; 17 18 template <> 19 struct TwiceLarger<uint32_t> { 20 using Type = uint64_t; 21 using SignedType = int64_t; 22 }; 23 24 template <> 25 struct TwiceLarger<uint64_t> { 26 using Type = js::Uint128; 27 using SignedType = js::Int128; 28 }; 29 30 template <typename DivConstants, typename UintT> 31 static auto ComputeDivisionConstants(UintT d, int maxLog) { 32 using UintT_Twice = typename TwiceLarger<UintT>::Type; 33 using IntT_Twice = typename TwiceLarger<UintT>::SignedType; 34 35 MOZ_ASSERT(maxLog >= 2 && maxLog <= std::numeric_limits<UintT>::digits); 36 37 // In what follows, 0 < d < 2^maxLog and d is not a power of 2. 38 MOZ_ASSERT(UintT_Twice(d) < (UintT_Twice(1) << maxLog) && 39 !mozilla::IsPowerOfTwo(d)); 40 41 // NOTE: The following explanation assumes T = uint32_t, but 42 // T = uint64_t works similar. 43 // 44 // Speeding up division by non power-of-2 constants is possible by 45 // calculating, during compilation, a value M such that high-order 46 // bits of M*n correspond to the result of the division of n by d. 47 // No value of M can serve this purpose for arbitrarily big values 48 // of n but, for optimizing integer division, we're just concerned 49 // with values of n whose absolute value is bounded (by fitting in 50 // an integer type, say). With this in mind, we'll find a constant 51 // M as above that works for -2^maxLog <= n < 2^maxLog; maxLog can 52 // then be 31 for signed division or 32 for unsigned division. 53 // 54 // The original presentation of this technique appears in Hacker's 55 // Delight, a book by Henry S. Warren, Jr.. A proof of correctness 56 // for our version follows; we'll denote maxLog by L in the proof, 57 // for conciseness. 58 // 59 // Formally, for |d| < 2^L, we'll compute two magic values M and s 60 // in the ranges 0 <= M < 2^(L+1) and 0 <= s <= L such that 61 // (M * n) >> (32 + s) = floor(n/d) if 0 <= n < 2^L 62 // (M * n) >> (32 + s) = ceil(n/d) - 1 if -2^L <= n < 0. 63 // 64 // Define p = 32 + s, M = ceil(2^p/d), and assume that s satisfies 65 // M - 2^p/d <= 2^(p-L)/d. (1) 66 // (Observe that p = CeilLog32(d) + L satisfies this, as the right 67 // side of (1) is at least one in this case). Then, 68 // 69 // a) If p <= CeilLog32(d) + L, then M < 2^(L+1) - 1. 70 // Proof: Indeed, M is monotone in p and, for p equal to the above 71 // value, the bounds 2^L > d >= 2^(p-L-1) + 1 readily imply that 72 // 2^p / d < 2^p/(d - 1) * (d - 1)/d 73 // <= 2^(L+1) * (1 - 1/d) < 2^(L+1) - 2. 74 // The claim follows by applying the ceiling function. 75 // 76 // b) For any 0 <= n < 2^L, floor(Mn/2^p) = floor(n/d). 77 // Proof: Put x = floor(Mn/2^p); it's the unique integer for which 78 // Mn/2^p - 1 < x <= Mn/2^p. (2) 79 // Using M >= 2^p/d on the LHS and (1) on the RHS, we get 80 // n/d - 1 < x <= n/d + n/(2^L d) < n/d + 1/d. 81 // Since x is an integer, it's not in the interval (n/d, (n+1)/d), 82 // and so n/d - 1 < x <= n/d, which implies x = floor(n/d). 83 // 84 // c) For any -2^L <= n < 0, floor(Mn/2^p) + 1 = ceil(n/d). 85 // Proof: The proof is similar. Equation (2) holds as above. Using 86 // M > 2^p/d (d isn't a power of 2) on the RHS and (1) on the LHS, 87 // n/d + n/(2^L d) - 1 < x < n/d. 88 // Using n >= -2^L and summing 1, 89 // n/d - 1/d < x + 1 < n/d + 1. 90 // Since x + 1 is an integer, this implies n/d <= x + 1 < n/d + 1. 91 // In other words, x + 1 = ceil(n/d). 92 // 93 // Condition (1) isn't necessary for the existence of M and s with 94 // the properties above. Hacker's Delight provides a slightly less 95 // restrictive condition when d >= 196611, at the cost of a 3-page 96 // proof of correctness, for the case L = 31. 97 // 98 // Note that, since d*M - 2^p = d - (2^p)%d, (1) can be written as 99 // 2^(p-L) >= d - (2^p)%d. 100 // In order to avoid overflow in the (2^p) % d calculation, we can 101 // compute it as (2^p-1) % d + 1, where 2^p-1 can then be computed 102 // without overflow as UINT64_MAX >> (64-p). 103 104 static constexpr auto UINT_BITS = std::numeric_limits<UintT>::digits; 105 static constexpr auto UINT_TWICE_BITS = 106 std::numeric_limits<UintT_Twice>::digits; 107 static constexpr auto UINT_TWICE_MAX = 108 std::numeric_limits<UintT_Twice>::max(); 109 110 // We now compute the least p >= UINT_BITS with the property above... 111 int32_t p = UINT_BITS; 112 while (true) { 113 auto u = (UintT_Twice(1) << (p - maxLog)); 114 auto v = (UINT_TWICE_MAX >> (UINT_TWICE_BITS - p)); 115 if (u + (v % UintT_Twice(d)) + UintT_Twice(1) < UintT_Twice(d)) { 116 p++; 117 } else { 118 break; 119 } 120 } 121 122 // ...and the corresponding M. For either the signed (L=31) or the 123 // unsigned (L=32) case, this value can be too large (cf. item a). 124 // Codegen can still multiply by M by multiplying by (M - 2^L) and 125 // adjusting the value afterwards, if this is the case. 126 DivConstants rmc; 127 rmc.multiplier = static_cast<IntT_Twice>( 128 (UINT_TWICE_MAX >> (UINT_TWICE_BITS - p)) / UintT_Twice(d) + 129 UintT_Twice(1)); 130 rmc.shiftAmount = p - UINT_BITS; 131 132 return rmc; 133 } 134 135 ReciprocalMulConstants::Div32Constants 136 ReciprocalMulConstants::computeDivisionConstants(uint32_t d, int maxLog) { 137 return ComputeDivisionConstants<Div32Constants>(d, maxLog); 138 } 139 140 ReciprocalMulConstants::Div64Constants 141 ReciprocalMulConstants::computeDivisionConstants(uint64_t d, int maxLog) { 142 return ComputeDivisionConstants<Div64Constants>(d, maxLog); 143 }