tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ReciprocalMulConstants.cpp (5888B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "jit/ReciprocalMulConstants.h"
      8 
      9 #include "mozilla/Assertions.h"
     10 
     11 #include <limits>
     12 
     13 using namespace js::jit;
     14 
     15 template <typename UintT>
     16 struct TwiceLarger;
     17 
     18 template <>
     19 struct TwiceLarger<uint32_t> {
     20  using Type = uint64_t;
     21  using SignedType = int64_t;
     22 };
     23 
     24 template <>
     25 struct TwiceLarger<uint64_t> {
     26  using Type = js::Uint128;
     27  using SignedType = js::Int128;
     28 };
     29 
     30 template <typename DivConstants, typename UintT>
     31 static auto ComputeDivisionConstants(UintT d, int maxLog) {
     32  using UintT_Twice = typename TwiceLarger<UintT>::Type;
     33  using IntT_Twice = typename TwiceLarger<UintT>::SignedType;
     34 
     35  MOZ_ASSERT(maxLog >= 2 && maxLog <= std::numeric_limits<UintT>::digits);
     36 
     37  // In what follows, 0 < d < 2^maxLog and d is not a power of 2.
     38  MOZ_ASSERT(UintT_Twice(d) < (UintT_Twice(1) << maxLog) &&
     39             !mozilla::IsPowerOfTwo(d));
     40 
     41  // NOTE: The following explanation assumes T = uint32_t, but
     42  // T = uint64_t works similar.
     43  //
     44  // Speeding up division by non power-of-2 constants is possible by
     45  // calculating, during compilation, a value M such that high-order
     46  // bits of M*n correspond to the result of the division of n by d.
     47  // No value of M can serve this purpose for arbitrarily big values
     48  // of n but, for optimizing integer division, we're just concerned
     49  // with values of n whose absolute value is bounded (by fitting in
     50  // an integer type, say). With this in mind, we'll find a constant
     51  // M as above that works for -2^maxLog <= n < 2^maxLog; maxLog can
     52  // then be 31 for signed division or 32 for unsigned division.
     53  //
     54  // The original presentation of this technique appears in Hacker's
     55  // Delight, a book by Henry S. Warren, Jr.. A proof of correctness
     56  // for our version follows; we'll denote maxLog by L in the proof,
     57  // for conciseness.
     58  //
     59  // Formally, for |d| < 2^L, we'll compute two magic values M and s
     60  // in the ranges 0 <= M < 2^(L+1) and 0 <= s <= L such that
     61  //     (M * n) >> (32 + s) = floor(n/d)    if    0 <= n < 2^L
     62  //     (M * n) >> (32 + s) = ceil(n/d) - 1 if -2^L <= n < 0.
     63  //
     64  // Define p = 32 + s, M = ceil(2^p/d), and assume that s satisfies
     65  //                     M - 2^p/d <= 2^(p-L)/d.                 (1)
     66  // (Observe that p = CeilLog32(d) + L satisfies this, as the right
     67  // side of (1) is at least one in this case). Then,
     68  //
     69  // a) If p <= CeilLog32(d) + L, then M < 2^(L+1) - 1.
     70  // Proof: Indeed, M is monotone in p and, for p equal to the above
     71  // value, the bounds 2^L > d >= 2^(p-L-1) + 1 readily imply that
     72  //    2^p / d <  2^p/(d - 1) * (d - 1)/d
     73  //            <= 2^(L+1) * (1 - 1/d) < 2^(L+1) - 2.
     74  // The claim follows by applying the ceiling function.
     75  //
     76  // b) For any 0 <= n < 2^L, floor(Mn/2^p) = floor(n/d).
     77  // Proof: Put x = floor(Mn/2^p); it's the unique integer for which
     78  //                    Mn/2^p - 1 < x <= Mn/2^p.                (2)
     79  // Using M >= 2^p/d on the LHS and (1) on the RHS, we get
     80  //           n/d - 1 < x <= n/d + n/(2^L d) < n/d + 1/d.
     81  // Since x is an integer, it's not in the interval (n/d, (n+1)/d),
     82  // and so n/d - 1 < x <= n/d, which implies x = floor(n/d).
     83  //
     84  // c) For any -2^L <= n < 0, floor(Mn/2^p) + 1 = ceil(n/d).
     85  // Proof: The proof is similar. Equation (2) holds as above. Using
     86  // M > 2^p/d (d isn't a power of 2) on the RHS and (1) on the LHS,
     87  //                 n/d + n/(2^L d) - 1 < x < n/d.
     88  // Using n >= -2^L and summing 1,
     89  //                  n/d - 1/d < x + 1 < n/d + 1.
     90  // Since x + 1 is an integer, this implies n/d <= x + 1 < n/d + 1.
     91  // In other words, x + 1 = ceil(n/d).
     92  //
     93  // Condition (1) isn't necessary for the existence of M and s with
     94  // the properties above. Hacker's Delight provides a slightly less
     95  // restrictive condition when d >= 196611, at the cost of a 3-page
     96  // proof of correctness, for the case L = 31.
     97  //
     98  // Note that, since d*M - 2^p = d - (2^p)%d, (1) can be written as
     99  //                   2^(p-L) >= d - (2^p)%d.
    100  // In order to avoid overflow in the (2^p) % d calculation, we can
    101  // compute it as (2^p-1) % d + 1, where 2^p-1 can then be computed
    102  // without overflow as UINT64_MAX >> (64-p).
    103 
    104  static constexpr auto UINT_BITS = std::numeric_limits<UintT>::digits;
    105  static constexpr auto UINT_TWICE_BITS =
    106      std::numeric_limits<UintT_Twice>::digits;
    107  static constexpr auto UINT_TWICE_MAX =
    108      std::numeric_limits<UintT_Twice>::max();
    109 
    110  // We now compute the least p >= UINT_BITS with the property above...
    111  int32_t p = UINT_BITS;
    112  while (true) {
    113    auto u = (UintT_Twice(1) << (p - maxLog));
    114    auto v = (UINT_TWICE_MAX >> (UINT_TWICE_BITS - p));
    115    if (u + (v % UintT_Twice(d)) + UintT_Twice(1) < UintT_Twice(d)) {
    116      p++;
    117    } else {
    118      break;
    119    }
    120  }
    121 
    122  // ...and the corresponding M. For either the signed (L=31) or the
    123  // unsigned (L=32) case, this value can be too large (cf. item a).
    124  // Codegen can still multiply by M by multiplying by (M - 2^L) and
    125  // adjusting the value afterwards, if this is the case.
    126  DivConstants rmc;
    127  rmc.multiplier = static_cast<IntT_Twice>(
    128      (UINT_TWICE_MAX >> (UINT_TWICE_BITS - p)) / UintT_Twice(d) +
    129      UintT_Twice(1));
    130  rmc.shiftAmount = p - UINT_BITS;
    131 
    132  return rmc;
    133 }
    134 
    135 ReciprocalMulConstants::Div32Constants
    136 ReciprocalMulConstants::computeDivisionConstants(uint32_t d, int maxLog) {
    137  return ComputeDivisionConstants<Div32Constants>(d, maxLog);
    138 }
    139 
    140 ReciprocalMulConstants::Div64Constants
    141 ReciprocalMulConstants::computeDivisionConstants(uint64_t d, int maxLog) {
    142  return ComputeDivisionConstants<Div64Constants>(d, maxLog);
    143 }