tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Float16.h (11440B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef vm_Float16_h
      8 #define vm_Float16_h
      9 
     10 #include "mozilla/FloatingPoint.h"
     11 #include "mozilla/MathAlgorithms.h"
     12 
     13 #include <cstdint>
     14 #include <cstring>
     15 #include <limits>
     16 #include <type_traits>
     17 
     18 namespace js {
     19 
     20 namespace half {
     21 // This is extracted from Version 2.2.0 of the half library by Christian Rau.
     22 // See https://sourceforge.net/projects/half/.
     23 // The original copyright and MIT license are reproduced below:
     24 
     25 // half - IEEE 754-based half-precision floating-point library.
     26 //
     27 // Copyright (c) 2012-2021 Christian Rau <rauy@users.sourceforge.net>
     28 //
     29 // Permission is hereby granted, free of charge, to any person obtaining a copy
     30 // of this software and associated documentation files (the "Software"), to deal
     31 // in the Software without restriction, including without limitation the rights
     32 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     33 // copies of the Software, and to permit persons to whom the Software is
     34 // furnished to do so, subject to the following conditions:
     35 //
     36 // The above copyright notice and this permission notice shall be included in
     37 // all copies or substantial portions of the Software.
     38 //
     39 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     40 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     41 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     42 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     43 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     44 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     45 // SOFTWARE.
     46 
     /// Maps a floating-point type to the unsigned integer type used to hold its
     /// raw bit pattern. Types without a dedicated specialization fall back to
     /// unsigned char.
     template <typename T>
     struct bits {
       using type = unsigned char;
     };

     /// cv-qualified types use the same mapping as their unqualified type.
     template <typename T>
     struct bits<const T> : bits<T> {};
     template <typename T>
     struct bits<volatile T> : bits<T> {};
     template <typename T>
     struct bits<const volatile T> : bits<T> {};

     /// float: unsigned integer of (at least) 32 bits width.
     template <>
     struct bits<float> {
       using type = std::uint_least32_t;
     };

     /// double: unsigned integer of (at least) 64 bits width.
     template <>
     struct bits<double> {
       using type = std::uint_least64_t;
     };
     70 
     /// Fastest unsigned integer type that is at least 32 bits wide. Note that
     /// std::uint_fast32_t is allowed to be wider than 32 bits.
     using uint32 = std::uint_fast32_t;
     73 
     /// Result of a half-precision overflow.
     /// \param sign half-precision value with sign bit only
     /// \return \a sign combined with 0x7C00 (exponent bits all set, mantissa
     ///         zero), i.e. an infinity carrying that sign
     constexpr unsigned int overflow(unsigned int sign = 0) {
       constexpr unsigned int kExponentAllOnes = 0x7C00;
       return kExponentAllOnes | sign;
     }
     78 
     /// Result of a half-precision underflow.
     /// \param sign half-precision value with sign bit only
     /// \return \a sign unchanged: every exponent and mantissa bit is clear, so
     ///         the result is a zero carrying that sign
     constexpr unsigned int underflow(unsigned int sign = 0) {
       const unsigned int signedZero = sign;
       return signedZero;
     }
     83 
     /// Round a truncated half-precision number to nearest, ties to even.
     /// \param value finite half-precision number to round
     /// \param g guard bit (most significant discarded bit)
     /// \param s sticky bit (or of all but the most significant discarded bits)
     /// \return rounded half-precision value
     constexpr unsigned int rounded(unsigned int value, int g, int s) {
       const unsigned int guard = static_cast<unsigned int>(g);
       const unsigned int sticky = static_cast<unsigned int>(s);
       // With g and s being 0 or 1 this is 1 exactly when the guard bit is set
       // and either more discarded bits were set (above the halfway point) or
       // the kept value is odd (a tie, resolved towards the even neighbour).
       const unsigned int increment = guard & (sticky | value);
       return value + increment;
     }
     92 
     93 /// Convert IEEE single-precision to half-precision.
     94 /// \param value single-precision value to convert
     95 /// \return rounded half-precision value
     96 inline unsigned int float2half_impl(float value) {
     97  bits<float>::type fbits;
     98  std::memcpy(&fbits, &value, sizeof(float));
     99  unsigned int sign = (fbits >> 16) & 0x8000;
    100  fbits &= 0x7FFFFFFF;
    101  if (fbits >= 0x7F800000)
    102    return sign | 0x7C00 |
    103           ((fbits > 0x7F800000) ? (0x200 | ((fbits >> 13) & 0x3FF)) : 0);
    104  if (fbits >= 0x47800000) return overflow(sign);
    105  if (fbits >= 0x38800000)
    106    return rounded(
    107        sign | (((fbits >> 23) - 112) << 10) | ((fbits >> 13) & 0x3FF),
    108        (fbits >> 12) & 1, (fbits & 0xFFF) != 0);
    109  if (fbits >= 0x33000000) {
    110    int i = 125 - (fbits >> 23);
    111    fbits = (fbits & 0x7FFFFF) | 0x800000;
    112    return rounded(sign | (fbits >> (i + 1)), (fbits >> i) & 1,
    113                   (fbits & ((static_cast<uint32>(1) << i) - 1)) != 0);
    114  }
    115  if (fbits != 0) return underflow(sign);
    116  return sign;
    117 }
    118 
    119 /// Convert IEEE double-precision to half-precision.
    120 /// \param value double-precision value to convert
    121 /// \return rounded half-precision value
    122 inline unsigned int float2half_impl(double value) {
    123  bits<double>::type dbits;
    124  std::memcpy(&dbits, &value, sizeof(double));
    125  uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF;
    126  unsigned int sign = (hi >> 16) & 0x8000;
    127  hi &= 0x7FFFFFFF;
    128  if (hi >= 0x7FF00000)
    129    return sign | 0x7C00 |
    130           ((dbits & 0xFFFFFFFFFFFFF) ? (0x200 | ((hi >> 10) & 0x3FF)) : 0);
    131  if (hi >= 0x40F00000) return overflow(sign);
    132  if (hi >= 0x3F100000)
    133    return rounded(sign | (((hi >> 20) - 1008) << 10) | ((hi >> 10) & 0x3FF),
    134                   (hi >> 9) & 1, ((hi & 0x1FF) | lo) != 0);
    135  if (hi >= 0x3E600000) {
    136    int i = 1018 - (hi >> 20);
    137    hi = (hi & 0xFFFFF) | 0x100000;
    138    return rounded(sign | (hi >> (i + 1)), (hi >> i) & 1,
    139                   ((hi & ((static_cast<uint32>(1) << i) - 1)) | lo) != 0);
    140  }
    141  if ((hi | lo) != 0) return underflow(sign);
    142  return sign;
    143 }
    144 
    /// Convert a half-precision bit pattern to the floating-point type T.
    /// This is a declaration only: the definitions are supplied by the explicit
    /// specializations for double and float.
    /// \param value half-precision value to convert
    /// \return the value as a T
    template <class T>
    inline T half2float_impl(unsigned int value);
    147 
    148 /// Convert half-precision to IEEE double-precision.
    149 /// \param value half-precision value to convert
    150 /// \return double-precision value
    151 template <>
    152 inline double half2float_impl(unsigned int value) {
    153  uint32 hi = static_cast<uint32>(value & 0x8000) << 16;
    154  unsigned int abs = value & 0x7FFF;
    155  if (abs) {
    156    hi |= 0x3F000000 << static_cast<unsigned>(abs >= 0x7C00);
    157 
    158    // Mozilla change: Replace the loop with CountLeadingZeroes32.
    159    // for (; abs < 0x400; abs <<= 1, hi -= 0x100000);
    160    if (abs < 0x400) {
    161      // NOTE: CountLeadingZeroes32(0x400) is 21.
    162      uint32 shift = mozilla::CountLeadingZeroes32(uint32_t(abs)) - 21;
    163      abs <<= shift;
    164      hi -= shift * 0x100000;
    165    }
    166 
    167    hi += static_cast<uint32>(abs) << 10;
    168  }
    169  bits<double>::type dbits = static_cast<bits<double>::type>(hi) << 32;
    170  double out;
    171  std::memcpy(&out, &dbits, sizeof(double));
    172  return out;
    173 }
    174 
    175 /// Convert half-precision to IEEE single-precision.
    176 /// \param value half-precision value to convert
    177 /// \return single-precision value
    178 template <>
    179 inline float half2float_impl(unsigned int value) {
    180  bits<float>::type fbits = static_cast<bits<float>::type>(value & 0x8000)
    181                            << 16;
    182  unsigned int abs = value & 0x7FFF;
    183  if (abs) {
    184    fbits |= 0x38000000 << static_cast<unsigned>(abs >= 0x7C00);
    185 
    186    // Mozilla change: Replace the loop with CountLeadingZeroes32.
    187    // for (; abs < 0x400; abs <<= 1, fbits -= 0x800000);
    188    if (abs < 0x400) {
    189      // NOTE: CountLeadingZeroes32(0x400) is 21.
    190      uint32 shift = mozilla::CountLeadingZeroes32(uint32_t(abs)) - 21;
    191      abs <<= shift;
    192      fbits -= shift * 0x800000;
    193    }
    194 
    195    fbits += static_cast<bits<float>::type>(abs) << 13;
    196  }
    197 
    198  float out;
    199  std::memcpy(&out, &fbits, sizeof(float));
    200  return out;
    201 }
    202 }  // namespace half
    203 
    204 class float16 final {
    205  uint16_t val;
    206 
    207 public:
    208  constexpr float16() = default;
    209  constexpr float16(const float16&) = default;
    210 
    211  explicit float16(float x) : val(half::float2half_impl(x)) {}
    212  explicit float16(double x) : val(half::float2half_impl(x)) {}
    213 
    214  explicit float16(std::int8_t x) : float16(float(x)) {}
    215  explicit float16(std::int16_t x) : float16(float(x)) {}
    216  explicit float16(std::int32_t x) : float16(float(x)) {}
    217  explicit float16(std::int64_t x) : float16(double(x)) {}
    218 
    219  explicit float16(std::uint8_t x) : float16(float(x)) {}
    220  explicit float16(std::uint16_t x) : float16(float(x)) {}
    221  explicit float16(std::uint32_t x) : float16(float(x)) {}
    222  explicit float16(std::uint64_t x) : float16(double(x)) {}
    223 
    224  explicit float16(bool x) : float16(float(x)) {}
    225 
    226  constexpr float16& operator=(const float16&) = default;
    227 
    228  float16& operator=(float x) {
    229    *this = float16{x};
    230    return *this;
    231  }
    232 
    233  float16& operator=(double x) {
    234    *this = float16{x};
    235    return *this;
    236  }
    237 
    238  explicit operator float() const { return half::half2float_impl<float>(val); }
    239  explicit operator double() const {
    240    return half::half2float_impl<double>(val);
    241  }
    242 
    243  bool operator==(float16 x) const {
    244    uint16_t abs = val & 0x7FFF;
    245 
    246    // ±0 is equal to ±0.
    247    if (abs == 0) {
    248      return (x.val & 0x7FFF) == 0;
    249    }
    250 
    251    // If neither +0 nor NaN, then both bit representations must be equal.
    252    if (abs <= 0x7C00) {
    253      return val == x.val;
    254    }
    255 
    256    // NaN isn't equal to any value.
    257    return false;
    258  }
    259 
    260  bool operator!=(float16 x) const { return !(*this == x); }
    261 
    262  uint16_t toRawBits() const { return val; }
    263 
    264  static constexpr float16 fromRawBits(uint16_t bits) {
    265    float16 f16{};
    266    f16.val = bits;
    267    return f16;
    268  }
    269 };
    270 
    271 static_assert(sizeof(float16) == 2, "float16 has no extra padding");
    272 
    273 static_assert(
    274    std::is_trivial_v<float16>,
    275    "float16 must be trivial to be eligible for memcpy/memset optimizations");
    276 
    277 }  // namespace js
    278 
    279 template <>
    280 class std::numeric_limits<js::float16> {
    281 public:
    282  static constexpr bool is_specialized = true;
    283  static constexpr bool is_signed = true;
    284  static constexpr bool is_integer = false;
    285  static constexpr bool is_exact = false;
    286  static constexpr bool has_infinity = true;
    287  static constexpr bool has_quiet_NaN = true;
    288  static constexpr bool has_signaling_NaN = true;
    289  static constexpr std::float_denorm_style has_denorm = std::denorm_present;
    290  static constexpr bool has_denorm_loss = false;
    291  static constexpr std::float_round_style round_style = std::round_to_nearest;
    292  static constexpr bool is_iec559 = true;
    293  static constexpr bool is_bounded = true;
    294  static constexpr bool is_modulo = false;
    295  static constexpr int digits = 11;
    296  static constexpr int digits10 = 3;
    297  static constexpr int max_digits10 = 5;
    298  static constexpr int radix = 2;
    299  static constexpr int min_exponent = -13;
    300  static constexpr int min_exponent10 = -4;
    301  static constexpr int max_exponent = 16;
    302  static constexpr int max_exponent10 = 4;
    303  static constexpr bool traps = false;
    304  static constexpr bool tinyness_before = false;
    305 
    306  static constexpr auto min() noexcept {
    307    return js::float16::fromRawBits(0x400);
    308  }
    309  static constexpr auto lowest() noexcept {
    310    return js::float16::fromRawBits(0xFBFF);
    311  }
    312  static constexpr auto max() noexcept {
    313    return js::float16::fromRawBits(0x7BFF);
    314  }
    315  static constexpr auto epsilon() noexcept {
    316    return js::float16::fromRawBits(0x1400);
    317  }
    318  static constexpr auto round_error() noexcept {
    319    return js::float16::fromRawBits(0x3800);
    320  }
    321  static constexpr auto infinity() noexcept {
    322    return js::float16::fromRawBits(0x7C00);
    323  }
    324  static constexpr auto quiet_NaN() noexcept {
    325    return js::float16::fromRawBits(0x7E00);
    326  }
    327  static constexpr auto signaling_NaN() noexcept {
    328    return js::float16::fromRawBits(0x7D00);
    329  }
    330  static constexpr auto denorm_min() noexcept {
    331    return js::float16::fromRawBits(0x0001);
    332  }
    333 };
    334 
    335 template <>
    336 struct mozilla::FloatingPointTrait<js::float16> {
    337 protected:
    338  using Bits = uint16_t;
    339 
    340  static constexpr unsigned kExponentWidth = 5;
    341  static constexpr unsigned kSignificandWidth = 10;
    342 };
    343 
    344 #endif  // vm_Float16_h