[ tor-browser ].git.dasho

AssemblerMatInt.cpp (8211B)
      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 //===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++
      7 //-*--===//
      8 //
      9 //  Part of the LLVM Project, under the Apache License v2.0 with LLVM
     10 //  Exceptions. See https://llvm.org/LICENSE.txt for license information.
     11 //  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
     12 //
     13 //===----------------------------------------------------------------------===//
     14 #include "mozilla/MathAlgorithms.h"
     15 
     16 #include "gc/Marking.h"
     17 #include "jit/AutoWritableJitCode.h"
     18 #include "jit/ExecutableAllocator.h"
     19 #include "jit/riscv64/Assembler-riscv64.h"
     20 #include "jit/riscv64/disasm/Disasm-riscv64.h"
     21 #include "vm/Realm.h"
     22 namespace js {
     23 namespace jit {
     24 void Assembler::RecursiveLi(Register rd, int64_t val) {
     25  if (val > 0 && RecursiveLiImplCount(val) > 2) {
     26    unsigned LeadingZeros = mozilla::CountLeadingZeroes64((uint64_t)val);
     27    uint64_t ShiftedVal = (uint64_t)val << LeadingZeros;
     28    int countFillZero = RecursiveLiImplCount(ShiftedVal) + 1;
     29    if (countFillZero < RecursiveLiImplCount(val)) {
     30      RecursiveLiImpl(rd, ShiftedVal);
     31      srli(rd, rd, LeadingZeros);
     32      return;
     33    }
     34  }
     35  RecursiveLiImpl(rd, val);
     36 }
     37 
     38 int Assembler::RecursiveLiCount(int64_t val) {
     39  if (val > 0 && RecursiveLiImplCount(val) > 2) {
     40    unsigned LeadingZeros = mozilla::CountLeadingZeroes64((uint64_t)val);
     41    uint64_t ShiftedVal = (uint64_t)val << LeadingZeros;
     42    // Fill in the bits that will be shifted out with 1s. An example where
     43    // this helps is trailing one masks with 32 or more ones. This will
     44    // generate ADDI -1 and an SRLI.
     45    int countFillZero = RecursiveLiImplCount(ShiftedVal) + 1;
     46    if (countFillZero < RecursiveLiImplCount(val)) {
     47      return countFillZero;
     48    }
     49  }
     50  return RecursiveLiImplCount(val);
     51 }
     52 
     53 inline int64_t signExtend(uint64_t V, int N) {
     54  return int64_t(V << (64 - N)) >> (64 - N);
     55 }
     56 
     57 void Assembler::RecursiveLiImpl(Register rd, int64_t Val) {
     58  if (is_int32(Val)) {
     59    // Depending on the active bits in the immediate Value v, the following
     60    // instruction sequences are emitted:
     61    //
     62    // v == 0                        : ADDI
     63    // v[0,12) != 0 && v[12,32) == 0 : ADDI
     64    // v[0,12) == 0 && v[12,32) != 0 : LUI
     65    // v[0,32) != 0                  : LUI+ADDI(W)
     66    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
     67    int64_t Lo12 = Val << 52 >> 52;
     68 
     69    if (Hi20) {
     70      lui(rd, (int32_t)Hi20);
     71    }
     72 
     73    if (Lo12 || Hi20 == 0) {
     74      if (Hi20) {
     75        addiw(rd, rd, Lo12);
     76      } else {
     77        addi(rd, zero_reg, Lo12);
     78      }
     79    }
     80    return;
     81  }
     82 
     83  // In the worst case, for a full 64-bit constant, a sequence of 8
     84  // instructions (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be
     85  // emitted. Note that the first two instructions (LUI+ADDIW) can contribute
     86  // up to 32 bits while the following ADDI instructions contribute up to 12
     87  // bits each.
     88  //
     89  // On the first glance, implementing this seems to be possible by simply
     90  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many
     91  // left shift (SLLI) and immediate additions (ADDI) as needed. However, due
     92  // to the fact that ADDI performs a sign extended addition, doing it like
     93  // that would only be possible when at most 11 bits of the ADDI instructions
     94  // are used. Using all 12 bits of the ADDI instructions, like done by GAS,
     95  // actually requires that the constant is processed starting with the least
     96  // significant bit.
     97  //
     98  // In the following, constants are processed from LSB to MSB but instruction
     99  // emission is performed from MSB to LSB by recursively calling
    100  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
    101  // from the constant and the optimal shift amount, which can be greater than
    102  // 12 bits if the constant is sparse, is determined. Then, the shifted
    103  // remaining constant is processed recursively and gets emitted as soon as
    104  // it fits into 32 bits. The emission of the shifts and additions is
    105  // subsequently performed when the recursion returns.
    106 
    107  int64_t Lo12 = Val << 52 >> 52;
    108  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
    109  int ShiftAmount = 12 + mozilla::CountTrailingZeroes64((uint64_t)Hi52);
    110  Hi52 = signExtend(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
    111 
    112  // If the remaining bits don't fit in 12 bits, we might be able to reduce
    113  // the shift amount in order to use LUI which will zero the lower 12 bits.
    114  bool Unsigned = false;
    115  if (ShiftAmount > 12 && !is_int12(Hi52)) {
    116    if (is_int32((uint64_t)Hi52 << 12)) {
    117      // Reduce the shift amount and add zeros to the LSBs so it will match
    118      // LUI.
    119      ShiftAmount -= 12;
    120      Hi52 = (uint64_t)Hi52 << 12;
    121    }
    122  }
    123  RecursiveLi(rd, Hi52);
    124 
    125  if (Unsigned) {
    126  } else {
    127    slli(rd, rd, ShiftAmount);
    128  }
    129  if (Lo12) {
    130    addi(rd, rd, Lo12);
    131  }
    132 }
    133 
    134 int Assembler::RecursiveLiImplCount(int64_t Val) {
    135  int count = 0;
    136  if (is_int32(Val)) {
    137    // Depending on the active bits in the immediate Value v, the following
    138    // instruction sequences are emitted:
    139    //
    140    // v == 0                        : ADDI
    141    // v[0,12) != 0 && v[12,32) == 0 : ADDI
    142    // v[0,12) == 0 && v[12,32) != 0 : LUI
    143    // v[0,32) != 0                  : LUI+ADDI(W)
    144    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    145    int64_t Lo12 = Val << 52 >> 52;
    146 
    147    if (Hi20) {
    148      // lui(rd, (int32_t)Hi20);
    149      count++;
    150    }
    151 
    152    if (Lo12 || Hi20 == 0) {
    153      //   unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
    154      //   Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
    155      count++;
    156    }
    157    return count;
    158  }
    159 
    160  // In the worst case, for a full 64-bit constant, a sequence of 8
    161  // instructions (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be
    162  // emitted. Note that the first two instructions (LUI+ADDIW) can contribute
    163  // up to 32 bits while the following ADDI instructions contribute up to 12
    164  // bits each.
    165  //
    166  // On the first glance, implementing this seems to be possible by simply
    167  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many
    168  // left shift (SLLI) and immediate additions (ADDI) as needed. However, due
    169  // to the fact that ADDI performs a sign extended addition, doing it like
    170  // that would only be possible when at most 11 bits of the ADDI instructions
    171  // are used. Using all 12 bits of the ADDI instructions, like done by GAS,
    172  // actually requires that the constant is processed starting with the least
    173  // significant bit.
    174  //
    175  // In the following, constants are processed from LSB to MSB but instruction
    176  // emission is performed from MSB to LSB by recursively calling
    177  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
    178  // from the constant and the optimal shift amount, which can be greater than
    179  // 12 bits if the constant is sparse, is determined. Then, the shifted
    180  // remaining constant is processed recursively and gets emitted as soon as
    181  // it fits into 32 bits. The emission of the shifts and additions is
    182  // subsequently performed when the recursion returns.
    183 
    184  int64_t Lo12 = Val << 52 >> 52;
    185  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
    186  int ShiftAmount = 12 + mozilla::CountTrailingZeroes64((uint64_t)Hi52);
    187  Hi52 = signExtend(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
    188 
    189  // If the remaining bits don't fit in 12 bits, we might be able to reduce
    190  // the shift amount in order to use LUI which will zero the lower 12 bits.
    191  bool Unsigned = false;
    192  if (ShiftAmount > 12 && !is_int12(Hi52)) {
    193    if (is_int32((uint64_t)Hi52 << 12)) {
    194      // Reduce the shift amount and add zeros to the LSBs so it will match
    195      // LUI.
    196      ShiftAmount -= 12;
    197      Hi52 = (uint64_t)Hi52 << 12;
    198    }
    199  }
    200 
    201  count += RecursiveLiImplCount(Hi52);
    202 
    203  if (Unsigned) {
    204  } else {
    205    // slli(rd, rd, ShiftAmount);
    206    count++;
    207  }
    208  if (Lo12) {
    209    // addi(rd, rd, Lo12);
    210    count++;
    211  }
    212  return count;
    213 }
    214 
    215 }  // namespace jit
    216 }  // namespace js
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE