tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

EffectiveAddressAnalysis.cpp (7651B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "jit/EffectiveAddressAnalysis.h"
      8 
      9 #include "jit/IonAnalysis.h"
     10 #include "jit/MIR-wasm.h"
     11 #include "jit/MIR.h"
     12 #include "jit/MIRGenerator.h"
     13 #include "jit/MIRGraph.h"
     14 
     15 using namespace js;
     16 using namespace jit;
     17 
     18 // This is a very simple pass that tries to merge 32-bit shift-and-add into a
     19 // single MIR node.  It results from a lot of experimentation with more
     20 // aggressive load-effective-address formation, as documented in bug 1970035.
     21 //
     22 // This implementation only covers the two-addend form
     23 // `base + (index << {1,2,3})` (and the same the other way around).  Previous
     24 // experimentation showed that, while the 3-addend form
     25 // `base + (index << {1,2,3}) + constant` can be reliably identified and merged
     26 // into a single node, it doesn't reliably produce faster code.  Also, the
     27 // implementation complexity is much higher than what is below.
     28 //
     29 // 3-addend LEAs can be completed in a single cycle on high-end Intels, but
     30 // take 2 cycles on lower end Intels.  By comparison the 2-addend form is
     31 // believed to take a single cycle on all Intels.  On arm64, the 3-addend form
     32 // is not supported in a single machine instruction, and so can require zero,
     33 // one or two extra instructions, depending on the size of the constant,
     34 // possibly an extra register, and consequently some number of extra cycles.
     35 //
     36 // Because of this, restricting the transformation to the 2-addend case
     37 // simplifies both the implementation and more importantly the cost-tradeoff
     38 // landscape.  It gains much of the wins of the 3-addend case while more
     39 // reliably producing nodes that can execute in a single cycle on all primary
     40 // targets.
     41 
     42 // =====================================================================
     43 
     44 // On non-x86/x64 targets, incorporating any non-zero constant (displacement)
     45 // in an EffectiveAddress2 node is not free, because the constant may have to
     46 // be synthesised into a register in the back end.  Worse, on all such targets,
     47 // arbitrary 32-bit constants will take two instructions to synthesise, which
     48 // can lead to a net performance loss.
     49 //
     50 // `OffsetIsSmallEnough` is used in the logic below to restrict constants to
     51 // single-instruction forms.  It is necessarily target-dependent.  Note this is
     52 // merely a heuristic -- the resulting code should be *correct* on all targets
     53 // regardless of the value returned.
     54 
     55 static bool OffsetIsSmallEnough(int32_t imm) {
     56 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
     57  // For x86_32 and x86_64 we have the luxury of being able to roll in any
     58  // 32-bit `imm` value for free.
     59  return true;
     60 #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_ARM)
     61  // On arm64, this can be synthesised in one insn as `movz #imm` or
     62  // `movn #imm`.  arm32 is similar.
     63  return imm >= -0xFFFF && imm <= 0xFFFF;
     64 #elif defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_LOONG64) || \
     65    defined(JS_CODEGEN_MIPS64)
     66  return imm >= -0xFFF && imm <= 0xFFF;
     67 #elif defined(JS_CODEGEN_WASM32) || defined(JS_CODEGEN_NONE)
     68  return true;
     69 #else
     70 #  error "This needs to be filled in for your platform"
     71 #endif
     72 }
     73 
     74 // If `def` is of the form `x << {1,2,3}`, return `x` and the shift value.
     75 // Otherwise return the pair `(nullptr, 0)`.
     76 static std::pair<MDefinition*, int32_t> IsShiftBy123(MDefinition* def) {
     77  MOZ_ASSERT(def->type() == MIRType::Int32);
     78  if (!def->isLsh()) {
     79    return std::pair(nullptr, 0);
     80  }
     81  MLsh* lsh = def->toLsh();
     82  if (lsh->isRecoveredOnBailout()) {
     83    return std::pair(nullptr, 0);
     84  }
     85  MDefinition* shamt = lsh->rhs();
     86  MOZ_ASSERT(shamt->type() == MIRType::Int32);
     87  MConstant* con = shamt->maybeConstantValue();
     88  if (!con || con->toInt32() < 1 || con->toInt32() > 3) {
     89    return std::pair(nullptr, 0);
     90  }
     91  return std::pair(lsh->lhs(), con->toInt32());
     92 }
     93 
     94 // Try to convert `base + (index << {1,2,3})` into either an MEffectiveAddress2
     95 // node (if base is a constant) or an MEffectiveAddress3 node with zero
     96 // displacement (if base is non-constant).
     97 static void TryMatchShiftAdd(TempAllocator& alloc, MAdd* root) {
     98  MOZ_ASSERT(root->isAdd());
     99  MOZ_ASSERT(root->type() == MIRType::Int32);
    100  MOZ_ASSERT(root->hasUses());
    101 
    102  // Try to match
    103  //
    104  //   base + (index << {1,2,3})
    105  //
    106  // in which the addends can appear in either order.  Obviously the shift
    107  // amount must be a constant, but `base` and `index` can be anything.
    108 
    109  MDefinition* base = nullptr;
    110  MDefinition* index = nullptr;
    111  int32_t shift = 0;
    112 
    113  auto pair = IsShiftBy123(root->rhs());
    114  MOZ_ASSERT((pair.first == nullptr) == (pair.second == 0));
    115  if (pair.first) {
    116    base = root->lhs();
    117    index = pair.first;
    118    shift = pair.second;
    119  } else {
    120    pair = IsShiftBy123(root->lhs());
    121    MOZ_ASSERT((pair.first == nullptr) == (pair.second == 0));
    122    if (pair.first) {
    123      base = root->rhs();
    124      index = pair.first;
    125      shift = pair.second;
    126    }
    127  }
    128 
    129  if (!base) {
    130    return;
    131  }
    132  MOZ_ASSERT(shift >= 1 && shift <= 3);
    133 
    134  // IsShiftBy123 ensures that the MLsh node is not `recoveredOnBailout`, and
    135  // this test takes care of the MAdd node.
    136  if (root->isRecoveredOnBailout()) {
    137    return;
    138  }
    139 
    140  // Pattern matching succeeded.
    141  Scale scale = ShiftToScale(shift);
    142  MOZ_ASSERT(scale != TimesOne);
    143 
    144  MInstruction* replacement = nullptr;
    145  if (base->maybeConstantValue()) {
    146    int32_t baseValue = base->maybeConstantValue()->toInt32();
    147    if (baseValue == 0) {
    148      // We'd only be rolling one operation -- the shift -- into the result, so
    149      // don't bother.
    150      return;
    151    }
    152    if (!OffsetIsSmallEnough(baseValue)) {
    153      // `baseValue` would take more than one insn to get into a register,
    154      // which makes the change less likely to be a win.  See bug 1979829.
    155      return;
    156    }
    157    replacement = MEffectiveAddress2::New(alloc, index, scale, baseValue);
    158  } else {
    159    replacement = MEffectiveAddress3::New(alloc, base, index, scale, 0);
    160  }
    161 
    162  root->replaceAllUsesWith(replacement);
    163  root->block()->insertAfter(root, replacement);
    164 
    165  if (JitSpewEnabled(JitSpew_EAA)) {
    166    JitSpewCont(JitSpew_EAA, "  create: '");
    167    DumpMIRDefinition(JitSpewPrinter(), replacement, /*showDetails=*/false);
    168    JitSpewCont(JitSpew_EAA, "'\n");
    169  }
    170 }
    171 
    172 // =====================================================================
    173 //
    174 // Top level driver.
    175 
    176 bool EffectiveAddressAnalysis::analyze() {
    177  JitSpew(JitSpew_EAA, "Begin");
    178 
    179  for (ReversePostorderIterator block(graph_.rpoBegin());
    180       block != graph_.rpoEnd(); block++) {
    181    // Traverse backwards through `block`, trying to rewrite each MIR node if
    182    // we can.  Rewriting may cause nodes to become dead.  We do not try to
    183    // remove those here, but leave them for a later DCE pass to clear up.
    184 
    185    MInstructionReverseIterator ri(block->rbegin());
    186    while (ri != block->rend()) {
    187      // Nodes are added immediately after `curr`, so the iterator won't
    188      // traverse them, since we're iterating backwards.
    189      MInstruction* curr = *ri;
    190      ri++;
    191 
    192      if (MOZ_LIKELY(!curr->isAdd())) {
    193        continue;
    194      }
    195      if (curr->type() != MIRType::Int32 || !curr->hasUses()) {
    196        continue;
    197      }
    198 
    199      // This check needs to precede any allocation done in this loop.
    200      if (MOZ_UNLIKELY(!graph_.alloc().ensureBallast())) {
    201        return false;
    202      }
    203 
    204      TryMatchShiftAdd(graph_.alloc(), curr->toAdd());
    205    }
    206  }
    207 
    208  JitSpew(JitSpew_EAA, "End");
    209  return true;
    210 }