// EffectiveAddressAnalysis.cpp
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/EffectiveAddressAnalysis.h"

#include "jit/IonAnalysis.h"
#include "jit/MIR-wasm.h"
#include "jit/MIR.h"
#include "jit/MIRGenerator.h"
#include "jit/MIRGraph.h"

using namespace js;
using namespace jit;

// This is a very simple pass that tries to merge 32-bit shift-and-add into a
// single MIR node. It results from a lot of experimentation with more
// aggressive load-effective-address formation, as documented in bug 1970035.
//
// This implementation only covers the two-addend form
// `base + (index << {1,2,3})` (and the same the other way around). Previous
// experimentation showed that, while the 3-addend form
// `base + (index << {1,2,3}) + constant` can be reliably identified and merged
// into a single node, it doesn't reliably produce faster code. Also, the
// implementation complexity is much higher than what is below.
//
// 3-addend LEAs can be completed in a single cycle on high-end Intels, but
// take 2 cycles on lower end Intels. By comparison the 2-addend form is
// believed to take a single cycle on all Intels. On arm64, the 3-addend form
// is not supported in a single machine instruction, and so can require zero,
// one or two extra instructions, depending on the size of the constant,
// possibly an extra register, and consequently some number of extra cycles.
//
// Because of this, restricting the transformation to the 2-addend case
// simplifies both the implementation and more importantly the cost-tradeoff
// landscape.
// It gains much of the wins of the 3-addend case while more
// reliably producing nodes that can execute in a single cycle on all primary
// targets.

// =====================================================================

// On non-x86/x64 targets, incorporating any non-zero constant (displacement)
// in an EffectiveAddress2 node is not free, because the constant may have to
// be synthesised into a register in the back end. Worse, on all such targets,
// arbitrary 32-bit constants will take two instructions to synthesise, which
// can lead to a net performance loss.
//
// `OffsetIsSmallEnough` is used in the logic below to restrict constants to
// single-instruction forms. It is necessarily target-dependent. Note this is
// merely a heuristic -- the resulting code should be *correct* on all targets
// regardless of the value returned.

// Decide whether `imm` is cheap enough (one instruction or free) to fold into
// an effective-address node on the current compilation target.
static bool OffsetIsSmallEnough(int32_t imm) {
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
  // x86_32 and x86_64 addressing modes can carry an arbitrary 32-bit
  // displacement at no extra cost, so accept everything.
  return true;
#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_ARM)
  // A single `movz #imm` or `movn #imm` covers this symmetric 16-bit
  // neighbourhood on arm64; arm32 is similar.
  constexpr int32_t kOneInsnLimit = 0xFFFF;
  return -kOneInsnLimit <= imm && imm <= kOneInsnLimit;
#elif defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_LOONG64) || \
    defined(JS_CODEGEN_MIPS64)
  // Keep to the 12-bit-immediate neighbourhood for these targets.
  constexpr int32_t kOneInsnLimit = 0xFFF;
  return -kOneInsnLimit <= imm && imm <= kOneInsnLimit;
#elif defined(JS_CODEGEN_WASM32) || defined(JS_CODEGEN_NONE)
  return true;
#else
# error "This needs to be filled in for your platform"
#endif
}

// If `def` is of the form `x << {1,2,3}`, return `x` and the shift value.
// Otherwise return the pair `(nullptr, 0)`.
static std::pair<MDefinition*, int32_t> IsShiftBy123(MDefinition* def) {
  MOZ_ASSERT(def->type() == MIRType::Int32);
  if (!def->isLsh()) {
    return std::pair(nullptr, 0);
  }
  MLsh* lsh = def->toLsh();
  // Don't match shifts that are recovered on bailout; the corresponding
  // check for the MAdd node is done in TryMatchShiftAdd.
  if (lsh->isRecoveredOnBailout()) {
    return std::pair(nullptr, 0);
  }
  MDefinition* shamt = lsh->rhs();
  MOZ_ASSERT(shamt->type() == MIRType::Int32);
  // The shift amount must be a known constant in the range {1, 2, 3}.
  MConstant* con = shamt->maybeConstantValue();
  if (!con || con->toInt32() < 1 || con->toInt32() > 3) {
    return std::pair(nullptr, 0);
  }
  return std::pair(lsh->lhs(), con->toInt32());
}

// Try to convert `base + (index << {1,2,3})` into either an MEffectiveAddress2
// node (if base is a constant) or an MEffectiveAddress3 node with zero
// displacement (if base is non-constant).
static void TryMatchShiftAdd(TempAllocator& alloc, MAdd* root) {
  MOZ_ASSERT(root->isAdd());
  MOZ_ASSERT(root->type() == MIRType::Int32);
  MOZ_ASSERT(root->hasUses());

  // Try to match
  //
  //   base + (index << {1,2,3})
  //
  // in which the addends can appear in either order. Obviously the shift
  // amount must be a constant, but `base` and `index` can be anything.

  MDefinition* base = nullptr;
  MDefinition* index = nullptr;
  int32_t shift = 0;

  // First assume the shift is the right-hand addend ...
  auto pair = IsShiftBy123(root->rhs());
  MOZ_ASSERT((pair.first == nullptr) == (pair.second == 0));
  if (pair.first) {
    base = root->lhs();
    index = pair.first;
    shift = pair.second;
  } else {
    // ... and if not, try it as the left-hand addend.
    pair = IsShiftBy123(root->lhs());
    MOZ_ASSERT((pair.first == nullptr) == (pair.second == 0));
    if (pair.first) {
      base = root->rhs();
      index = pair.first;
      shift = pair.second;
    }
  }

  if (!base) {
    return;
  }
  MOZ_ASSERT(shift >= 1 && shift <= 3);

  // IsShiftBy123 ensures that the MLsh node is not `recoveredOnBailout`, and
  // this test takes care of the MAdd node.
  if (root->isRecoveredOnBailout()) {
    return;
  }

  // Pattern matching succeeded.
  Scale scale = ShiftToScale(shift);
  MOZ_ASSERT(scale != TimesOne);

  MInstruction* replacement = nullptr;
  if (base->maybeConstantValue()) {
    int32_t baseValue = base->maybeConstantValue()->toInt32();
    if (baseValue == 0) {
      // We'd only be rolling one operation -- the shift -- into the result, so
      // don't bother.
      return;
    }
    if (!OffsetIsSmallEnough(baseValue)) {
      // `baseValue` would take more than one insn to get into a register,
      // which makes the change less likely to be a win. See bug 1979829.
      return;
    }
    replacement = MEffectiveAddress2::New(alloc, index, scale, baseValue);
  } else {
    replacement = MEffectiveAddress3::New(alloc, base, index, scale, 0);
  }

  // Redirect all users of `root` to the new node, and place the new node
  // immediately after `root`. `root` itself is left in place; if it becomes
  // dead, a later DCE pass removes it (see analyze() below -- TODO confirm
  // which pass performs that cleanup).
  root->replaceAllUsesWith(replacement);
  root->block()->insertAfter(root, replacement);

  if (JitSpewEnabled(JitSpew_EAA)) {
    JitSpewCont(JitSpew_EAA, " create: '");
    DumpMIRDefinition(JitSpewPrinter(), replacement, /*showDetails=*/false);
    JitSpewCont(JitSpew_EAA, "'\n");
  }
}

// =====================================================================
//
// Top level driver.

// Walk every block of the graph in reverse post-order and try to rewrite each
// 32-bit MAdd that still has uses. Returns false only if
// `graph_.alloc().ensureBallast()` fails; a failed pattern match is not an
// error -- the node is simply left unchanged.
bool EffectiveAddressAnalysis::analyze() {
  JitSpew(JitSpew_EAA, "Begin");

  for (ReversePostorderIterator block(graph_.rpoBegin());
       block != graph_.rpoEnd(); block++) {
    // Traverse backwards through `block`, trying to rewrite each MIR node if
    // we can. Rewriting may cause nodes to become dead. We do not try to
    // remove those here, but leave them for a later DCE pass to clear up.

    MInstructionReverseIterator ri(block->rbegin());
    while (ri != block->rend()) {
      // Nodes are added immediately after `curr`, so the iterator won't
      // traverse them, since we're iterating backwards.
      MInstruction* curr = *ri;
      ri++;

      // Only Int32 adds that still have uses are candidates.
      if (MOZ_LIKELY(!curr->isAdd())) {
        continue;
      }
      if (curr->type() != MIRType::Int32 || !curr->hasUses()) {
        continue;
      }

      // This check needs to precede any allocation done in this loop.
      if (MOZ_UNLIKELY(!graph_.alloc().ensureBallast())) {
        return false;
      }

      TryMatchShiftAdd(graph_.alloc(), curr->toAdd());
    }
  }

  JitSpew(JitSpew_EAA, "End");
  return true;
}