DominatorTree.cpp (10878B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "jit/DominatorTree.h" 8 9 #include <utility> 10 11 #include "jit/MIRGenerator.h" 12 #include "jit/MIRGraph.h" 13 14 using namespace js; 15 using namespace js::jit; 16 17 namespace js::jit { 18 19 // Implementation of the Semi-NCA algorithm, used to compute the immediate 20 // dominator block for each block in the MIR graph. The same algorithm is used 21 // by LLVM. 22 // 23 // Semi-NCA is described in this dissertation: 24 // 25 // Linear-Time Algorithms for Dominators and Related Problems 26 // Loukas Georgiadis, Princeton University, November 2005, pp. 21-23: 27 // https://www.cs.princeton.edu/techreports/2005/737.pdf or 28 // ftp://ftp.cs.princeton.edu/reports/2005/737.pdf 29 // 30 // The code is also inspired by the implementation from the Julia compiler: 31 // 32 // https://github.com/JuliaLang/julia/blob/a73ba3bab7ddbf087bb64ef8d236923d8d7f0051/base/compiler/ssair/domtree.jl 33 // 34 // And the LLVM version: 35 // 36 // https://github.com/llvm/llvm-project/blob/bcd65ba6129bea92485432fdd09874bc3fc6671e/llvm/include/llvm/Support/GenericDomTreeConstruction.h 37 // 38 // At a high level, the algorithm has the following phases: 39 // 40 // 1) Depth-First Search (DFS) to assign a new 'DFS-pre-number' id to each 41 // basic block. See initStateAndRenumberBlocks. 42 // 2) Compute semi-dominators. The first loop in computeDominators. 43 // 3) Compute immediate dominators. The second loop in computeDominators. 44 // 4) Store the immediate dominators in the MIR graph. The third loop in 45 // computeDominators. 46 // 47 // Future work: the algorithm can be extended to update the dominator tree more 48 // efficiently after inserting or removing edges, instead of recomputing the 49 // whole graph. This is called Dynamic SNCA and both Julia and LLVM implement 50 // this. 51 class MOZ_RAII SemiNCA { 52 const MIRGenerator* mir_; 53 MIRGraph& graph_; 54 55 // State associated with each basic block. Note that the paper uses separate 56 // arrays for these fields: label[v] is stored as state_[v].label in our code. 57 struct BlockState { 58 MBasicBlock* block = nullptr; 59 uint32_t ancestor = 0; 60 uint32_t label = 0; 61 uint32_t semi = 0; 62 uint32_t idom = 0; 63 }; 64 using BlockStateVector = Vector<BlockState, 8, BackgroundSystemAllocPolicy>; 65 BlockStateVector state_; 66 67 // Stack for |compress|. This is allocated here instead of in |compress| to 68 // avoid extra malloc/free calls. 69 using CompressStack = Vector<uint32_t, 16, BackgroundSystemAllocPolicy>; 70 CompressStack compressStack_; 71 72 [[nodiscard]] bool initStateAndRenumberBlocks(); 73 [[nodiscard]] bool compress(uint32_t v, uint32_t lastLinked); 74 75 public: 76 SemiNCA(const MIRGenerator* mir, MIRGraph& graph) 77 : mir_(mir), graph_(graph) {} 78 79 [[nodiscard]] bool computeDominators(); 80 }; 81 82 } // namespace js::jit 83 84 bool SemiNCA::initStateAndRenumberBlocks() { 85 MOZ_ASSERT(state_.empty()); 86 87 // MIR graphs can have multiple root blocks when OSR is used, but the 88 // algorithm assumes the graph has a single root block. Use a virtual root 89 // block (with id 0) that has the actual root blocks as successors. At the 90 // end, if a block has this virtual root node as immediate dominator, we mark 91 // it self-dominating. 92 size_t nblocks = 1 /* virtual root */ + graph_.numBlocks(); 93 if (!state_.growBy(nblocks)) { 94 return false; 95 } 96 97 using BlockAndParent = std::pair<MBasicBlock*, uint32_t>; 98 Vector<BlockAndParent, 16, BackgroundSystemAllocPolicy> worklist; 99 100 // Append all root blocks to the work list. 101 constexpr size_t RootId = 0; 102 if (!worklist.emplaceBack(graph_.entryBlock(), RootId)) { 103 return false; 104 } 105 106 if (MBasicBlock* osrBlock = graph_.osrBlock()) { 107 if (!worklist.emplaceBack(osrBlock, RootId)) { 108 return false; 109 } 110 111 // If OSR is used, the graph can contain blocks marked unreachable. These 112 // blocks aren't reachable from the other roots, so treat them as additional 113 // roots to make sure we visit (and initialize state for) all blocks in the 114 // graph. 115 for (MBasicBlock* block : graph_) { 116 if (block->unreachable()) { 117 if (!worklist.emplaceBack(block, RootId)) { 118 return false; 119 } 120 } 121 } 122 } 123 124 // Visit all blocks in DFS order and re-number them. Also initialize the 125 // BlockState for each block. 126 uint32_t id = 1; 127 while (!worklist.empty()) { 128 auto [block, parent] = worklist.popCopy(); 129 block->mark(); 130 block->setId(id); 131 state_[id] = { 132 .block = block, .ancestor = parent, .label = id, .idom = parent}; 133 134 for (size_t i = 0, n = block->numSuccessors(); i < n; i++) { 135 MBasicBlock* succ = block->getSuccessor(i); 136 if (succ->isMarked()) { 137 continue; 138 } 139 if (!worklist.emplaceBack(succ, id)) { 140 return false; 141 } 142 } 143 144 id++; 145 } 146 147 MOZ_ASSERT(id == nblocks, "should have visited all blocks"); 148 return true; 149 } 150 151 bool SemiNCA::compress(uint32_t v, uint32_t lastLinked) { 152 // Iterative implementation of snca_compress in Figure 2.8 of the paper, but 153 // with the optimization described in section 2.2.1 to compare against 154 // lastLinked instead of checking the ancestor is 0. 155 // 156 // Walk up the ancestor chain from |v| until we reach the last node we already 157 // processed. Then walk back to |v| and compress this path. 158 159 if (state_[v].ancestor < lastLinked) { 160 return true; 161 } 162 163 // Push v and all its ancestor nodes that we've processed on the stack, except 164 // for the last one. 165 MOZ_ASSERT(compressStack_.empty()); 166 do { 167 if (!compressStack_.append(v)) { 168 return false; 169 } 170 v = state_[v].ancestor; 171 } while (state_[v].ancestor >= lastLinked); 172 173 // Now walk the ancestor chain back to the node where we started and perform 174 // path compression. 175 uint32_t root = state_[v].ancestor; 176 do { 177 uint32_t u = v; 178 v = compressStack_.popCopy(); 179 MOZ_ASSERT(u < v); 180 MOZ_ASSERT(state_[v].ancestor == u); 181 MOZ_ASSERT(u >= lastLinked); 182 183 // The meat of the snca_compress function in the paper. 184 if (state_[u].label < state_[v].label) { 185 state_[v].label = state_[u].label; 186 } 187 state_[v].ancestor = root; 188 } while (!compressStack_.empty()); 189 190 return true; 191 } 192 193 bool SemiNCA::computeDominators() { 194 if (!initStateAndRenumberBlocks()) { 195 return false; 196 } 197 198 size_t nblocks = state_.length(); 199 200 // The following two loops implement the SNCA algorithm in Figure 2.8 of the 201 // paper. 202 203 // Compute semi-dominators. 204 for (size_t w = nblocks - 1; w > 0; w--) { 205 if (mir_->shouldCancel("SemiNCA computeDominators")) { 206 return false; 207 } 208 209 MBasicBlock* block = state_[w].block; 210 uint32_t semiW = state_[w].ancestor; 211 212 // All nodes with id >= lastLinked have already been processed and are 213 // eligible for path compression. See also the comment in |compress|. 214 uint32_t lastLinked = w + 1; 215 216 for (size_t i = 0, n = block->numPredecessors(); i < n; i++) { 217 MBasicBlock* pred = block->getPredecessor(i); 218 uint32_t v = pred->id(); 219 if (v >= lastLinked) { 220 // Attempt path compression. Note that this |v >= lastLinked| check 221 // isn't strictly required and isn't in the paper, because it's implied 222 // by the first check in |compress|, but this is more efficient. 223 if (!compress(v, lastLinked)) { 224 return false; 225 } 226 } 227 semiW = std::min(semiW, state_[v].label); 228 } 229 230 state_[w].semi = semiW; 231 state_[w].label = semiW; 232 } 233 234 // Compute immediate dominators. 235 for (size_t v = 1; v < nblocks; v++) { 236 auto& blockState = state_[v]; 237 uint32_t idom = blockState.idom; 238 while (idom > blockState.semi) { 239 idom = state_[idom].idom; 240 } 241 blockState.idom = idom; 242 } 243 244 // Set immediate dominators for all blocks in the MIR graph. Also unmark all 245 // blocks (|initStateAndRenumberBlocks| marked them) and restore their 246 // original IDs. 247 uint32_t id = 0; 248 for (ReversePostorderIterator block(graph_.rpoBegin()); 249 block != graph_.rpoEnd(); block++) { 250 auto& state = state_[block->id()]; 251 if (state.idom == 0) { 252 // If a block is dominated by the virtual root node, it's self-dominating. 253 block->setImmediateDominator(*block); 254 } else { 255 block->setImmediateDominator(state_[state.idom].block); 256 } 257 block->unmark(); 258 block->setId(id++); 259 } 260 261 // Done! 262 return true; 263 } 264 265 static bool ComputeImmediateDominators(const MIRGenerator* mir, 266 MIRGraph& graph) { 267 SemiNCA semiNCA(mir, graph); 268 return semiNCA.computeDominators(); 269 } 270 271 bool jit::BuildDominatorTree(const MIRGenerator* mir, MIRGraph& graph) { 272 MOZ_ASSERT(graph.canBuildDominators()); 273 274 if (!ComputeImmediateDominators(mir, graph)) { 275 return false; 276 } 277 278 Vector<MBasicBlock*, 4, JitAllocPolicy> worklist(graph.alloc()); 279 280 // Traversing through the graph in post-order means that every non-phi use 281 // of a definition is visited before the def itself. Since a def 282 // dominates its uses, by the time we reach a particular 283 // block, we have processed all of its dominated children, so 284 // block->numDominated() is accurate. 285 for (PostorderIterator i(graph.poBegin()); i != graph.poEnd(); i++) { 286 MBasicBlock* child = *i; 287 MBasicBlock* parent = child->immediateDominator(); 288 289 // Dominance is defined such that blocks always dominate themselves. 290 child->addNumDominated(1); 291 292 // If the block only self-dominates, it has no definite parent. 293 // Add it to the worklist as a root for pre-order traversal. 294 // This includes all roots. Order does not matter. 295 if (child == parent) { 296 if (!worklist.append(child)) { 297 return false; 298 } 299 continue; 300 } 301 302 if (!parent->addImmediatelyDominatedBlock(child)) { 303 return false; 304 } 305 306 parent->addNumDominated(child->numDominated()); 307 } 308 309 #ifdef DEBUG 310 // If compiling with OSR, many blocks will self-dominate. 311 // Without OSR, there is only one root block which dominates all. 312 if (!graph.osrBlock()) { 313 MOZ_ASSERT(graph.entryBlock()->numDominated() == graph.numBlocks()); 314 } 315 #endif 316 // Now, iterate through the dominator tree in pre-order and annotate every 317 // block with its index in the traversal. 318 size_t index = 0; 319 while (!worklist.empty()) { 320 MBasicBlock* block = worklist.popCopy(); 321 block->setDomIndex(index); 322 323 if (!worklist.append(block->immediatelyDominatedBlocksBegin(), 324 block->immediatelyDominatedBlocksEnd())) { 325 return false; 326 } 327 index++; 328 } 329 330 return true; 331 } 332 333 void jit::ClearDominatorTree(MIRGraph& graph) { 334 for (MBasicBlockIterator iter = graph.begin(); iter != graph.end(); iter++) { 335 iter->clearDominatorInfo(); 336 } 337 }