llvmorg-21-init-12122-g59c6d70ed812.patch (6149B)
1 From 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0 Mon Sep 17 00:00:00 2001 2 From: weiguozhi <57237827+weiguozhi@users.noreply.github.com> 3 Date: Thu, 15 May 2025 09:27:25 -0700 4 Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is 5 before MemoryInst (#139303) 6 7 Function optimizeBlock may optimize a block multiple 8 times. In the first iteration of the loop, MemoryInst1 may generate a 9 sunk instruction and store it into SunkAddrs. In the second iteration of 10 the loop, MemoryInst2 may use the same address and then it can reuse the 11 sunk instruction stored in SunkAddrs, but MemoryInst2 may be before 12 MemoryInst1 and the corresponding sunk instruction. In order to avoid 13 a use-before-def error, we need to find an appropriate insert position for the 14 sunk instruction. 15 16 Fixes #138208. 17 --- 18 llvm/lib/CodeGen/CodeGenPrepare.cpp | 41 ++++++++++++++--- 19 .../CodeGenPrepare/X86/sink-addr-reuse.ll | 44 +++++++++++++++++++ 20 2 files changed, 80 insertions(+), 5 deletions(-) 21 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll 22 23 diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp 24 index 2c53a9c27ccb..76f27623c865 100644 25 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp 26 +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp 27 @@ -5771,6 +5771,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { 28 return false; 29 } 30 31 +// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst 32 +// is the first instruction that will use Addr. So we need to find the first 33 +// user of Addr in current BB. 34 +static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, 35 + Value *SunkAddr) { 36 + if (Addr->hasOneUse()) 37 + return MemoryInst->getIterator(); 38 + 39 + // We already have a SunkAddr in current BB, but we may need to insert cast 40 + // instruction after it. 
41 + if (SunkAddr) { 42 + if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr)) 43 + return std::next(AddrInst->getIterator()); 44 + } 45 + 46 + // Find the first user of Addr in current BB. 47 + Instruction *Earliest = MemoryInst; 48 + for (User *U : Addr->users()) { 49 + Instruction *UserInst = dyn_cast<Instruction>(U); 50 + if (UserInst && UserInst->getParent() == MemoryInst->getParent()) { 51 + if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst()) 52 + continue; 53 + if (UserInst->comesBefore(Earliest)) 54 + Earliest = UserInst; 55 + } 56 + } 57 + return Earliest->getIterator(); 58 +} 59 + 60 /// Sink addressing mode computation immediate before MemoryInst if doing so 61 /// can be done without increasing register pressure. The need for the 62 /// register pressure constraint means this can end up being an all or nothing 63 @@ -5895,11 +5924,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 64 return Modified; 65 } 66 67 - // Insert this computation right after this user. Since our caller is 68 - // scanning from the top of the BB to the bottom, reuse of the expr are 69 - // guaranteed to happen later. 70 - IRBuilder<> Builder(MemoryInst); 71 - 72 // Now that we determined the addressing expression we want to use and know 73 // that we have to sink it into this block. Check to see if we have already 74 // done this for some other load/store instr in this block. If so, reuse 75 @@ -5910,6 +5934,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 76 77 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; 78 Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); 79 + 80 + // The current BB may be optimized multiple times, we can't guarantee the 81 + // reuse of Addr happens later, call findInsertPos to find an appropriate 82 + // insert position. 
83 + IRBuilder<> Builder(MemoryInst->getParent(), 84 + findInsertPos(Addr, MemoryInst, SunkAddr)); 85 + 86 if (SunkAddr) { 87 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode 88 << " for " << *MemoryInst << "\n"); 89 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll 90 new file mode 100644 91 index 000000000000..019f31140655 92 --- /dev/null 93 +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll 94 @@ -0,0 +1,44 @@ 95 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 96 +; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s 97 + 98 +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" 99 +target triple = "x86_64-grtev4-linux-gnu" 100 + 101 +declare void @g(ptr) 102 + 103 +; %load and %load5 use the same address, %load5 is optimized first, %load is 104 +; optimized later and reuse the same address computation instruction. We must 105 +; make sure not to generate use before def error. 
106 + 107 +define void @f(ptr %arg) { 108 +; CHECK-LABEL: define void @f( 109 +; CHECK-SAME: ptr [[ARG:%.*]]) { 110 +; CHECK-NEXT: [[BB:.*:]] 111 +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 112 +; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]]) 113 +; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 114 +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 115 +; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56 116 +; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8 117 +; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 118 +; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0) 119 +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0 120 +; CHECK-NEXT: ret void 121 +; 122 +bb: 123 + %getelementptr = getelementptr i8, ptr %arg, i64 -64 124 + %getelementptr1 = getelementptr i8, ptr %arg, i64 -56 125 + call void @g(ptr %getelementptr) 126 + br label %bb3 127 + 128 +bb3: 129 + %load = load ptr, ptr %getelementptr, align 8 130 + %load4 = load i32, ptr %getelementptr1, align 8 131 + %load5 = load ptr, ptr %getelementptr, align 8 132 + %add = add i32 1, 0 133 + %icmp = icmp eq i32 %add, 0 134 + br i1 %icmp, label %bb7, label %bb7 135 + 136 +bb7: 137 + ret void 138 +} 139 -- 140 2.51.0