tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

llvmorg-21-init-12122-g59c6d70ed812.patch (6149B)


      1 From 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0 Mon Sep 17 00:00:00 2001
      2 From: weiguozhi <57237827+weiguozhi@users.noreply.github.com>
      3 Date: Thu, 15 May 2025 09:27:25 -0700
      4 Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is
      5 before MemoryInst (#139303)
      6 
      7 Function optimizeBlock may perform optimizations on a block multiple
      8 times. In the first iteration of the loop, MemoryInst1 may generate a
      9 sunk instruction and store it into SunkAddrs. In the second iteration of
     10 the loop, MemoryInst2 may use the same address and then it can reuse the
     11 sunk instruction stored in SunkAddrs, but MemoryInst2 may be before
     12 MemoryInst1 and the corresponding sunk instruction. In order to avoid
     13 a use-before-def error, we need to find an appropriate insert position for the
     14 sunk instruction.
     15 
     16 Fixes #138208.
     17 ---
     18 llvm/lib/CodeGen/CodeGenPrepare.cpp           | 41 ++++++++++++++---
     19 .../CodeGenPrepare/X86/sink-addr-reuse.ll     | 44 +++++++++++++++++++
     20 2 files changed, 80 insertions(+), 5 deletions(-)
     21 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
     22 
     23 diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
     24 index 2c53a9c27ccb..76f27623c865 100644
     25 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
     26 +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
     27 @@ -5771,6 +5771,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
     28   return false;
     29 }
     30 
     31 +// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
     32 +// is the first instruction that will use Addr. So we need to find the first
     33 +// user of Addr in current BB.
     34 +static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
     35 +                                          Value *SunkAddr) {
     36 +  if (Addr->hasOneUse())
     37 +    return MemoryInst->getIterator();
     38 +
     39 +  // We already have a SunkAddr in current BB, but we may need to insert cast
     40 +  // instruction after it.
     41 +  if (SunkAddr) {
     42 +    if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
     43 +      return std::next(AddrInst->getIterator());
     44 +  }
     45 +
     46 +  // Find the first user of Addr in current BB.
     47 +  Instruction *Earliest = MemoryInst;
     48 +  for (User *U : Addr->users()) {
     49 +    Instruction *UserInst = dyn_cast<Instruction>(U);
     50 +    if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
     51 +      if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
     52 +        continue;
     53 +      if (UserInst->comesBefore(Earliest))
     54 +        Earliest = UserInst;
     55 +    }
     56 +  }
     57 +  return Earliest->getIterator();
     58 +}
     59 +
     60 /// Sink addressing mode computation immediate before MemoryInst if doing so
     61 /// can be done without increasing register pressure.  The need for the
     62 /// register pressure constraint means this can end up being an all or nothing
     63 @@ -5895,11 +5924,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     64     return Modified;
     65   }
     66 
     67 -  // Insert this computation right after this user.  Since our caller is
     68 -  // scanning from the top of the BB to the bottom, reuse of the expr are
     69 -  // guaranteed to happen later.
     70 -  IRBuilder<> Builder(MemoryInst);
     71 -
     72   // Now that we determined the addressing expression we want to use and know
     73   // that we have to sink it into this block.  Check to see if we have already
     74   // done this for some other load/store instr in this block.  If so, reuse
     75 @@ -5910,6 +5934,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     76 
     77   Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
     78   Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
     79 +
     80 +  // The current BB may be optimized multiple times, we can't guarantee the
     81 +  // reuse of Addr happens later, call findInsertPos to find an appropriate
     82 +  // insert position.
     83 +  IRBuilder<> Builder(MemoryInst->getParent(),
     84 +                      findInsertPos(Addr, MemoryInst, SunkAddr));
     85 +
     86   if (SunkAddr) {
     87     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
     88                       << " for " << *MemoryInst << "\n");
     89 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
     90 new file mode 100644
     91 index 000000000000..019f31140655
     92 --- /dev/null
     93 +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
     94 @@ -0,0 +1,44 @@
     95 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
     96 +; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
     97 +
     98 +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
     99 +target triple = "x86_64-grtev4-linux-gnu"
    100 +
    101 +declare void @g(ptr)
    102 +
    103 +; %load and %load5 use the same address, %load5 is optimized first, %load is
    104 +; optimized later and reuse the same address computation instruction. We must
    105 +; make sure not to generate use before def error.
    106 +
    107 +define void @f(ptr %arg) {
    108 +; CHECK-LABEL: define void @f(
    109 +; CHECK-SAME: ptr [[ARG:%.*]]) {
    110 +; CHECK-NEXT:  [[BB:.*:]]
    111 +; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
    112 +; CHECK-NEXT:    call void @g(ptr [[GETELEMENTPTR]])
    113 +; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
    114 +; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
    115 +; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
    116 +; CHECK-NEXT:    [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
    117 +; CHECK-NEXT:    [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
    118 +; CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
    119 +; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
    120 +; CHECK-NEXT:    ret void
    121 +;
    122 +bb:
    123 +  %getelementptr = getelementptr i8, ptr %arg, i64 -64
    124 +  %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
    125 +  call void @g(ptr %getelementptr)
    126 +  br label %bb3
    127 +
    128 +bb3:
    129 +  %load = load ptr, ptr %getelementptr, align 8
    130 +  %load4 = load i32, ptr %getelementptr1, align 8
    131 +  %load5 = load ptr, ptr %getelementptr, align 8
    132 +  %add = add i32 1, 0
    133 +  %icmp = icmp eq i32 %add, 0
    134 +  br i1 %icmp, label %bb7, label %bb7
    135 +
    136 +bb7:
    137 +  ret void
    138 +}
    139 -- 
    140 2.51.0