[ tor-browser ].git.dasho

commit 7b810d534e1d349aa05bef50d8578019082627de
parent 38325fb14fa4e0c4fb6c769cfe9b5044baada715
Author: serge-sans-paille <sguelton@mozilla.com>
Date:   Wed, 24 Dec 2025 07:22:21 +0000

Bug 2007081 - Backport upstream patch that fixes wrong instruction scheduling r=glandium,yjuglaret

This fixes a codegen bug triggered by the JetStream3 benchmark; see
https://bugzilla.mozilla.org/show_bug.cgi?id=1962418#c17

Differential Revision: https://phabricator.services.mozilla.com/D277309

Diffstat:
M build/build-clang/clang-20.json  | 1 +
A build/build-clang/llvmorg-21-init-12122-g59c6d70ed812.patch  | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 142 insertions(+), 0 deletions(-)
diff --git a/build/build-clang/clang-20.json b/build/build-clang/clang-20.json
@@ -9,6 +9,7 @@
         "revert-llvmorg-16-init-11301-g163bb6d64e5f_clang_20.patch",
         "revert-llvmorg-15-init-13446-g7524fe962e47.patch",
         "llvmorg-21-init-2651-gbac417107339.patch",
+        "llvmorg-21-init-12122-g59c6d70ed812.patch",
         "llvmorg-21-init-17702-g0d7e64f5d2b4.patch",
         "llvmorg-22-init-1937-g2672719a09cf.patch",
         "llvmorg-22-init-2380-gd9971be83e5d.patch",
diff --git a/build/build-clang/llvmorg-21-init-12122-g59c6d70ed812.patch b/build/build-clang/llvmorg-21-init-12122-g59c6d70ed812.patch
@@ -0,0 +1,141 @@
+From 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0 Mon Sep 17 00:00:00 2001
+From: weiguozhi <57237827+weiguozhi@users.noreply.github.com>
+Date: Thu, 15 May 2025 09:27:25 -0700
+Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is
+ before MemoryInst (#139303)
+
+Function optimizeBlock may do optimizations on a block for multiple
+times. In the first iteration of the loop, MemoryInst1 may generate a
+sunk instruction and store it into SunkAddrs. In the second iteration of
+the loop, MemoryInst2 may use the same address and then it can reuse the
+sunk instruction stored in SunkAddrs, but MemoryInst2 may be before
+MemoryInst1 and the corresponding sunk instruction. In order to avoid a
+use-before-def error, we need to find an appropriate insert position for the
+sunk instruction.
+
+Fixes #138208.
+---
+ llvm/lib/CodeGen/CodeGenPrepare.cpp           | 41 ++++++++++++++---
+ .../CodeGenPrepare/X86/sink-addr-reuse.ll     | 44 +++++++++++++++++++
+ 2 files changed, 80 insertions(+), 5 deletions(-)
+ create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
+
+diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
+index 2c53a9c27ccb..76f27623c865 100644
+--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
++++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
+@@ -5771,6 +5771,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+   return false;
+ }
+ 
++// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
++// is the first instruction that will use Addr. So we need to find the first
++// user of Addr in current BB.
++static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
++                                          Value *SunkAddr) {
++  if (Addr->hasOneUse())
++    return MemoryInst->getIterator();
++
++  // We already have a SunkAddr in current BB, but we may need to insert cast
++  // instruction after it.
++  if (SunkAddr) {
++    if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
++      return std::next(AddrInst->getIterator());
++  }
++
++  // Find the first user of Addr in current BB.
++  Instruction *Earliest = MemoryInst;
++  for (User *U : Addr->users()) {
++    Instruction *UserInst = dyn_cast<Instruction>(U);
++    if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
++      if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
++        continue;
++      if (UserInst->comesBefore(Earliest))
++        Earliest = UserInst;
++    }
++  }
++  return Earliest->getIterator();
++}
++
+ /// Sink addressing mode computation immediate before MemoryInst if doing so
+ /// can be done without increasing register pressure.  The need for the
+ /// register pressure constraint means this can end up being an all or nothing
+@@ -5895,11 +5924,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+     return Modified;
+   }
+ 
+-  // Insert this computation right after this user.  Since our caller is
+-  // scanning from the top of the BB to the bottom, reuse of the expr are
+-  // guaranteed to happen later.
+-  IRBuilder<> Builder(MemoryInst);
+-
+   // Now that we determined the addressing expression we want to use and know
+   // that we have to sink it into this block.  Check to see if we have already
+   // done this for some other load/store instr in this block.  If so, reuse
+@@ -5910,6 +5934,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ 
+   Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+   Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
++
++  // The current BB may be optimized multiple times, we can't guarantee the
++  // reuse of Addr happens later, call findInsertPos to find an appropriate
++  // insert position.
++  IRBuilder<> Builder(MemoryInst->getParent(),
++                      findInsertPos(Addr, MemoryInst, SunkAddr));
++
+   if (SunkAddr) {
+     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
+                       << " for " << *MemoryInst << "\n");
+diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
+new file mode 100644
+index 000000000000..019f31140655
+--- /dev/null
++++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
+@@ -0,0 +1,44 @@
++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
++; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
++
++target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
++target triple = "x86_64-grtev4-linux-gnu"
++
++declare void @g(ptr)
++
++; %load and %load5 use the same address. %load5 is optimized first; %load is
++; optimized later and reuses the same address computation instruction. We must
++; make sure not to generate a use-before-def error.
++
++define void @f(ptr %arg) {
++; CHECK-LABEL: define void @f(
++; CHECK-SAME: ptr [[ARG:%.*]]) {
++; CHECK-NEXT:  [[BB:.*:]]
++; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
++; CHECK-NEXT:    call void @g(ptr [[GETELEMENTPTR]])
++; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
++; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
++; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
++; CHECK-NEXT:    [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
++; CHECK-NEXT:    [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
++; CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
++; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
++; CHECK-NEXT:    ret void
++;
++bb:
++  %getelementptr = getelementptr i8, ptr %arg, i64 -64
++  %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
++  call void @g(ptr %getelementptr)
++  br label %bb3
++
++bb3:
++  %load = load ptr, ptr %getelementptr, align 8
++  %load4 = load i32, ptr %getelementptr1, align 8
++  %load5 = load ptr, ptr %getelementptr, align 8
++  %add = add i32 1, 0
++  %icmp = icmp eq i32 %add, 0
++  br i1 %icmp, label %bb7, label %bb7
++
++bb7:
++  ret void
++}
+-- 
+2.51.0
+

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	build/build-clang/clang-20.json	\|	1	+
A	build/build-clang/llvmorg-21-init-12122-g59c6d70ed812.patch	\|	141	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++