tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 7b810d534e1d349aa05bef50d8578019082627de
parent 38325fb14fa4e0c4fb6c769cfe9b5044baada715
Author: serge-sans-paille <sguelton@mozilla.com>
Date:   Wed, 24 Dec 2025 07:22:21 +0000

Bug 2007081 - Backport upstream patch that fixes wrong instruction scheduling r=glandium,yjuglaret

This fixes a codegen bug triggered by JetStream3 benchmark, see
https://bugzilla.mozilla.org/show_bug.cgi?id=1962418#c17

Differential Revision: https://phabricator.services.mozilla.com/D277309

Diffstat:
Mbuild/build-clang/clang-20.json | 1+
Abuild/build-clang/llvmorg-21-init-12122-g59c6d70ed812.patch | 141+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 142 insertions(+), 0 deletions(-)

diff --git a/build/build-clang/clang-20.json b/build/build-clang/clang-20.json @@ -9,6 +9,7 @@ "revert-llvmorg-16-init-11301-g163bb6d64e5f_clang_20.patch", "revert-llvmorg-15-init-13446-g7524fe962e47.patch", "llvmorg-21-init-2651-gbac417107339.patch", + "llvmorg-21-init-12122-g59c6d70ed812.patch", "llvmorg-21-init-17702-g0d7e64f5d2b4.patch", "llvmorg-22-init-1937-g2672719a09cf.patch", "llvmorg-22-init-2380-gd9971be83e5d.patch", diff --git a/build/build-clang/llvmorg-21-init-12122-g59c6d70ed812.patch b/build/build-clang/llvmorg-21-init-12122-g59c6d70ed812.patch @@ -0,0 +1,141 @@ +From 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0 Mon Sep 17 00:00:00 2001 +From: weiguozhi <57237827+weiguozhi@users.noreply.github.com> +Date: Thu, 15 May 2025 09:27:25 -0700 +Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is + before MemoryInst (#139303) + +Function optimizeBlock may do optimizations on a block for multiple +times. In the first iteration of the loop, MemoryInst1 may generate a +sunk instruction and store it into SunkAddrs. In the second iteration of +the loop, MemoryInst2 may use the same address and then it can reuse the +sunk instruction stored in SunkAddrs, but MemoryInst2 may be before +MemoryInst1 and the corresponding sunk instruction. In order to avoid +use before def error, we need to find appropriate insert position for the + sunk instruction. + +Fixes #138208. +--- + llvm/lib/CodeGen/CodeGenPrepare.cpp | 41 ++++++++++++++--- + .../CodeGenPrepare/X86/sink-addr-reuse.ll | 44 +++++++++++++++++++ + 2 files changed, 80 insertions(+), 5 deletions(-) + create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll + +diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp +index 2c53a9c27ccb..76f27623c865 100644 +--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp ++++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp +@@ -5771,6 +5771,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { + return false; + } + ++// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst ++// is the first instruction that will use Addr. So we need to find the first ++// user of Addr in current BB. ++static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, ++ Value *SunkAddr) { ++ if (Addr->hasOneUse()) ++ return MemoryInst->getIterator(); ++ ++ // We already have a SunkAddr in current BB, but we may need to insert cast ++ // instruction after it. ++ if (SunkAddr) { ++ if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr)) ++ return std::next(AddrInst->getIterator()); ++ } ++ ++ // Find the first user of Addr in current BB. ++ Instruction *Earliest = MemoryInst; ++ for (User *U : Addr->users()) { ++ Instruction *UserInst = dyn_cast<Instruction>(U); ++ if (UserInst && UserInst->getParent() == MemoryInst->getParent()) { ++ if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst()) ++ continue; ++ if (UserInst->comesBefore(Earliest)) ++ Earliest = UserInst; ++ } ++ } ++ return Earliest->getIterator(); ++} ++ + /// Sink addressing mode computation immediate before MemoryInst if doing so + /// can be done without increasing register pressure. The need for the + /// register pressure constraint means this can end up being an all or nothing +@@ -5895,11 +5924,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, + return Modified; + } + +- // Insert this computation right after this user. Since our caller is +- // scanning from the top of the BB to the bottom, reuse of the expr are +- // guaranteed to happen later. +- IRBuilder<> Builder(MemoryInst); +- + // Now that we determined the addressing expression we want to use and know + // that we have to sink it into this block. Check to see if we have already + // done this for some other load/store instr in this block. If so, reuse +@@ -5910,6 +5934,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, + + Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); ++ ++ // The current BB may be optimized multiple times, we can't guarantee the ++ // reuse of Addr happens later, call findInsertPos to find an appropriate ++ // insert position. ++ IRBuilder<> Builder(MemoryInst->getParent(), ++ findInsertPos(Addr, MemoryInst, SunkAddr)); ++ + if (SunkAddr) { + LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode + << " for " << *MemoryInst << "\n"); +diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll +new file mode 100644 +index 000000000000..019f31140655 +--- /dev/null ++++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll +@@ -0,0 +1,44 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ++; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s ++ ++target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-grtev4-linux-gnu" ++ ++declare void @g(ptr) ++ ++; %load and %load5 use the same address, %load5 is optimized first, %load is ++; optimized later and reuse the same address computation instruction. We must ++; make sure not to generate use before def error. ++ ++define void @f(ptr %arg) { ++; CHECK-LABEL: define void @f( ++; CHECK-SAME: ptr [[ARG:%.*]]) { ++; CHECK-NEXT: [[BB:.*:]] ++; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 ++; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]]) ++; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 ++; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 ++; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56 ++; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8 ++; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 ++; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0) ++; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0 ++; CHECK-NEXT: ret void ++; ++bb: ++ %getelementptr = getelementptr i8, ptr %arg, i64 -64 ++ %getelementptr1 = getelementptr i8, ptr %arg, i64 -56 ++ call void @g(ptr %getelementptr) ++ br label %bb3 ++ ++bb3: ++ %load = load ptr, ptr %getelementptr, align 8 ++ %load4 = load i32, ptr %getelementptr1, align 8 ++ %load5 = load ptr, ptr %getelementptr, align 8 ++ %add = add i32 1, 0 ++ %icmp = icmp eq i32 %add, 0 ++ br i1 %icmp, label %bb7, label %bb7 ++ ++bb7: ++ ret void ++} +-- +2.51.0 +