tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 452c2702f1bca9bdfe06d05f273a7777647afc28
parent 5202e780e1906ff18568c035bd76a5552bc751a6
Author: Nicolas Silva <nical@fastmail.com>
Date:   Tue,  9 Dec 2025 08:19:03 +0000

Bug 1996818 - Remove the gpu cache. r=gw

Differential Revision: https://phabricator.services.mozilla.com/D270818

Diffstat:
Mgfx/layers/ipc/CompositorBridgeParent.cpp | 5-----
Mgfx/layers/ipc/PCompositorBridge.ipdl | 1-
Mgfx/layers/wr/WebRenderBridgeParent.cpp | 1-
Mgfx/layers/wr/WebRenderMessageUtils.h | 3---
Mgfx/thebes/gfxPlatform.cpp | 9++-------
Dgfx/wr/webrender/res/gpu_cache.glsl | 95-------------------------------------------------------------------------------
Mgfx/wr/webrender/src/frame_builder.rs | 19+------------------
Dgfx/wr/webrender/src/gpu_cache.rs | 935-------------------------------------------------------------------------------
Mgfx/wr/webrender/src/internal_types.rs | 2--
Mgfx/wr/webrender/src/lib.rs | 1-
Mgfx/wr/webrender/src/picture.rs | 27++++++++++++---------------
Mgfx/wr/webrender/src/prepare.rs | 8++------
Mgfx/wr/webrender/src/prim_store/gradient/conic.rs | 3+--
Mgfx/wr/webrender/src/prim_store/gradient/linear.rs | 6++----
Mgfx/wr/webrender/src/prim_store/gradient/radial.rs | 3+--
Mgfx/wr/webrender/src/prim_store/image.rs | 3+--
Mgfx/wr/webrender/src/profiler.rs | 379++++++++++++++++++++++++++-----------------------------------------------------
Mgfx/wr/webrender/src/render_api.rs | 5-----
Mgfx/wr/webrender/src/render_backend.rs | 72+-----------------------------------------------------------------------
Mgfx/wr/webrender/src/render_task.rs | 5+----
Mgfx/wr/webrender/src/render_task_cache.rs | 12++++--------
Mgfx/wr/webrender/src/render_task_graph.rs | 11++++++-----
Dgfx/wr/webrender/src/renderer/gpu_cache.rs | 534-------------------------------------------------------------------------------
Mgfx/wr/webrender/src/renderer/init.rs | 28++--------------------------
Mgfx/wr/webrender/src/renderer/mod.rs | 183++++++++++++-------------------------------------------------------------------
Mgfx/wr/webrender/src/renderer/shade.rs | 2--
Mgfx/wr/webrender/src/renderer/vertex.rs | 16----------------
Mgfx/wr/webrender/src/resource_cache.rs | 10+++-------
Mgfx/wr/webrender/src/texture_cache.rs | 18++++++++++--------
Mgfx/wr/webrender/src/visibility.rs | 2--
Mgfx/wr/webrender_build/src/shader.rs | 4++--
Mgfx/wr/wrench/src/main.rs | 1-
Mgfx/wr/wrench/src/png.rs | 9---------
33 files changed, 203 insertions(+), 2209 deletions(-)

diff --git a/gfx/layers/ipc/CompositorBridgeParent.cpp b/gfx/layers/ipc/CompositorBridgeParent.cpp @@ -1873,11 +1873,6 @@ int32_t RecordContentFrameTime( .AccumulateSingleSample( static_cast<unsigned long long>(fracLatencyNorm)); - if (aStats) { - latencyMs -= (double(aStats->gpu_cache_upload_time) / 1000000.0); - latencyNorm = latencyMs / aVsyncRate.ToMilliseconds(); - fracLatencyNorm = lround(latencyNorm * 100.0); - } mozilla::glean::gfx_content_frame_time::without_resource_upload .AccumulateSingleSample( static_cast<unsigned long long>(fracLatencyNorm)); diff --git a/gfx/layers/ipc/PCompositorBridge.ipdl b/gfx/layers/ipc/PCompositorBridge.ipdl @@ -54,7 +54,6 @@ struct FrameStats { TimeStamp compositeEnd; int32_t contentFrameTime; double resourceUploadTime; - double gpuCacheUploadTime; TimeStamp transactionStart; TimeStamp refreshStart; TimeStamp fwdTime; diff --git a/gfx/layers/wr/WebRenderBridgeParent.cpp b/gfx/layers/wr/WebRenderBridgeParent.cpp @@ -2673,7 +2673,6 @@ void WebRenderBridgeParent::FlushTransactionIdsForEpoch( transactionId.mId, aCompositeStartTime, aRenderStartTime, aEndTime, contentFrameTime, aStats ? (double(aStats->resource_upload_time) / 1000000.0) : 0.0, - aStats ? (double(aStats->gpu_cache_upload_time) / 1000000.0) : 0.0, transactionId.mTxnStartTime, transactionId.mRefreshStartTime, transactionId.mFwdTime, transactionId.mSceneBuiltTime, transactionId.mSkippedComposites, transactionId.mTxnURL)); diff --git a/gfx/layers/wr/WebRenderMessageUtils.h b/gfx/layers/wr/WebRenderMessageUtils.h @@ -296,8 +296,6 @@ inline auto TiedFields<mozilla::wr::MemoryReport>( // clang-format off return std::tie( a.clip_stores, - a.gpu_cache_metadata, - a.gpu_cache_cpu_mirror, a.hit_testers, a.fonts, a.weak_fonts, @@ -310,7 +308,6 @@ inline auto TiedFields<mozilla::wr::MemoryReport>( a.swgl, a.frame_allocator, a.render_tasks, - a.gpu_cache_textures, a.vertex_data_textures, a.render_target_textures, a.picture_tile_textures, diff --git a/gfx/thebes/gfxPlatform.cpp b/gfx/thebes/gfxPlatform.cpp @@ -539,7 +539,6 @@ static void WebRenderDebugPrefChangeCallback(const char* aPrefName, void*) { GFX_WEBRENDER_DEBUG(".echo-driver-messages", wr::DebugFlags::ECHO_DRIVER_MESSAGES) GFX_WEBRENDER_DEBUG(".show-overdraw", wr::DebugFlags::SHOW_OVERDRAW) - GFX_WEBRENDER_DEBUG(".gpu-cache", wr::DebugFlags::GPU_CACHE_DBG) GFX_WEBRENDER_DEBUG(".texture-cache.clear-evicted", wr::DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED) GFX_WEBRENDER_DEBUG(".picture-caching", wr::DebugFlags::PICTURE_CACHING_DBG) @@ -732,8 +731,6 @@ WebRenderMemoryReporter::CollectReports(nsIHandleReportCallback* aHandleReport, [=](wr::MemoryReport aReport) { // CPU Memory. helper.Report(aReport.clip_stores, "clip-stores"); - helper.Report(aReport.gpu_cache_metadata, "gpu-cache/metadata"); - helper.Report(aReport.gpu_cache_cpu_mirror, "gpu-cache/cpu-mirror"); helper.Report(aReport.hit_testers, "hit-testers"); helper.Report(aReport.fonts, "resource-cache/fonts"); helper.Report(aReport.weak_fonts, "resource-cache/weak-fonts"); @@ -753,7 +750,6 @@ WebRenderMemoryReporter::CollectReports(nsIHandleReportCallback* aHandleReport, WEBRENDER_FOR_EACH_INTERNER(REPORT_DATA_STORE, ); // GPU Memory. - helper.ReportTexture(aReport.gpu_cache_textures, "gpu-cache"); helper.ReportTexture(aReport.vertex_data_textures, "vertex-data"); helper.ReportTexture(aReport.render_target_textures, "render-targets"); helper.ReportTexture(aReport.depth_target_textures, "depth-targets"); @@ -3648,8 +3644,7 @@ void gfxPlatform::GetFrameStats(mozilla::widget::InfoObject& aObj) { "Frame %" PRIu64 "(%s) CONTENT_FRAME_TIME %d - Transaction start %f, main-thread time " "%f, full paint time %f, Skipped composites %u, Composite start %f, " - "Resource upload time %f, GPU cache upload time %f, Render time %f, " - "Composite time %f", + "Resource upload time %f, Render time %f, Composite time %f", f.id().mId, f.url().get(), f.contentFrameTime(), (f.transactionStart() - f.refreshStart()).ToMilliseconds(), (f.fwdTime() - f.transactionStart()).ToMilliseconds(), @@ -3658,7 +3653,7 @@ void gfxPlatform::GetFrameStats(mozilla::widget::InfoObject& aObj) { : 0.0, f.skippedComposites(), (f.compositeStart() - f.refreshStart()).ToMilliseconds(), - f.resourceUploadTime(), f.gpuCacheUploadTime(), + f.resourceUploadTime(), (f.compositeEnd() - f.renderStart()).ToMilliseconds(), (f.compositeEnd() - f.compositeStart()).ToMilliseconds()); aObj.DefineProperty(name.get(), value.get()); diff --git a/gfx/wr/webrender/res/gpu_cache.glsl b/gfx/wr/webrender/res/gpu_cache.glsl @@ -1,95 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuCache; - -// TODO(gw): This is here temporarily while we have -// both GPU store and cache. When the GPU -// store code is removed, we can change the -// PrimitiveInstance instance structure to -// use 2x unsigned shorts as vertex attributes -// instead of an int, and encode the UV directly -// in the vertices. -ivec2 get_gpu_cache_uv(HIGHP_FS_ADDRESS int address) { - return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH, - uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH); -} - -vec4[2] fetch_from_gpu_cache_2_direct(ivec2 address) { - return vec4[2]( - TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)), - TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)) - ); -} - -vec4[2] fetch_from_gpu_cache_2(HIGHP_FS_ADDRESS int address) { - ivec2 uv = get_gpu_cache_uv(address); - return vec4[2]( - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)) - ); -} - -vec4 fetch_from_gpu_cache_1_direct(ivec2 address) { - return texelFetch(sGpuCache, address, 0); -} - -vec4 fetch_from_gpu_cache_1(HIGHP_FS_ADDRESS int address) { - ivec2 uv = get_gpu_cache_uv(address); - return texelFetch(sGpuCache, uv, 0); -} - -#ifdef WR_VERTEX_SHADER - -vec4[8] fetch_from_gpu_cache_8(int address) { - ivec2 uv = get_gpu_cache_uv(address); - return vec4[8]( - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(4, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(5, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(6, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(7, 0)) - ); -} - -vec4[3] fetch_from_gpu_cache_3(int address) { - ivec2 uv = get_gpu_cache_uv(address); - return vec4[3]( - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)) - ); -} - -vec4[3] fetch_from_gpu_cache_3_direct(ivec2 address) { - return vec4[3]( - TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)), - TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)), - TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)) - ); -} - -vec4[4] fetch_from_gpu_cache_4_direct(ivec2 address) { - return vec4[4]( - TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)), - TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)), - TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)), - TEXEL_FETCH(sGpuCache, address, 0, ivec2(3, 0)) - ); -} - -vec4[4] fetch_from_gpu_cache_4(int address) { - ivec2 uv = get_gpu_cache_uv(address); - return vec4[4]( - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)), - TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0)) - ); -} - -#endif //WR_VERTEX_SHADER diff --git a/gfx/wr/webrender/src/frame_builder.rs b/gfx/wr/webrender/src/frame_builder.rs @@ -13,10 +13,9 @@ use crate::spatial_node::SpatialNodeType; use crate::spatial_tree::{SpatialTree, SpatialNodeIndex}; use crate::composite::{CompositorKind, CompositeState, CompositeStatePreallocator}; use crate::debug_item::DebugItem; -use crate::gpu_cache::GpuCache; use crate::gpu_types::{PrimitiveHeaders, TransformPalette, ZBufferIdGenerator}; use crate::gpu_types::{QuadSegment, TransformData}; -use crate::internal_types::{FastHashMap, PlaneSplitter, FrameId, FrameStamp}; +use crate::internal_types::{FastHashMap, PlaneSplitter, FrameStamp}; use crate::picture::{DirtyRegion, SliceId, TileCacheInstance}; use crate::picture::{SurfaceInfo, SurfaceIndex, ResolvedSurfaceTexture}; use crate::picture::{SubpixelMode, RasterConfig, PictureCompositeMode}; @@ -166,7 +165,6 @@ pub struct FrameBuildingState<'a> { pub rg_builder: &'a mut RenderTaskGraphBuilder, pub clip_store: &'a mut ClipStore, pub resource_cache: &'a mut ResourceCache, - pub gpu_cache: &'a mut GpuCache, pub transforms: &'a mut TransformPalette, pub segment_builder: SegmentBuilder, pub surfaces: &'a mut Vec<SurfaceInfo>, @@ -283,7 +281,6 @@ impl FrameBuilder { present: bool, global_screen_world_rect: WorldRect, resource_cache: &mut ResourceCache, - gpu_cache: &mut GpuCache, rg_builder: &mut RenderTaskGraphBuilder, global_device_pixel_scale: DevicePixelScale, scene_properties: &SceneProperties, @@ -395,7 +392,6 @@ impl FrameBuilder { let mut visibility_state = FrameVisibilityState { clip_store: &mut scene.clip_store, resource_cache, - gpu_cache, frame_gpu_data, data_stores, clip_tree: &mut scene.clip_tree, @@ -456,7 +452,6 @@ impl FrameBuilder { let mut visibility_state = FrameVisibilityState { clip_store: &mut scene.clip_store, resource_cache, - gpu_cache, frame_gpu_data, data_stores, clip_tree: &mut scene.clip_tree, @@ -529,7 +524,6 @@ impl FrameBuilder { rg_builder, clip_store: &mut scene.clip_store, resource_cache, - gpu_cache, transforms: transform_palette, segment_builder: SegmentBuilder::new(), surfaces: &mut scene.surfaces, @@ -644,7 +638,6 @@ impl FrameBuilder { scene: &mut BuiltScene, present: bool, resource_cache: &mut ResourceCache, - gpu_cache: &mut GpuCache, rg_builder: &mut RenderTaskGraphBuilder, stamp: FrameStamp, device_origin: DeviceIntPoint, @@ -672,7 +665,6 @@ impl FrameBuilder { profile.set(profiler::PRIMITIVES, scene.prim_instances.len()); profile.set(profiler::PICTURE_CACHE_SLICES, scene.tile_cache_config.picture_cache_slice_count); scratch.begin_frame(); - gpu_cache.begin_frame(stamp); resource_cache.begin_frame(stamp, profile); // TODO(gw): Follow up patches won't clear this, as they'll be assigned @@ -710,7 +702,6 @@ impl FrameBuilder { present, screen_world_rect, resource_cache, - gpu_cache, rg_builder, global_device_pixel_scale, scene_properties, @@ -780,7 +771,6 @@ impl FrameBuilder { pass, output_size, &mut ctx, - gpu_cache, &mut gpu_buffer_builder, &render_tasks, &scene.clip_store, @@ -831,8 +821,6 @@ impl FrameBuilder { profile.end_time(profiler::FRAME_BATCHING_TIME); - let gpu_cache_frame_id = gpu_cache.end_frame(profile).frame_id(); - resource_cache.end_frame(profile); self.prim_headers_prealloc.record_vec(&prim_headers.headers_int); @@ -855,7 +843,6 @@ impl FrameBuilder { transform_palette: transform_palette.finish(), render_tasks, deferred_resolves, - gpu_cache_frame_id, has_been_rendered: false, has_texture_cache_tasks, prim_headers, @@ -1056,7 +1043,6 @@ pub fn build_render_pass( src_pass: &Pass, screen_size: DeviceIntSize, ctx: &mut RenderTargetContext, - _gpu_cache: &mut GpuCache, gpu_buffer_builder: &mut GpuBufferBuilder, render_tasks: &RenderTaskGraph, clip_store: &ClipStore, @@ -1323,9 +1309,6 @@ pub struct Frame { pub render_tasks: RenderTaskGraph, pub prim_headers: PrimitiveHeaders, - /// The GPU cache frame that the contents of Self depend on - pub gpu_cache_frame_id: FrameId, - /// List of textures that we don't know about yet /// from the backend thread. The render thread /// will use a callback to resolve these and diff --git a/gfx/wr/webrender/src/gpu_cache.rs b/gfx/wr/webrender/src/gpu_cache.rs @@ -1,935 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -//! Overview of the GPU cache. -//! -//! The main goal of the GPU cache is to allow on-demand -//! allocation and construction of GPU resources for the -//! vertex shaders to consume. -//! -//! Every item that wants to be stored in the GPU cache -//! should create a GpuCacheHandle that is used to refer -//! to a cached GPU resource. Creating a handle is a -//! cheap operation, that does *not* allocate room in the -//! cache. -//! -//! On any frame when that data is required, the caller -//! must request that handle, via ```request```. If the -//! data is not in the cache, the user provided closure -//! will be invoked to build the data. -//! -//! After ```end_frame``` has occurred, callers can -//! use the ```get_address``` API to get the allocated -//! address in the GPU cache of a given resource slot -//! for this frame. - -use api::{DebugFlags, DocumentId, PremultipliedColorF}; -#[cfg(test)] -use api::IdNamespace; -use api::units::*; -use euclid::{HomogeneousVector, Box2D}; -use crate::internal_types::{FastHashMap, FastHashSet, FrameStamp, FrameId}; -use crate::profiler::{self, TransactionProfile}; -use crate::prim_store::VECS_PER_SEGMENT; -use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH; -use crate::util::VecHelper; -use std::{u16, u32}; -use std::num::NonZeroU32; -use std::ops::Add; -use std::time::{Duration, Instant}; - - -/// At the time of this writing, Firefox uses about 15 GPU cache rows on -/// startup, and then gradually works its way up to the mid-30s with normal -/// browsing. -pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 20; -const NEW_ROWS_PER_RESIZE: i32 = 10; - -/// The number of frames an entry can go unused before being evicted. -const FRAMES_BEFORE_EVICTION: u64 = 10; - -/// The ratio of utilized blocks to total blocks for which we start the clock -/// on reclaiming memory. -const RECLAIM_THRESHOLD: f32 = 0.2; - -/// The amount of time utilization must be below the above threshold before we -/// blow away the cache and rebuild it. -const RECLAIM_DELAY_S: u64 = 5; - -#[derive(Debug, Copy, Clone, Eq, MallocSizeOf, PartialEq)] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -struct Epoch(u32); - -impl Epoch { - fn next(&mut self) { - *self = Epoch(self.0.wrapping_add(1)); - } -} - -#[derive(Debug, Copy, Clone, MallocSizeOf)] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -struct CacheLocation { - block_index: BlockIndex, - epoch: Epoch, -} - -/// A single texel in RGBAF32 texture - 16 bytes. -#[derive(Copy, Clone, Debug, MallocSizeOf)] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -pub struct GpuBlockData { - data: [f32; 4], -} - -impl GpuBlockData { - pub const EMPTY: Self = GpuBlockData { data: [0.0; 4] }; -} - -/// Conversion helpers for GpuBlockData -impl From<PremultipliedColorF> for GpuBlockData { - fn from(c: PremultipliedColorF) -> Self { - GpuBlockData { - data: [c.r, c.g, c.b, c.a], - } - } -} - -impl From<[f32; 4]> for GpuBlockData { - fn from(data: [f32; 4]) -> Self { - GpuBlockData { data } - } -} - -impl<P> From<Box2D<f32, P>> for GpuBlockData { - fn from(r: Box2D<f32, P>) -> Self { - GpuBlockData { - data: [ - r.min.x, - r.min.y, - r.max.x, - r.max.y, - ], - } - } -} - -impl<P> From<HomogeneousVector<f32, P>> for GpuBlockData { - fn from(v: HomogeneousVector<f32, P>) -> Self { - GpuBlockData { - data: [ - v.x, - v.y, - v.z, - v.w, - ], - } - } -} - -impl From<TexelRect> for GpuBlockData { - fn from(tr: TexelRect) -> Self { - GpuBlockData { - data: [tr.uv0.x, tr.uv0.y, tr.uv1.x, tr.uv1.y], - } - } -} - - -// A handle to a GPU resource. -#[derive(Debug, Copy, Clone, MallocSizeOf)] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -pub struct GpuCacheHandle { - location: Option<CacheLocation>, -} - -impl GpuCacheHandle { - pub fn new() -> Self { - GpuCacheHandle { location: None } - } - - pub fn as_int(self, gpu_cache: &GpuCache) -> i32 { - gpu_cache.get_address(&self).as_int() - } -} - -// A unique address in the GPU cache. These are uploaded -// as part of the primitive instances, to allow the vertex -// shader to fetch the specific data. -#[repr(C)] -#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -pub struct GpuCacheAddress { - pub u: u16, - pub v: u16, -} - -impl GpuCacheAddress { - fn new(u: usize, v: usize) -> Self { - GpuCacheAddress { - u: u as u16, - v: v as u16, - } - } - - pub const INVALID: GpuCacheAddress = GpuCacheAddress { - u: u16::MAX, - v: u16::MAX, - }; - - pub fn as_int(self) -> i32 { - // TODO(gw): Temporarily encode GPU Cache addresses as a single int. - // In the future, we can change the PrimitiveInstanceData struct - // to use 2x u16 for the vertex attribute instead of an i32. - self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32 - } -} - -impl Add<usize> for GpuCacheAddress { - type Output = GpuCacheAddress; - - fn add(self, other: usize) -> GpuCacheAddress { - GpuCacheAddress { - u: self.u + other as u16, - v: self.v, - } - } -} - -// An entry in a free-list of blocks in the GPU cache. -#[derive(Debug, MallocSizeOf)] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -struct Block { - // The location in the cache of this block. - address: GpuCacheAddress, - // The current epoch (generation) of this block. - epoch: Epoch, - // Index of the next free block in the list it - // belongs to (either a free-list or the - // occupied list). - next: Option<BlockIndex>, - // The last frame this block was referenced. - last_access_time: FrameId, -} - -impl Block { - fn new( - address: GpuCacheAddress, - next: Option<BlockIndex>, - frame_id: FrameId, - epoch: Epoch, - ) -> Self { - Block { - address, - next, - last_access_time: frame_id, - epoch, - } - } - - fn advance_epoch(&mut self, max_epoch: &mut Epoch) { - self.epoch.next(); - if max_epoch.0 < self.epoch.0 { - max_epoch.0 = self.epoch.0; - } - } - - /// Creates an invalid dummy block ID. - pub const INVALID: Block = Block { - address: GpuCacheAddress { u: 0, v: 0 }, - epoch: Epoch(0), - next: None, - last_access_time: FrameId::INVALID, - }; -} - -/// Represents the index of a Block in the block array. We only create such -/// structs for blocks that represent the start of a chunk. -/// -/// Because we use Option<BlockIndex> in a lot of places, we use a NonZeroU32 -/// here and avoid ever using the index zero. -#[derive(Debug, Copy, Clone, MallocSizeOf)] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -struct BlockIndex(NonZeroU32); - -impl BlockIndex { - fn new(idx: usize) -> Self { - debug_assert!(idx <= u32::MAX as usize); - BlockIndex(NonZeroU32::new(idx as u32).expect("Index zero forbidden")) - } - - fn get(&self) -> usize { - self.0.get() as usize - } -} - -// A row in the cache texture. -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -#[derive(MallocSizeOf)] -struct Row { - // The fixed size of blocks that this row supports. - // Each row becomes a slab allocator for a fixed block size. - // This means no dealing with fragmentation within a cache - // row as items are allocated and freed. - block_count_per_item: usize, -} - -impl Row { - fn new(block_count_per_item: usize) -> Self { - Row { - block_count_per_item, - } - } -} - -// A list of update operations that can be applied on the cache -// this frame. The list of updates is created by the render backend -// during frame construction. It's passed to the render thread -// where GL commands can be applied. -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -#[derive(MallocSizeOf)] -pub enum GpuCacheUpdate { - Copy { - block_index: usize, - block_count: usize, - address: GpuCacheAddress, - }, -} - -/// Command to inform the debug display in the renderer when chunks are allocated -/// or freed. -#[derive(MallocSizeOf)] -pub enum GpuCacheDebugCmd { - /// Describes an allocated chunk. - Alloc(GpuCacheDebugChunk), - /// Describes a freed chunk. - Free(GpuCacheAddress), -} - -#[derive(Clone, MallocSizeOf)] -pub struct GpuCacheDebugChunk { - pub address: GpuCacheAddress, - pub size: usize, -} - -#[must_use] -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -#[derive(MallocSizeOf)] -pub struct GpuCacheUpdateList { - /// The frame current update list was generated from. - pub frame_id: FrameId, - /// Whether the texture should be cleared before updates - /// are applied. - pub clear: bool, - /// The current height of the texture. The render thread - /// should resize the texture if required. - pub height: i32, - /// List of updates to apply. - pub updates: Vec<GpuCacheUpdate>, - /// A flat list of GPU blocks that are pending upload - /// to GPU memory. - pub blocks: Vec<GpuBlockData>, - /// Whole state GPU block metadata for debugging. - #[cfg_attr(feature = "serde", serde(skip))] - pub debug_commands: Vec<GpuCacheDebugCmd>, -} - -// Holds the free lists of fixed size blocks. Mostly -// just serves to work around the borrow checker. -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -#[derive(MallocSizeOf)] -struct FreeBlockLists { - free_list_1: Option<BlockIndex>, - free_list_2: Option<BlockIndex>, - free_list_4: Option<BlockIndex>, - free_list_8: Option<BlockIndex>, - free_list_16: Option<BlockIndex>, - free_list_32: Option<BlockIndex>, - free_list_64: Option<BlockIndex>, - free_list_128: Option<BlockIndex>, - free_list_256: Option<BlockIndex>, - free_list_341: Option<BlockIndex>, - free_list_512: Option<BlockIndex>, - free_list_1024: Option<BlockIndex>, -} - -impl FreeBlockLists { - fn new() -> Self { - FreeBlockLists { - free_list_1: None, - free_list_2: None, - free_list_4: None, - free_list_8: None, - free_list_16: None, - free_list_32: None, - free_list_64: None, - free_list_128: None, - free_list_256: None, - free_list_341: None, - free_list_512: None, - free_list_1024: None, - } - } - - fn get_actual_block_count_and_free_list( - &mut self, - block_count: usize, - ) -> (usize, &mut Option<BlockIndex>) { - // Find the appropriate free list to use based on the block size. - // - // Note that we cheat a bit with the 341 bucket, since it's not quite - // a divisor of 1024, because purecss-francine allocates many 260-block - // chunks, and there's no reason we shouldn't pack these three to a row. - // This means the allocation statistics will under-report by one block - // for each row using 341-block buckets, which is fine. - debug_assert_eq!(MAX_VERTEX_TEXTURE_WIDTH, 1024, "Need to update bucketing"); - match block_count { - 0 => panic!("Can't allocate zero sized blocks!"), - 1 => (1, &mut self.free_list_1), - 2 => (2, &mut self.free_list_2), - 3..=4 => (4, &mut self.free_list_4), - 5..=8 => (8, &mut self.free_list_8), - 9..=16 => (16, &mut self.free_list_16), - 17..=32 => (32, &mut self.free_list_32), - 33..=64 => (64, &mut self.free_list_64), - 65..=128 => (128, &mut self.free_list_128), - 129..=256 => (256, &mut self.free_list_256), - 257..=341 => (341, &mut self.free_list_341), - 342..=512 => (512, &mut self.free_list_512), - 513..=1024 => (1024, &mut self.free_list_1024), - _ => panic!("Can't allocate > MAX_VERTEX_TEXTURE_WIDTH per resource!"), - } - } -} - -// CPU-side representation of the GPU resource cache texture. -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -#[derive(MallocSizeOf)] -struct Texture { - // Current texture height - height: i32, - // All blocks that have been created for this texture - blocks: Vec<Block>, - // Metadata about each allocated row. - rows: Vec<Row>, - // The base Epoch for this texture. - base_epoch: Epoch, - // The maximum epoch reached. We track this along with the above so - // that we can rebuild the Texture and avoid collisions with handles - // allocated for the old texture. - max_epoch: Epoch, - // Free lists of available blocks for each supported - // block size in the texture. These are intrusive - // linked lists. - free_lists: FreeBlockLists, - // Linked list of currently occupied blocks. This - // makes it faster to iterate blocks looking for - // candidates to be evicted from the cache. - occupied_list_heads: FastHashMap<DocumentId, BlockIndex>, - // Pending blocks that have been written this frame - // and will need to be sent to the GPU. - pending_blocks: Vec<GpuBlockData>, - // Pending update commands. - updates: Vec<GpuCacheUpdate>, - // Profile stats - allocated_block_count: usize, - // The stamp at which we first reached our threshold for reclaiming `GpuCache` - // memory, or `None` if the threshold hasn't been reached. - #[cfg_attr(feature = "serde", serde(skip))] - reached_reclaim_threshold: Option<Instant>, - // List of debug commands to be sent to the renderer when the GPU cache - // debug display is enabled. - #[cfg_attr(feature = "serde", serde(skip))] - debug_commands: Vec<GpuCacheDebugCmd>, - // The current debug flags for the system. - debug_flags: DebugFlags, -} - -impl Texture { - fn new(base_epoch: Epoch, debug_flags: DebugFlags) -> Self { - // Pre-fill the block array with one invalid block so that we never use - // 0 for a BlockIndex. This lets us use NonZeroU32 for BlockIndex, which - // saves memory. - let blocks = vec![Block::INVALID]; - - Texture { - height: GPU_CACHE_INITIAL_HEIGHT, - blocks, - rows: Vec::new(), - base_epoch, - max_epoch: base_epoch, - free_lists: FreeBlockLists::new(), - pending_blocks: Vec::new(), - updates: Vec::new(), - occupied_list_heads: FastHashMap::default(), - allocated_block_count: 0, - reached_reclaim_threshold: None, - debug_commands: Vec::new(), - debug_flags, - } - } - - // Push new data into the cache. The ```pending_block_index``` field represents - // where the data was pushed into the texture ```pending_blocks``` array. - // Return the allocated address for this data. - fn push_data( - &mut self, - pending_block_index: Option<usize>, - block_count: usize, - frame_stamp: FrameStamp - ) -> CacheLocation { - debug_assert!(frame_stamp.is_valid()); - // Find the appropriate free list to use based on the block size. - let (alloc_size, free_list) = self.free_lists - .get_actual_block_count_and_free_list(block_count); - - // See if we need a new row (if free-list has nothing available) - if free_list.is_none() { - if self.rows.len() as i32 == self.height { - self.height += NEW_ROWS_PER_RESIZE; - } - - // Create a new row. - let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / alloc_size; - let row_index = self.rows.len(); - self.rows.push(Row::new(alloc_size)); - - // Create a ```Block``` for each possible allocation address - // in this row, and link it in to the free-list for this - // block size. - let mut prev_block_index = None; - for i in 0 .. items_per_row { - let address = GpuCacheAddress::new(i * alloc_size, row_index); - let block_index = BlockIndex::new(self.blocks.len()); - let block = Block::new(address, prev_block_index, frame_stamp.frame_id(), self.base_epoch); - self.blocks.push(block); - prev_block_index = Some(block_index); - } - - *free_list = prev_block_index; - } - - // Given the code above, it's now guaranteed that there is a block - // available in the appropriate free-list. Pull a block from the - // head of the list. - let free_block_index = free_list.take().unwrap(); - let block = &mut self.blocks[free_block_index.get()]; - *free_list = block.next; - - // Add the block to the occupied linked list. - block.next = self.occupied_list_heads.get(&frame_stamp.document_id()).cloned(); - block.last_access_time = frame_stamp.frame_id(); - self.occupied_list_heads.insert(frame_stamp.document_id(), free_block_index); - self.allocated_block_count += alloc_size; - - if let Some(pending_block_index) = pending_block_index { - // Add this update to the pending list of blocks that need - // to be updated on the GPU. - self.updates.push(GpuCacheUpdate::Copy { - block_index: pending_block_index, - block_count, - address: block.address, - }); - } - - // If we're using the debug display, communicate the allocation to the - // renderer thread. Note that we do this regardless of whether or not - // pending_block_index is None (if it is, the renderer thread will fill - // in the data via a deferred resolve, but the block is still considered - // allocated). - if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) { - self.debug_commands.push(GpuCacheDebugCmd::Alloc(GpuCacheDebugChunk { - address: block.address, - size: block_count, - })); - } - - CacheLocation { - block_index: free_block_index, - epoch: block.epoch, - } - } - - // Run through the list of occupied cache blocks and evict - // any old blocks that haven't been referenced for a while. - fn evict_old_blocks(&mut self, frame_stamp: FrameStamp) { - debug_assert!(frame_stamp.is_valid()); - // Prune any old items from the list to make room. - // Traverse the occupied linked list and see - // which items have not been used for a long time. - let mut current_block = self.occupied_list_heads.get(&frame_stamp.document_id()).map(|x| *x); - let mut prev_block: Option<BlockIndex> = None; - - while let Some(index) = current_block { - let (next_block, should_unlink) = { - let block = &mut self.blocks[index.get()]; - - let next_block = block.next; - let mut should_unlink = false; - - // If this resource has not been used in the last - // few frames, free it from the texture and mark - // as empty. - if block.last_access_time + FRAMES_BEFORE_EVICTION < frame_stamp.frame_id() { - should_unlink = true; - - // Get the row metadata from the address. - let row = &mut self.rows[block.address.v as usize]; - - // Use the row metadata to determine which free-list - // this block belongs to. - let (_, free_list) = self.free_lists - .get_actual_block_count_and_free_list(row.block_count_per_item); - - block.advance_epoch(&mut self.max_epoch); - block.next = *free_list; - *free_list = Some(index); - - self.allocated_block_count -= row.block_count_per_item; - - if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) { - let cmd = GpuCacheDebugCmd::Free(block.address); - self.debug_commands.push(cmd); - } - }; - - (next_block, should_unlink) - }; - - // If the block was released, we will need to remove it - // from the occupied linked list. - if should_unlink { - match prev_block { - Some(prev_block) => { - self.blocks[prev_block.get()].next = next_block; - } - None => { - match next_block { - Some(next_block) => { - self.occupied_list_heads.insert(frame_stamp.document_id(), next_block); - } - None => { - self.occupied_list_heads.remove(&frame_stamp.document_id()); - } - } - } - } - } else { - prev_block = current_block; - } - - current_block = next_block; - } - } - - /// Returns the ratio of utilized blocks. - fn utilization(&self) -> f32 { - let total_blocks = self.rows.len() * MAX_VERTEX_TEXTURE_WIDTH; - debug_assert!(total_blocks > 0); - let ratio = self.allocated_block_count as f32 / total_blocks as f32; - debug_assert!(0.0 <= ratio && ratio <= 1.0, "Bad ratio: {}", ratio); - ratio - } -} - - -/// A wrapper object for GPU data requests, -/// works as a container that can only grow. -#[must_use] -pub struct GpuDataRequest<'a> { - //TODO: remove this, see - // https://bugzilla.mozilla.org/show_bug.cgi?id=1690546 - #[allow(dead_code)] - handle: &'a mut GpuCacheHandle, - frame_stamp: FrameStamp, - start_index: usize, - max_block_count: usize, - texture: &'a mut Texture, -} - -impl<'a> GpuDataRequest<'a> { - pub fn push<B>(&mut self, block: B) - where - B: Into<GpuBlockData>, - { - self.texture.pending_blocks.push(block.into()); - } - - // Write the GPU cache data for an individual segment. - pub fn write_segment( - &mut self, - local_rect: LayoutRect, - extra_data: [f32; 4], - ) { - let _ = VECS_PER_SEGMENT; - self.push(local_rect); - self.push(extra_data); - } - - pub fn current_used_block_num(&self) -> usize { - self.texture.pending_blocks.len() - self.start_index - } -} - -impl<'a> Drop for GpuDataRequest<'a> { - fn drop(&mut self) { - // Push the data to the texture pending updates list. - let block_count = self.current_used_block_num(); - debug_assert!(block_count <= self.max_block_count); - - let location = self.texture - .push_data(Some(self.start_index), block_count, self.frame_stamp); - self.handle.location = Some(location); - } -} - - -/// The main LRU cache interface. -#[cfg_attr(feature = "capture", derive(Serialize))] -#[cfg_attr(feature = "replay", derive(Deserialize))] -#[derive(MallocSizeOf)] -pub struct GpuCache { - /// Current FrameId. - now: FrameStamp, - /// CPU-side texture allocator. - texture: Texture, - /// Number of blocks requested this frame that don't - /// need to be re-uploaded. - saved_block_count: usize, - /// The current debug flags for the system. - debug_flags: DebugFlags, - /// Whether there is a pending clear to send with the - /// next update. - pending_clear: bool, - /// Indicates that prepare_for_frames has been called for this group of frames. - /// Used for sanity checks. - prepared_for_frames: bool, - /// This indicates that we performed a cleanup operation which requires all - /// documents to build a frame. - requires_frame_build: bool, - /// The set of documents which have had frames built in this update. Used for - /// sanity checks. - document_frames_to_build: FastHashSet<DocumentId>, -} - -impl GpuCache { - pub fn new() -> Self { - let debug_flags = DebugFlags::empty(); - GpuCache { - now: FrameStamp::INVALID, - texture: Texture::new(Epoch(0), debug_flags), - saved_block_count: 0, - debug_flags, - pending_clear: false, - prepared_for_frames: false, - requires_frame_build: false, - document_frames_to_build: FastHashSet::default(), - } - } - - /// Creates a GpuCache and sets it up with a valid `FrameStamp`, which - /// is useful for avoiding panics when instantiating the `GpuCache` - /// directly from unit test code. - #[cfg(test)] - pub fn new_for_testing() -> Self { - let mut cache = Self::new(); - let mut now = FrameStamp::first(DocumentId::new(IdNamespace(1), 1)); - now.advance(); - cache.prepared_for_frames = true; - cache.begin_frame(now); - cache - } - - /// Drops everything in the GPU cache. Must not be called once gpu cache entries - /// for the next frame have already been requested. - pub fn clear(&mut self) { - assert!(self.texture.updates.is_empty(), "Clearing with pending updates"); - let mut next_base_epoch = self.texture.max_epoch; - next_base_epoch.next(); - self.texture = Texture::new(next_base_epoch, self.debug_flags); - self.saved_block_count = 0; - self.pending_clear = true; - self.requires_frame_build = true; - } - - pub fn requires_frame_build(&self) -> bool { - self.requires_frame_build - } - - pub fn prepare_for_frames(&mut self) { - self.prepared_for_frames = true; - if self.should_reclaim_memory() { - self.clear(); - debug_assert!(self.document_frames_to_build.is_empty()); - for &document_id in self.texture.occupied_list_heads.keys() { - self.document_frames_to_build.insert(document_id); - } - } - } - - pub fn bookkeep_after_frames(&mut self) { - assert!(self.document_frames_to_build.is_empty()); - assert!(self.prepared_for_frames); - self.requires_frame_build = false; - self.prepared_for_frames = false; - } - - /// Begin a new frame. - pub fn begin_frame(&mut self, stamp: FrameStamp) { - debug_assert!(self.texture.pending_blocks.is_empty()); - assert!(self.prepared_for_frames); - profile_scope!("begin_frame"); - self.now = stamp; - self.texture.evict_old_blocks(self.now); - self.saved_block_count = 0; - } - - // Invalidate a (possibly) existing block in the cache. - // This means the next call to request() for this location - // will rebuild the data and upload it to the GPU. - pub fn invalidate(&mut self, handle: &GpuCacheHandle) { - if let Some(ref location) = handle.location { - // don't invalidate blocks that are already re-assigned - if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) { - if block.epoch == location.epoch { - block.advance_epoch(&mut self.texture.max_epoch); - } - } - } - } - - /// Request a resource be added to the cache. If the resource - /// is already in the cache, `None` will be returned. - pub fn request<'a>(&'a mut self, handle: &'a mut GpuCacheHandle) -> Option<GpuDataRequest<'a>> { - let mut max_block_count = MAX_VERTEX_TEXTURE_WIDTH; - // Check if the allocation for this handle is still valid. - if let Some(ref location) = handle.location { - if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) { - if block.epoch == location.epoch { - max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item; - if block.last_access_time != self.now.frame_id() { - // Mark last access time to avoid evicting this block. - block.last_access_time = self.now.frame_id(); - self.saved_block_count += max_block_count; - } - return None; - } - } - } - - debug_assert!(self.now.is_valid()); - Some(GpuDataRequest { - handle, - frame_stamp: self.now, - start_index: self.texture.pending_blocks.len(), - texture: &mut self.texture, - max_block_count, - }) - } - - // Push an array of data blocks to be uploaded to the GPU - // unconditionally for this frame. The cache handle will - // assert if the caller tries to retrieve the address - // of this handle on a subsequent frame. This is typically - // used for uploading data that changes every frame, and - // therefore makes no sense to try and cache. - pub fn push_per_frame_blocks(&mut self, blocks: &[GpuBlockData]) -> GpuCacheHandle { - let start_index = self.texture.pending_blocks.len(); - self.texture.pending_blocks.extend_from_slice(blocks); - let location = self.texture - .push_data(Some(start_index), blocks.len(), self.now); - GpuCacheHandle { - location: Some(location), - } - } - - /// End the frame. Return the list of updates to apply to the - /// device specific cache texture. - pub fn end_frame( - &mut self, - profile: &mut TransactionProfile, - ) -> FrameStamp { - profile_scope!("end_frame"); - profile.set(profiler::GPU_CACHE_ROWS_TOTAL, self.texture.rows.len()); - profile.set(profiler::GPU_CACHE_BLOCKS_TOTAL, self.texture.allocated_block_count); - profile.set(profiler::GPU_CACHE_BLOCKS_SAVED, self.saved_block_count); - - let reached_threshold = - self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) && - self.texture.utilization() < RECLAIM_THRESHOLD; - if reached_threshold { - self.texture.reached_reclaim_threshold.get_or_insert_with(Instant::now); - } else { - self.texture.reached_reclaim_threshold = None; - } - - self.document_frames_to_build.remove(&self.now.document_id()); - self.now - } - - /// Returns true if utilization has been low enough for long enough that we - /// should blow the cache away and rebuild it. - pub fn should_reclaim_memory(&self) -> bool { - self.texture.reached_reclaim_threshold - .map_or(false, |t| t.elapsed() > Duration::from_secs(RECLAIM_DELAY_S)) - } - - /// Extract the pending updates from the cache. - pub fn extract_updates(&mut self) -> GpuCacheUpdateList { - let clear = self.pending_clear; - self.pending_clear = false; - GpuCacheUpdateList { - frame_id: self.now.frame_id(), - clear, - height: self.texture.height, - debug_commands: self.texture.debug_commands.take_and_preallocate(), - updates: self.texture.updates.take_and_preallocate(), - blocks: self.texture.pending_blocks.take_and_preallocate(), - } - } - - /// Sets the current debug flags for the system. - pub fn set_debug_flags(&mut self, flags: DebugFlags) { - self.debug_flags = flags; - self.texture.debug_flags = flags; - } - - /// Get the actual GPU address in the texture for a given slot ID. - /// It's assumed at this point that the given slot has been requested - /// and built for this frame. Attempting to get the address for a - /// freed or pending slot will panic! - pub fn get_address(&self, id: &GpuCacheHandle) -> GpuCacheAddress { - self.try_get_address(id).expect("handle not requested or allocated!") - } - - /// Get the actual GPU address in the texture for a given slot ID. - /// - /// Returns None if the slot has not been requested. - pub fn try_get_address(&self, id: &GpuCacheHandle) -> Option<GpuCacheAddress> { - let Some(location) = id.location else { return None; }; - let block = &self.texture.blocks[location.block_index.get()]; - debug_assert_eq!(block.epoch, location.epoch); - debug_assert_eq!(block.last_access_time, self.now.frame_id()); - Some(block.address) - } -} - -#[test] -#[cfg(target_pointer_width = "64")] -fn test_struct_sizes() { - use std::mem; - // We can end up with a lot of blocks stored in the global vec, and keeping - // them small helps reduce memory overhead. - assert_eq!(mem::size_of::<Block>(), 24, "Block size changed"); -} diff --git a/gfx/wr/webrender/src/internal_types.rs b/gfx/wr/webrender/src/internal_types.rs @@ -10,7 +10,6 @@ use crate::render_api::DebugCommand; use crate::composite::NativeSurfaceOperation; use crate::device::TextureFilter; use crate::renderer::{FullFrameStats, PipelineInfo}; -use crate::gpu_cache::GpuCacheUpdateList; use crate::gpu_types::BlurEdgeMode; use crate::frame_builder::Frame; use crate::profiler::TransactionProfile; @@ -1350,7 +1349,6 @@ pub enum ResultMsg { DebugCommand(DebugCommand), DebugOutput(DebugOutput), RefreshShader(PathBuf), - UpdateGpuCache(GpuCacheUpdateList), UpdateResources { resource_updates: ResourceUpdateList, memory_pressure: bool, diff --git a/gfx/wr/webrender/src/lib.rs b/gfx/wr/webrender/src/lib.rs @@ -102,7 +102,6 @@ mod filterdata; mod frame_builder; mod freelist; mod glyph_cache; -mod gpu_cache; mod gpu_types; mod hit_test; mod internal_types; diff --git a/gfx/wr/webrender/src/picture.rs b/gfx/wr/webrender/src/picture.rs @@ -116,7 +116,6 @@ use crate::intern::ItemUid; use crate::internal_types::{FastHashMap, FastHashSet, PlaneSplitter, FilterGraphOp, FilterGraphNode, Filter, FrameId}; use crate::internal_types::{PlaneSplitterIndex, PlaneSplitAnchor, TextureSource}; use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState, PictureContext}; -use crate::gpu_cache::GpuCache; use crate::gpu_types::{UvRectKind, ZBufferId, BlurEdgeMode}; use peek_poke::{PeekPoke, poke_into_vec, peek_from_slice, ensure_red_zone}; use plane_split::{Clipper, Polygon}; @@ -6131,7 +6130,7 @@ impl PicturePrimitive { &self.snapshot, &surface_rects, false, - &mut|rg_builder, _, _| { + &mut|rg_builder, _| { RenderTask::new_blur( blur_std_deviation, picture_task_id, @@ -6308,7 +6307,7 @@ impl PicturePrimitive { &self.snapshot, &surface_rects, is_opaque, - &mut|rg_builder, _, _| { + &mut|rg_builder, _| { rg_builder.add().init( RenderTask::new_dynamic( task_size, @@ -6347,7 +6346,7 @@ impl PicturePrimitive { &self.snapshot, &surface_rects, is_opaque, - &mut|rg_builder, _, _| { + &mut|rg_builder, _| { rg_builder.add().init( RenderTask::new_dynamic( surface_rects.task_size, @@ -6386,7 +6385,7 @@ impl PicturePrimitive { &self.snapshot, &surface_rects, is_opaque, - &mut|rg_builder, _, _| { + &mut|rg_builder, _| { rg_builder.add().init( RenderTask::new_dynamic( surface_rects.task_size, @@ -6426,7 +6425,7 @@ impl PicturePrimitive { &self.snapshot, &surface_rects, is_opaque, - &mut|rg_builder, _, _| { + &mut|rg_builder, _| { rg_builder.add().init( RenderTask::new_dynamic( surface_rects.task_size, @@ -6471,7 +6470,7 @@ impl PicturePrimitive { &self.snapshot, &surface_rects, is_opaque, - &mut|rg_builder, _, _| { + &mut|rg_builder, _| { rg_builder.add().init( RenderTask::new_dynamic( surface_rects.task_size, @@ -6530,7 +6529,7 @@ impl PicturePrimitive { &self.snapshot, &surface_rects, is_opaque, - &mut|rg_builder, _, _| { + &mut|rg_builder, _| { RenderTask::new_svg_filter( primitives, filter_datas, @@ -6605,7 +6604,7 @@ impl PicturePrimitive { &self.snapshot, &surface_rects, false, - &mut|rg_builder, gpu_buffer, _| { + &mut|rg_builder, gpu_buffer| { RenderTask::new_svg_filter_graph( filters, rg_builder, @@ -6770,7 +6769,7 @@ impl PicturePrimitive { PicturePrimitive::resolve_split_planes( splitter, list, - &mut frame_state.gpu_cache, + &mut frame_state.frame_gpu_data.f32, &frame_context.spatial_tree, ); @@ -6879,7 +6878,7 @@ impl PicturePrimitive { fn resolve_split_planes( splitter: &mut PlaneSplitter, ordered: &mut Vec<OrderedPictureChild>, - gpu_cache: &mut GpuCache, + gpu_buffer: &mut GpuBufferBuilderF, spatial_tree: &SpatialTree, ) { ordered.clear(); @@ -7243,7 +7242,7 @@ impl PicturePrimitive { } }; - // TODO(gw): Almost all of the Picture types below use extra_gpu_cache_data + // TODO(gw): Almost all of the Picture types below use extra_gpu_data // to store the same type of data. The exception is the filter // with a ColorMatrix, which stores the color matrix here. It's // probably worth tidying this code up to be a bit more consistent. @@ -8614,7 +8613,7 @@ fn request_render_task( snapshot: &Option<SnapshotInfo>, surface_rects: &SurfaceAllocInfo, is_opaque: bool, - f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF, &mut GpuCache) -> RenderTaskId, + f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF) -> RenderTaskId, ) -> RenderTaskId { let task_id = match snapshot { @@ -8628,7 +8627,6 @@ fn request_render_task( surface_rects.task_size, frame_state.rg_builder, &mut frame_state.frame_gpu_data.f32, - frame_state.gpu_cache, is_opaque, &adjustment, f @@ -8651,7 +8649,6 @@ fn request_render_task( f( frame_state.rg_builder, &mut frame_state.frame_gpu_data.f32, - frame_state.gpu_cache ) } }; diff --git a/gfx/wr/webrender/src/prepare.rs b/gfx/wr/webrender/src/prepare.rs @@ -445,11 +445,10 @@ fn prepare_interned_prim_for_render( }), false, RenderTaskParent::Surface, - frame_state.gpu_cache, &mut frame_state.frame_gpu_data.f32, frame_state.rg_builder, &mut frame_state.surface_builder, - &mut |rg_builder, _, _| { + &mut |rg_builder, _| { rg_builder.add().init(RenderTask::new_dynamic( task_size, RenderTaskKind::new_line_decoration( @@ -597,11 +596,10 @@ fn prepare_interned_prim_for_render( Some(cache_key), false, // TODO(gw): We don't calculate opacity for borders yet! RenderTaskParent::Surface, - frame_state.gpu_cache, &mut frame_state.frame_gpu_data.f32, frame_state.rg_builder, &mut frame_state.surface_builder, - &mut |rg_builder, _, _| { + &mut |rg_builder, _| { rg_builder.add().init(RenderTask::new_dynamic( cache_size, RenderTaskKind::new_border_segment( @@ -1557,7 +1555,6 @@ pub fn update_clip_task( instance.vis.clip_chain.clips_range, root_spatial_node_index, frame_state.clip_store, - frame_state.gpu_cache, &mut frame_state.frame_gpu_data.f32, frame_state.resource_cache, frame_state.rg_builder, @@ -1623,7 +1620,6 @@ pub fn update_brush_segment_clip_task( clip_chain.clips_range, root_spatial_node_index, frame_state.clip_store, - frame_state.gpu_cache, &mut frame_state.frame_gpu_data.f32, frame_state.resource_cache, frame_state.rg_builder, diff --git a/gfx/wr/webrender/src/prim_store/gradient/conic.rs b/gfx/wr/webrender/src/prim_store/gradient/conic.rs @@ -297,11 +297,10 @@ impl ConicGradientTemplate { }), false, RenderTaskParent::Surface, - frame_state.gpu_cache, &mut frame_state.frame_gpu_data.f32, frame_state.rg_builder, &mut frame_state.surface_builder, - &mut |rg_builder, gpu_buffer_builder, _| { + &mut |rg_builder, gpu_buffer_builder| { let stops = GradientGpuBlockBuilder::build( false, gpu_buffer_builder, diff --git a/gfx/wr/webrender/src/prim_store/gradient/linear.rs b/gfx/wr/webrender/src/prim_store/gradient/linear.rs @@ -562,11 +562,10 @@ impl LinearGradientTemplate { }), false, RenderTaskParent::Surface, - frame_state.gpu_cache, &mut frame_state.frame_gpu_data.f32, frame_state.rg_builder, &mut frame_state.surface_builder, - &mut |rg_builder, _, _| { + &mut |rg_builder, _| { rg_builder.add().init(RenderTask::new_dynamic( self.task_size, RenderTaskKind::FastLinearGradient(gradient), @@ -591,11 +590,10 @@ impl LinearGradientTemplate { }), false, RenderTaskParent::Surface, - frame_state.gpu_cache, &mut frame_state.frame_gpu_data.f32, frame_state.rg_builder, &mut frame_state.surface_builder, - &mut |rg_builder, gpu_buffer_builder, _| { + &mut |rg_builder, gpu_buffer_builder| { let stops = Some(GradientGpuBlockBuilder::build( self.reverse_stops, gpu_buffer_builder, diff --git a/gfx/wr/webrender/src/prim_store/gradient/radial.rs b/gfx/wr/webrender/src/prim_store/gradient/radial.rs @@ -266,11 +266,10 @@ impl RadialGradientTemplate { }), false, RenderTaskParent::Surface, - frame_state.gpu_cache, &mut frame_state.frame_gpu_data.f32, frame_state.rg_builder, &mut frame_state.surface_builder, - &mut |rg_builder, gpu_buffer_builder, _| { + &mut |rg_builder, gpu_buffer_builder| { let stops = GradientGpuBlockBuilder::build( false, gpu_buffer_builder, diff --git a/gfx/wr/webrender/src/prim_store/image.rs b/gfx/wr/webrender/src/prim_store/image.rs @@ -274,11 +274,10 @@ impl ImageData { }), descriptor.is_opaque(), RenderTaskParent::Surface, - frame_state.gpu_cache, &mut frame_state.frame_gpu_data.f32, frame_state.rg_builder, &mut frame_state.surface_builder, - &mut |rg_builder, _, _| { + &mut |rg_builder, _| { // Create a task to blit from the texture cache to // a normal transient render task surface. // TODO: figure out if/when we can do a blit instead. diff --git a/gfx/wr/webrender/src/profiler.rs b/gfx/wr/webrender/src/profiler.rs @@ -27,7 +27,7 @@ use crate::renderer::DebugRenderer; use crate::device::query::GpuTimer; use euclid::{Point2D, Rect, Size2D, vec2, default}; use crate::internal_types::FastHashMap; -use crate::renderer::{FullFrameStats, MAX_VERTEX_TEXTURE_WIDTH, init::wr_has_been_initialized}; +use crate::renderer::{FullFrameStats, init::wr_has_been_initialized}; use api::units::DeviceIntSize; use std::collections::vec_deque::VecDeque; use std::fmt::{Write, Debug}; @@ -150,144 +150,134 @@ pub const UPLOAD_NUM_COPY_BATCHES: usize = 23; pub const TOTAL_UPLOAD_TIME: usize = 24; pub const CREATE_CACHE_TEXTURE_TIME: usize = 25; pub const DELETE_CACHE_TEXTURE_TIME: usize = 26; -pub const GPU_CACHE_UPLOAD_TIME: usize = 27; -pub const RASTERIZED_BLOBS: usize = 28; -pub const RASTERIZED_BLOB_TILES: usize = 29; -pub const RASTERIZED_BLOBS_PX: usize = 30; -pub const BLOB_RASTERIZATION_TIME: usize = 31; +pub const RASTERIZED_BLOBS: usize = 27; +pub const RASTERIZED_BLOB_TILES: usize = 28; +pub const RASTERIZED_BLOBS_PX: usize = 29; +pub const BLOB_RASTERIZATION_TIME: usize = 30; -pub const RASTERIZED_GLYPHS: usize = 32; -pub const GLYPH_RESOLVE_TIME: usize = 33; +pub const RASTERIZED_GLYPHS: usize = 31; +pub const GLYPH_RESOLVE_TIME: usize = 32; -pub const DRAW_CALLS: usize = 34; -pub const VERTICES: usize = 35; -pub const PRIMITIVES: usize = 36; -pub const VISIBLE_PRIMITIVES: usize = 37; +pub const DRAW_CALLS: usize = 33; +pub const VERTICES: usize = 34; +pub const PRIMITIVES: usize = 35; +pub const VISIBLE_PRIMITIVES: usize = 36; -pub const USED_TARGETS: usize = 38; -pub const CREATED_TARGETS: usize = 39; -pub const PICTURE_CACHE_SLICES: usize = 40; +pub const USED_TARGETS: usize = 37; +pub const CREATED_TARGETS: usize = 38; +pub const PICTURE_CACHE_SLICES: usize = 39; -pub const COLOR_PASSES: usize = 41; -pub const ALPHA_PASSES: usize = 42; -pub const PICTURE_TILES: usize = 43; -pub const RENDERED_PICTURE_TILES: usize = 44; +pub const COLOR_PASSES: usize = 40; +pub const ALPHA_PASSES: usize = 41; +pub const PICTURE_TILES: usize = 42; +pub const RENDERED_PICTURE_TILES: usize = 43; -pub const FONT_TEMPLATES: usize = 45; -pub const FONT_TEMPLATES_MEM: usize = 46; -pub const IMAGE_TEMPLATES: usize = 47; -pub const IMAGE_TEMPLATES_MEM: usize = 48; - -pub const GPU_CACHE_ROWS_TOTAL: usize = 49; -pub const GPU_CACHE_ROWS_UPDATED: usize = 50; -pub const GPU_CACHE_BLOCKS_TOTAL: usize = 51; -pub const GPU_CACHE_BLOCKS_UPDATED: usize = 52; -pub const GPU_CACHE_BLOCKS_SAVED: usize = 53; +pub const FONT_TEMPLATES: usize = 44; +pub const FONT_TEMPLATES_MEM: usize = 45; +pub const IMAGE_TEMPLATES: usize = 46; +pub const IMAGE_TEMPLATES_MEM: usize = 47; // Atlas items represents the area occupied by items in the cache textures. // The actual texture memory allocated is ATLAS_TEXTURES_MEM. -pub const ATLAS_ITEMS_MEM: usize = 54; -pub const ATLAS_A8_PIXELS: usize = 55; -pub const ATLAS_A8_TEXTURES: usize = 56; -pub const ATLAS_A16_PIXELS: usize = 57; -pub const ATLAS_A16_TEXTURES: usize = 58; -pub const ATLAS_RGBA8_LINEAR_PIXELS: usize = 59; -pub const ATLAS_RGBA8_LINEAR_TEXTURES: usize = 60; -pub const ATLAS_RGBA8_NEAREST_PIXELS: usize = 61; -pub const ATLAS_RGBA8_NEAREST_TEXTURES: usize = 62; -pub const ATLAS_RGBA8_GLYPHS_PIXELS: usize = 63; -pub const ATLAS_RGBA8_GLYPHS_TEXTURES: usize = 64; -pub const ATLAS_A8_GLYPHS_PIXELS: usize = 65; -pub const ATLAS_A8_GLYPHS_TEXTURES: usize = 66; -pub const ATLAS_COLOR8_LINEAR_PRESSURE: usize = 67; -pub const ATLAS_COLOR8_NEAREST_PRESSURE: usize = 68; -pub const ATLAS_COLOR8_GLYPHS_PRESSURE: usize = 69; -pub const ATLAS_ALPHA8_PRESSURE: usize = 70; -pub const ATLAS_ALPHA8_GLYPHS_PRESSURE: usize = 71; -pub const ATLAS_ALPHA16_PRESSURE: usize = 72; -pub const ATLAS_STANDALONE_PRESSURE: usize = 73; - -pub const TEXTURE_CACHE_EVICTION_COUNT: usize = 74; -pub const TEXTURE_CACHE_YOUNGEST_EVICTION: usize = 75; -pub const EXTERNAL_IMAGE_BYTES: usize = 76; -pub const ATLAS_TEXTURES_MEM: usize = 77; -pub const STANDALONE_TEXTURES_MEM: usize = 78; -pub const PICTURE_TILES_MEM: usize = 79; -pub const RENDER_TARGET_MEM: usize = 80; - -pub const ALPHA_TARGETS_SAMPLERS: usize = 81; -pub const TRANSPARENT_PASS_SAMPLERS: usize = 82; -pub const OPAQUE_PASS_SAMPLERS: usize = 83; -pub const TOTAL_SAMPLERS: usize = 84; - -pub const INTERNED_PRIMITIVES: usize = 85; -pub const INTERNED_CLIPS: usize = 86; -pub const INTERNED_TEXT_RUNS: usize = 87; -pub const INTERNED_NORMAL_BORDERS: usize = 88; -pub const INTERNED_IMAGE_BORDERS: usize = 89; -pub const INTERNED_IMAGES: usize = 90; -pub const INTERNED_YUV_IMAGES: usize = 91; -pub const INTERNED_LINE_DECORATIONS: usize = 92; -pub const INTERNED_LINEAR_GRADIENTS: usize = 93; -pub const INTERNED_RADIAL_GRADIENTS: usize = 94; -pub const INTERNED_CONIC_GRADIENTS: usize = 95; -pub const INTERNED_PICTURES: usize = 96; -pub const INTERNED_FILTER_DATA: usize = 97; -pub const INTERNED_BACKDROP_CAPTURES: usize = 98; -pub const INTERNED_BACKDROP_RENDERS: usize = 99; -pub const INTERNED_POLYGONS: usize = 100; -pub const INTERNED_BOX_SHADOWS: usize = 101; -pub const DEPTH_TARGETS_MEM: usize = 102; - -pub const SHADER_BUILD_TIME: usize = 103; - -pub const RENDER_REASON_FIRST: usize = 104; -pub const RENDER_REASON_SCENE: usize = 104; -pub const RENDER_REASON_ANIMATED_PROPERTY: usize = 105; -pub const RENDER_REASON_RESOURCE_UPDATE: usize = 106; -pub const RENDER_REASON_ASYNC_IMAGE: usize = 107; -pub const RENDER_REASON_CLEAR_RESOURCES: usize = 108; -pub const RENDER_REASON_APZ: usize = 109; -pub const RENDER_REASON_RESIZE: usize = 110; -pub const RENDER_REASON_WIDGET: usize = 111; -pub const RENDER_REASON_TEXTURE_CACHE_FLUSH: usize = 112; -pub const RENDER_REASON_SNAPSHOT: usize = 113; -pub const RENDER_REASON_POST_RESOURCE_UPDATE_HOOKS: usize = 114; -pub const RENDER_REASON_CONFIG_CHANGE: usize = 115; -pub const RENDER_REASON_CONTENT_SYNC: usize = 116; -pub const RENDER_REASON_FLUSH: usize = 117; -pub const RENDER_REASON_TESTING: usize = 118; -pub const RENDER_REASON_OTHER: usize = 119; -pub const RENDER_REASON_VSYNC: usize = 120; - -pub const TEXTURES_CREATED: usize = 121; -pub const TEXTURES_DELETED: usize = 122; - -pub const SLOW_FRAME_CPU_COUNT: usize = 123; -pub const SLOW_FRAME_GPU_COUNT: usize = 124; -pub const SLOW_FRAME_BUILD_COUNT: usize = 125; -pub const SLOW_UPLOAD_COUNT: usize = 126; -pub const SLOW_RENDER_COUNT: usize = 127; -pub const SLOW_DRAW_CALLS_COUNT: usize = 128; -pub const SLOW_TARGETS_COUNT: usize = 129; -pub const SLOW_BLOB_COUNT: usize = 130; -pub const SLOW_SCROLL_AFTER_SCENE_COUNT: usize = 131; - -pub const GPU_CACHE_MEM: usize = 132; -pub const GPU_BUFFER_MEM: usize = 133; -pub const GPU_TOTAL_MEM: usize = 134; - -pub const GPU_CACHE_PREPARE_TIME: usize = 135; - -pub const FRAME_SEND_TIME: usize = 136; -pub const UPDATE_DOCUMENT_TIME: usize = 137; - -pub const COMPOSITOR_SURFACE_UNDERLAYS: usize = 138; -pub const COMPOSITOR_SURFACE_OVERLAYS: usize = 139; -pub const COMPOSITOR_SURFACE_BLITS: usize = 140; - -pub const NUM_PROFILER_EVENTS: usize = 141; +pub const ATLAS_ITEMS_MEM: usize = 48; +pub const ATLAS_A8_PIXELS: usize = 49; +pub const ATLAS_A8_TEXTURES: usize = 50; +pub const ATLAS_A16_PIXELS: usize = 51; +pub const ATLAS_A16_TEXTURES: usize = 52; +pub const ATLAS_RGBA8_LINEAR_PIXELS: usize = 53; +pub const ATLAS_RGBA8_LINEAR_TEXTURES: usize = 54; +pub const ATLAS_RGBA8_NEAREST_PIXELS: usize = 55; +pub const ATLAS_RGBA8_NEAREST_TEXTURES: usize = 56; +pub const ATLAS_RGBA8_GLYPHS_PIXELS: usize = 57; +pub const ATLAS_RGBA8_GLYPHS_TEXTURES: usize = 58; +pub const ATLAS_A8_GLYPHS_PIXELS: usize = 59; +pub const ATLAS_A8_GLYPHS_TEXTURES: usize = 60; +pub const ATLAS_COLOR8_LINEAR_PRESSURE: usize = 61; +pub const ATLAS_COLOR8_NEAREST_PRESSURE: usize = 62; +pub const ATLAS_COLOR8_GLYPHS_PRESSURE: usize = 63; +pub const ATLAS_ALPHA8_PRESSURE: usize = 64; +pub const ATLAS_ALPHA8_GLYPHS_PRESSURE: usize = 65; +pub const ATLAS_ALPHA16_PRESSURE: usize = 66; +pub const ATLAS_STANDALONE_PRESSURE: usize = 67; + +pub const TEXTURE_CACHE_EVICTION_COUNT: usize = 68; +pub const TEXTURE_CACHE_YOUNGEST_EVICTION: usize = 69; +pub const EXTERNAL_IMAGE_BYTES: usize = 70; +pub const ATLAS_TEXTURES_MEM: usize = 71; +pub const STANDALONE_TEXTURES_MEM: usize = 72; +pub const PICTURE_TILES_MEM: usize = 73; +pub const RENDER_TARGET_MEM: usize = 74; + +pub const ALPHA_TARGETS_SAMPLERS: usize = 75; +pub const TRANSPARENT_PASS_SAMPLERS: usize = 76; +pub const OPAQUE_PASS_SAMPLERS: usize = 77; +pub const TOTAL_SAMPLERS: usize = 78; + +pub const INTERNED_PRIMITIVES: usize = 79; +pub const INTERNED_CLIPS: usize = 80; +pub const INTERNED_TEXT_RUNS: usize = 81; +pub const INTERNED_NORMAL_BORDERS: usize = 82; +pub const INTERNED_IMAGE_BORDERS: usize = 83; +pub const INTERNED_IMAGES: usize = 84; +pub const INTERNED_YUV_IMAGES: usize = 85; +pub const INTERNED_LINE_DECORATIONS: usize = 86; +pub const INTERNED_LINEAR_GRADIENTS: usize = 87; +pub const INTERNED_RADIAL_GRADIENTS: usize = 88; +pub const INTERNED_CONIC_GRADIENTS: usize = 89; +pub const INTERNED_PICTURES: usize = 90; +pub const INTERNED_FILTER_DATA: usize = 91; +pub const INTERNED_BACKDROP_CAPTURES: usize = 92; +pub const INTERNED_BACKDROP_RENDERS: usize = 93; +pub const INTERNED_POLYGONS: usize = 94; +pub const INTERNED_BOX_SHADOWS: usize = 95; +pub const DEPTH_TARGETS_MEM: usize = 96; + +pub const SHADER_BUILD_TIME: usize = 97; + +pub const RENDER_REASON_FIRST: usize = 98; +pub const RENDER_REASON_SCENE: usize = 99; +pub const RENDER_REASON_ANIMATED_PROPERTY: usize = 100; +pub const RENDER_REASON_RESOURCE_UPDATE: usize = 101; +pub const RENDER_REASON_ASYNC_IMAGE: usize = 102; +pub const RENDER_REASON_CLEAR_RESOURCES: usize = 103; +pub const RENDER_REASON_APZ: usize = 104; +pub const RENDER_REASON_RESIZE: usize = 105; +pub const RENDER_REASON_WIDGET: usize = 106; +pub const RENDER_REASON_TEXTURE_CACHE_FLUSH: usize = 107; +pub const RENDER_REASON_SNAPSHOT: usize = 108; +pub const RENDER_REASON_POST_RESOURCE_UPDATE_HOOKS: usize = 109; +pub const RENDER_REASON_CONFIG_CHANGE: usize = 110; +pub const RENDER_REASON_CONTENT_SYNC: usize = 111; +pub const RENDER_REASON_FLUSH: usize = 112; +pub const RENDER_REASON_TESTING: usize = 113; +pub const RENDER_REASON_OTHER: usize = 114; +pub const RENDER_REASON_VSYNC: usize = 115; + +pub const TEXTURES_CREATED: usize = 116; +pub const TEXTURES_DELETED: usize = 117; + +pub const SLOW_FRAME_CPU_COUNT: usize = 118; +pub const SLOW_FRAME_GPU_COUNT: usize = 119; +pub const SLOW_FRAME_BUILD_COUNT: usize = 120; +pub const SLOW_UPLOAD_COUNT: usize = 121; +pub const SLOW_RENDER_COUNT: usize = 122; +pub const SLOW_DRAW_CALLS_COUNT: usize = 123; +pub const SLOW_TARGETS_COUNT: usize = 124; +pub const SLOW_BLOB_COUNT: usize = 125; +pub const SLOW_SCROLL_AFTER_SCENE_COUNT: usize = 126; + +pub const GPU_BUFFER_MEM: usize = 127; +pub const GPU_TOTAL_MEM: usize = 128; + +pub const FRAME_SEND_TIME: usize = 129; +pub const UPDATE_DOCUMENT_TIME: usize = 130; + +pub const COMPOSITOR_SURFACE_UNDERLAYS: usize = 131; +pub const COMPOSITOR_SURFACE_OVERLAYS: usize = 132; +pub const COMPOSITOR_SURFACE_BLITS: usize = 133; + +pub const NUM_PROFILER_EVENTS: usize = 134; pub struct Profiler { counters: Vec<Counter>, @@ -376,7 +366,6 @@ impl Profiler { float("Texture cache upload", "ms", TOTAL_UPLOAD_TIME, expected(0.0..5.0)), float("Cache texture creation", "ms", CREATE_CACHE_TEXTURE_TIME, expected(0.0..2.0)), float("Cache texture deletion", "ms", DELETE_CACHE_TEXTURE_TIME, expected(0.0..1.0)), - float("GPU cache upload", "ms", GPU_CACHE_UPLOAD_TIME, expected(0.0..2.0)), int("Rasterized blobs", "", RASTERIZED_BLOBS, expected(0..15)), int("Rasterized blob tiles", "", RASTERIZED_BLOB_TILES, expected(0..15)), @@ -405,12 +394,6 @@ impl Profiler { int("Image templates", "", IMAGE_TEMPLATES, expected(0..100)), float("Image templates mem", "MB", IMAGE_TEMPLATES_MEM, expected(0.0..50.0)), - int("GPU cache rows total", "", GPU_CACHE_ROWS_TOTAL, expected(1..50)), - int("GPU cache rows updated", "", GPU_CACHE_ROWS_UPDATED, expected(0..25)), - int("GPU blocks total", "", GPU_CACHE_BLOCKS_TOTAL, expected(1..65_000)), - int("GPU blocks updated", "", GPU_CACHE_BLOCKS_UPDATED, expected(0..1000)), - int("GPU blocks saved", "", GPU_CACHE_BLOCKS_SAVED, expected(0..50_000)), - float("Atlas items mem", "MB", ATLAS_ITEMS_MEM, expected(0.0..100.0)), int("Atlas A8 pixels", "px", ATLAS_A8_PIXELS, expected(0..1_000_000)), int("Atlas A8 textures", "", ATLAS_A8_TEXTURES, expected(0..2)), @@ -466,6 +449,7 @@ impl Profiler { float("Depth targets mem", "MB", DEPTH_TARGETS_MEM, Expected::none()), float("Shader build time", "ms", SHADER_BUILD_TIME, Expected::none()), // We use the expected range to highlight render reasons that are happening. + float("Reason First", "", RENDER_REASON_FIRST, expected(0.0..0.01)), float("Reason scene", "", RENDER_REASON_SCENE, expected(0.0..0.01)), float("Reason animated property", "", RENDER_REASON_ANIMATED_PROPERTY, expected(0.0..0.01)), float("Reason resource update", "", RENDER_REASON_RESOURCE_UPDATE, expected(0.0..0.01)), @@ -497,11 +481,9 @@ impl Profiler { int("Slow: blobs", "%", SLOW_BLOB_COUNT, Expected::none()), int("Slow: after scene", "%", SLOW_SCROLL_AFTER_SCENE_COUNT, Expected::none()), - float("GPU cache mem", "MB", GPU_CACHE_MEM, Expected::none()), float("GPU buffer mem", "MB", GPU_BUFFER_MEM, Expected::none()), float("GPU total mem", "MB", GPU_TOTAL_MEM, Expected::none()), - float("GPU cache preapre", "ms", GPU_CACHE_PREPARE_TIME, Expected::none()), float("Frame send", "ms", FRAME_SEND_TIME, Expected::none()), float("Update document", "ms", UPDATE_DOCUMENT_TIME, Expected::none()), @@ -707,7 +689,6 @@ impl Profiler { RENDER_TARGET_MEM, DEPTH_TARGETS_MEM, ATLAS_ITEMS_MEM, - GPU_CACHE_MEM, GPU_BUFFER_MEM, ] { if let Some(val) = self.counters[counter].get() { @@ -805,10 +786,6 @@ impl Profiler { flush_counters(&mut counters, selection); selection.push(Item::GpuTimeQueries); } - "GPU cache bars" => { - flush_counters(&mut counters, selection); - selection.push(Item::GpuCacheBars); - } "Paint phase graph" => { flush_counters(&mut counters, selection); selection.push(Item::PaintPhaseGraph); @@ -857,10 +834,6 @@ impl Profiler { &self.counters } - pub fn get(&self, id: usize) -> Option<f64> { - self.counters[id].get() - } - fn draw_counters( counters: &[Counter], selected: &[usize], @@ -1099,102 +1072,6 @@ impl Profiler { } } - fn draw_bar( - label: &str, - label_color: ColorU, - counters: &[(ColorU, usize)], - x: f32, y: f32, - debug_renderer: &mut DebugRenderer, - ) -> default::Rect<f32> { - let x = x + 8.0; - let y = y + 24.0; - let text_rect = debug_renderer.add_text( - x, y, - label, - label_color, - None, - ); - - let x_base = text_rect.max_x() + 10.0; - let width = 300.0; - let total_value = counters.last().unwrap().1; - let scale = width / total_value as f32; - let mut x_current = x_base; - - for &(color, counter) in counters { - let x_stop = x_base + counter as f32 * scale; - debug_renderer.add_quad( - x_current, - text_rect.origin.y, - x_stop, - text_rect.max_y(), - color, - color, - ); - x_current = x_stop; - - } - - let mut total_rect = text_rect; - total_rect.size.width += width + 10.0; - - total_rect - } - - fn draw_gpu_cache_bars(&self, x: f32, mut y: f32, text_buffer: &mut String, debug_renderer: &mut DebugRenderer) -> default::Rect<f32> { - let color_updated = ColorU::new(0xFF, 0, 0, 0xFF); - let color_free = ColorU::new(0, 0, 0xFF, 0xFF); - let color_saved = ColorU::new(0, 0xFF, 0, 0xFF); - - let updated_blocks = self.get(GPU_CACHE_BLOCKS_UPDATED).unwrap_or(0.0) as usize; - let saved_blocks = self.get(GPU_CACHE_BLOCKS_SAVED).unwrap_or(0.0) as usize; - let allocated_blocks = self.get(GPU_CACHE_BLOCKS_TOTAL).unwrap_or(0.0) as usize; - let allocated_rows = self.get(GPU_CACHE_ROWS_TOTAL).unwrap_or(0.0) as usize; - let updated_rows = self.get(GPU_CACHE_ROWS_UPDATED).unwrap_or(0.0) as usize; - let requested_blocks = updated_blocks + saved_blocks; - let total_blocks = allocated_rows * MAX_VERTEX_TEXTURE_WIDTH; - - set_text!(text_buffer, "GPU cache rows ({}):", allocated_rows); - - let rect0 = Profiler::draw_bar( - text_buffer, - ColorU::new(0xFF, 0xFF, 0xFF, 0xFF), - &[ - (color_updated, updated_rows), - (color_free, allocated_rows), - ], - x, y, - debug_renderer, - ); - - y = rect0.max_y(); - - let rect1 = Profiler::draw_bar( - "GPU cache blocks", - ColorU::new(0xFF, 0xFF, 0, 0xFF), - &[ - (color_updated, updated_blocks), - (color_saved, requested_blocks), - (color_free, allocated_blocks), - (ColorU::new(0, 0, 0, 0xFF), total_blocks), - ], - x, y, - debug_renderer, - ); - - let total_rect = rect0.union(&rect1).inflate(10.0, 10.0); - debug_renderer.add_quad( - total_rect.origin.x, - total_rect.origin.y, - total_rect.origin.x + total_rect.size.width, - total_rect.origin.y + total_rect.size.height, - ColorF::new(0.1, 0.1, 0.1, 0.8).into(), - ColorF::new(0.2, 0.2, 0.2, 0.8).into(), - ); - - total_rect - } - // Draws a frame graph for a given frame collection. fn draw_frame_graph( frame_collection: &ProfilerFrameCollection, @@ -1360,9 +1237,6 @@ impl Profiler { Item::GpuTimeQueries => { Profiler::draw_frame_graph(&self.gpu_frames, x, y, debug_renderer) } - Item::GpuCacheBars => { - self.draw_gpu_cache_bars(x, y, &mut text_buffer, debug_renderer) - } Item::PaintPhaseGraph => { Profiler::draw_frame_graph(&self.frame_stats, x, y, debug_renderer) } @@ -2073,7 +1947,6 @@ pub struct CpuFrameTimings { pub frame_building_other: f64, pub frame_send: f64, pub uploads: f64, - pub gpu_cache: f64, pub draw_calls: f64, pub unknown: f64, } @@ -2089,10 +1962,9 @@ impl CpuFrameTimings { let frame_send = counters[FRAME_SEND_TIME].get().unwrap_or(0.0); let renderer = counters[RENDERER_TIME].get().unwrap_or(0.0); let uploads = counters[TEXTURE_CACHE_UPDATE_TIME].get().unwrap_or(0.0); - let gpu_cache = counters[GPU_CACHE_PREPARE_TIME].get().unwrap_or(0.0); let frame_build = visibility + prepare + glyph_resolve + batching; let update_document = counters[UPDATE_DOCUMENT_TIME].get().unwrap_or(0.0) - frame_build; - let draw_calls = renderer - uploads - gpu_cache; + let draw_calls = renderer - uploads; let unknown = (total - (api_send + update_document + frame_build + frame_send + renderer)).max(0.0); let frame_building_other = (counters[FRAME_BUILDING_TIME].get().unwrap_or(0.0) - frame_build).max(0.0); @@ -2107,7 +1979,6 @@ impl CpuFrameTimings { frame_building_other, frame_send, uploads, - gpu_cache, draw_calls, unknown, } @@ -2139,10 +2010,9 @@ impl CpuFrameTimings { sample(self.frame_send, "08. frame send", ColorF { r: 1.0, g: 0.8, b: 0.8, a: 1.0 }), // Renderer sample(self.uploads, "09. texture uploads", ColorF { r: 0.8, g: 0.0, b: 0.3, a: 1.0 }), - sample(self.gpu_cache, "10. gpu cache update", ColorF { r: 0.5, g: 0.0, b: 0.4, a: 1.0 }), - sample(self.draw_calls, "11. draw calls", ColorF { r: 1.0, g: 0.5, b: 0.0, a: 1.0 }), + sample(self.draw_calls, "10. draw calls", ColorF { r: 1.0, g: 0.5, b: 0.0, a: 1.0 }), // Unaccounted time - sample(self.unknown, "12. unknown", ColorF { r: 0.3, g: 0.3, b: 0.3, a: 1.0 }), + sample(self.unknown, "11. unknown", ColorF { r: 0.3, g: 0.3, b: 0.3, a: 1.0 }), ], } } @@ -2167,7 +2037,6 @@ enum Item { ChangeIndicator(usize), Fps, GpuTimeQueries, - GpuCacheBars, PaintPhaseGraph, SlowScrollFrames, Text(String), diff --git a/gfx/wr/webrender/src/render_api.rs b/gfx/wr/webrender/src/render_api.rs @@ -973,8 +973,6 @@ pub enum DebugCommand { EnableNativeCompositor(bool), /// Sets the maximum amount of existing batches to visit before creating a new one. SetBatchingLookback(u32), - /// Invalidate GPU cache, forcing the update from the CPU mirror. - InvalidateGpuCache, /// Causes the scene builder to pause for a given amount of milliseconds each time it /// processes a transaction. SimulateLongSceneBuild(u32), @@ -1490,8 +1488,6 @@ pub struct MemoryReport { // CPU Memory. // pub clip_stores: usize, - pub gpu_cache_metadata: usize, - pub gpu_cache_cpu_mirror: usize, pub hit_testers: usize, pub fonts: usize, pub weak_fonts: usize, @@ -1508,7 +1504,6 @@ pub struct MemoryReport { // // GPU memory. // - pub gpu_cache_textures: usize, pub vertex_data_textures: usize, pub render_target_textures: usize, pub picture_tile_textures: usize, diff --git a/gfx/wr/webrender/src/render_backend.rs b/gfx/wr/webrender/src/render_backend.rs @@ -30,7 +30,6 @@ use crate::capture::CaptureConfig; use crate::composite::{CompositorKind, CompositeDescriptor}; use crate::frame_builder::{FrameBuilder, FrameBuilderConfig, FrameScratchBuffer}; use glyph_rasterizer::FontInstance; -use crate::gpu_cache::GpuCache; use crate::hit_test::{HitTest, HitTester, SharedHitTester}; use crate::intern::DataStore; #[cfg(any(feature = "capture", feature = "replay"))] @@ -512,7 +511,6 @@ impl Document { fn build_frame( &mut self, resource_cache: &mut ResourceCache, - gpu_cache: &mut GpuCache, debug_flags: DebugFlags, tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>, frame_stats: Option<FullFrameStats>, @@ -533,7 +531,6 @@ impl Document { &mut self.scene, present, resource_cache, - gpu_cache, &mut self.rg_builder, self.stamp, self.view.scene.device_rect.min, @@ -587,7 +584,6 @@ impl Document { &mut self, mut txn: OffscreenBuiltScene, resource_cache: &mut ResourceCache, - gpu_cache: &mut GpuCache, chunk_pool: Arc<ChunkPool>, debug_flags: DebugFlags, ) -> RenderedDocument { @@ -613,7 +609,6 @@ impl Document { &mut txn.scene, present, resource_cache, - gpu_cache, &mut self.rg_builder, self.stamp, // TODO(nical) self.view.scene.device_rect.min, @@ -778,7 +773,6 @@ pub struct RenderBackend { result_tx: Sender<ResultMsg>, scene_tx: Sender<SceneBuilderRequest>, - gpu_cache: GpuCache, resource_cache: ResourceCache, chunk_pool: Arc<ChunkPool>, @@ -830,7 +824,6 @@ impl RenderBackend { result_tx, scene_tx, resource_cache, - gpu_cache: GpuCache::new(), chunk_pool, frame_config, default_compositor_kind : frame_config.compositor_kind, @@ -929,7 +922,6 @@ impl RenderBackend { result_tx: Option<Sender<SceneSwapResult>>, frame_counter: &mut u32, ) -> bool { - self.prepare_for_frames(); self.maybe_force_nop_documents( frame_counter, |document_id| txns.iter().any(|txn| txn.document_id == document_id)); @@ -1018,14 +1010,10 @@ impl RenderBackend { let rendered_document = doc.process_offscreen_scene( offscreen_scene, &mut self.resource_cache, - &mut self.gpu_cache, self.chunk_pool.clone(), self.debug_flags, ); - let msg = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates()); - self.result_tx.send(msg).unwrap(); - let pending_update = self.resource_cache.pending_updates(); let msg = ResultMsg::PublishDocument( @@ -1112,8 +1100,6 @@ impl RenderBackend { // recently used resources. self.resource_cache.clear(ClearCache::all()); - self.gpu_cache.clear(); - for (_, doc) in &mut self.documents { doc.scratch.memory_pressure(); for tile_cache in self.tile_caches.values_mut() { @@ -1149,8 +1135,6 @@ impl RenderBackend { return RenderBackendStatus::Continue; } DebugCommand::GenerateFrame => { - self.prepare_for_frames(); - let documents: Vec<DocumentId> = self.documents.keys() .cloned() .collect(); @@ -1182,7 +1166,6 @@ impl RenderBackend { doc.scene.config.force_invalidation = invalidation_config; } } - self.bookkeep_after_frames(); return RenderBackendStatus::Continue; } @@ -1282,7 +1265,6 @@ impl RenderBackend { } DebugCommand::SetFlags(flags) => { self.resource_cache.set_debug_flags(flags); - self.gpu_cache.set_debug_flags(flags); let force_invalidation = flags.contains(DebugFlags::FORCE_PICTURE_INVALIDATION); if self.frame_config.force_invalidation != force_invalidation { @@ -1293,19 +1275,6 @@ impl RenderBackend { self.update_frame_builder_config(); } - // If we're toggling on the GPU cache debug display, we - // need to blow away the cache. This is because we only - // send allocation/free notifications to the renderer - // thread when the debug display is enabled, and thus - // enabling it when the cache is partially populated will - // give the renderer an incomplete view of the world. - // And since we might as well drop all the debugging state - // from the renderer when we disable the debug display, - // we just clear the cache on toggle. - let changed = self.debug_flags ^ flags; - if changed.contains(DebugFlags::GPU_CACHE_DBG) { - self.gpu_cache.clear(); - } self.debug_flags = flags; ResultMsg::DebugCommand(option) @@ -1349,7 +1318,6 @@ impl RenderBackend { result_tx, frame_counter, ); - self.bookkeep_after_frames(); }, #[cfg(feature = "capture")] SceneBuilderResult::CapturedTransactions(txns, capture_config, result_tx) => { @@ -1372,8 +1340,6 @@ impl RenderBackend { if built_frame { self.save_capture_sequence(); } - - self.bookkeep_after_frames(); }, #[cfg(feature = "capture")] SceneBuilderResult::StopCaptureSequence => { @@ -1439,16 +1405,8 @@ impl RenderBackend { ); } - fn prepare_for_frames(&mut self) { - self.gpu_cache.prepare_for_frames(); - } - - fn bookkeep_after_frames(&mut self) { - self.gpu_cache.bookkeep_after_frames(); - } - fn requires_frame_build(&mut self) -> bool { - self.gpu_cache.requires_frame_build() + false // TODO(nical) } fn prepare_transactions( @@ -1456,7 +1414,6 @@ impl RenderBackend { txns: Vec<Box<TransactionMsg>>, frame_counter: &mut u32, ) { - self.prepare_for_frames(); self.maybe_force_nop_documents( frame_counter, |document_id| txns.iter().any(|txn| txn.document_id == document_id)); @@ -1489,7 +1446,6 @@ impl RenderBackend { #[cfg(feature = "capture")] self.save_capture_sequence(); } - self.bookkeep_after_frames(); } /// In certain cases, resources shared by multiple documents have to run @@ -1643,7 +1599,6 @@ impl RenderBackend { let rendered_document = doc.build_frame( &mut self.resource_cache, - &mut self.gpu_cache, self.debug_flags, &mut self.tile_caches, frame_stats, @@ -1655,9 +1610,6 @@ impl RenderBackend { debug!("generated frame for document {:?} with {} passes", document_id, rendered_document.frame.passes.len()); - let msg = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates()); - self.result_tx.send(msg).unwrap(); - Telemetry::stop_and_accumulate_framebuild_time(timer_id); let pending_update = self.resource_cache.pending_updates(); @@ -1778,7 +1730,6 @@ impl RenderBackend { let mut report = Box::new(MemoryReport::default()); let ops = self.size_of_ops.as_mut().unwrap(); let op = ops.size_of_op; - report.gpu_cache_metadata = self.gpu_cache.size_of(ops); for doc in self.documents.values() { report.clip_stores += doc.scene.clip_store.size_of(ops); report.hit_testers += match &doc.hit_tester { @@ -1844,10 +1795,6 @@ impl RenderBackend { } let config = CaptureConfig::new(root, bits); - if config.bits.contains(CaptureBits::FRAME) { - self.prepare_for_frames(); - } - for (&id, doc) in &mut self.documents { debug!("\tdocument {:?}", id); if config.bits.contains(CaptureBits::FRAME) { @@ -1855,7 +1802,6 @@ impl RenderBackend { let force_invalidation = std::mem::replace(&mut doc.scene.config.force_invalidation, true); let rendered_document = doc.build_frame( &mut self.resource_cache, - &mut self.gpu_cache, self.debug_flags, &mut self.tile_caches, None, @@ -1866,11 +1812,6 @@ impl RenderBackend { doc.scene.config.force_invalidation = force_invalidation; - // After we rendered the frames, there are pending updates to both - // GPU cache and resources. Instead of serializing them, we are going to make sure - // they are applied on the `Renderer` side. - let msg_update_gpu_cache = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates()); - self.result_tx.send(msg_update_gpu_cache).unwrap(); //TODO: write down doc's pipeline info? // it has `pipeline_epoch_map`, // which may capture necessary details for some cases. @@ -1928,7 +1869,6 @@ impl RenderBackend { // report it here if we do. If we don't, it will simply crash in // Renderer::render_impl and give us less information about the source. assert!(!self.requires_frame_build(), "Caches were cleared during a capture."); - self.bookkeep_after_frames(); } debug!("\tscene builder"); @@ -1962,8 +1902,6 @@ impl RenderBackend { info!("\tresource cache"); let caches = self.resource_cache.save_caches(&config.root); config.serialize_for_resource(&caches, "resource_cache"); - info!("\tgpu cache"); - config.serialize_for_resource(&self.gpu_cache, "gpu_cache"); } DebugOutput::SaveCapture(config, deferred) @@ -2037,11 +1975,6 @@ impl RenderBackend { DebugOutput::LoadCapture(config.clone(), plain_externals) ); self.result_tx.send(msg_load).unwrap(); - - self.gpu_cache = match config.deserialize_for_resource::<GpuCache, _>("gpu_cache") { - Some(gpu_cache) => gpu_cache, - None => GpuCache::new(), - }; } self.frame_config = backend.frame_config; @@ -2126,9 +2059,6 @@ impl RenderBackend { Some(frame) => { info!("\tloaded a built frame with {} passes", frame.passes.len()); - let msg_update = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates()); - self.result_tx.send(msg_update).unwrap(); - self.frame_publish_id.advance(); let msg_publish = ResultMsg::PublishDocument( self.frame_publish_id, diff --git a/gfx/wr/webrender/src/render_task.rs b/gfx/wr/webrender/src/render_task.rs @@ -15,7 +15,6 @@ use crate::profiler::{add_text_marker}; use crate::spatial_tree::SpatialNodeIndex; use crate::filterdata::SFilterData; use crate::frame_builder::FrameBuilderConfig; -use crate::gpu_cache::GpuCache; use crate::gpu_types::{BorderInstance, ImageSource, UvRectKind, TransformPaletteId, BlurEdgeMode}; use crate::internal_types::{CacheTextureId, FastHashMap, FilterGraphNode, FilterGraphOp, FilterGraphPictureReference, SVGFE_CONVOLVE_VALUES_LIMIT, TextureSource, Swizzle}; use crate::picture::{ResolvedSurfaceTexture, MAX_SURFACE_SIZE}; @@ -628,7 +627,6 @@ impl RenderTaskKind { clip_node_range: ClipNodeRange, root_spatial_node_index: SpatialNodeIndex, clip_store: &mut ClipStore, - gpu_cache: &mut GpuCache, gpu_buffer_builder: &mut GpuBufferBuilderF, resource_cache: &mut ResourceCache, rg_builder: &mut RenderTaskGraphBuilder, @@ -686,11 +684,10 @@ impl RenderTaskKind { }), false, RenderTaskParent::RenderTask(clip_task_id), - gpu_cache, gpu_buffer_builder, rg_builder, surface_builder, - &mut |rg_builder, _, _| { + &mut |rg_builder, _| { let clip_data = ClipData::rounded_rect( source.minimal_shadow_rect.size(), &source.shadow_radius, diff --git a/gfx/wr/webrender/src/render_task_cache.rs b/gfx/wr/webrender/src/render_task_cache.rs @@ -9,7 +9,6 @@ use crate::border::BorderSegmentCacheKey; use crate::box_shadow::BoxShadowCacheKey; use crate::device::TextureFilter; use crate::freelist::{FreeList, FreeListHandle, WeakFreeListHandle}; -use crate::gpu_cache::GpuCache; use crate::internal_types::FastHashMap; use crate::prim_store::image::ImageCacheKey; use crate::prim_store::gradient::{ @@ -230,22 +229,20 @@ impl RenderTaskCache { texture_cache: &mut TextureCache, is_opaque: bool, parent: RenderTaskParent, - gpu_cache: &mut GpuCache, gpu_buffer_builder: &mut GpuBufferBuilderF, rg_builder: &mut RenderTaskGraphBuilder, surface_builder: &mut SurfaceBuilder, - f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF, &mut GpuCache) -> RenderTaskId, + f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF) -> RenderTaskId, ) -> RenderTaskId { // If this render task cache is being drawn this frame, ensure we hook up the // render task for it as a dependency of any render task that uses this as // an input source. let (task_id, rendered_this_frame) = match key { - None => (f(rg_builder, gpu_buffer_builder, gpu_cache), true), + None => (f(rg_builder, gpu_buffer_builder), true), Some(key) => self.request_render_task_impl( key, is_opaque, texture_cache, - gpu_cache, gpu_buffer_builder, rg_builder, f @@ -284,10 +281,9 @@ impl RenderTaskCache { key: RenderTaskCacheKey, is_opaque: bool, texture_cache: &mut TextureCache, - gpu_cache: &mut GpuCache, gpu_buffer_builder: &mut GpuBufferBuilderF, rg_builder: &mut RenderTaskGraphBuilder, - f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF, &mut GpuCache) -> RenderTaskId, + f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF) -> RenderTaskId, ) -> (RenderTaskId, bool) { let frame_id = self.frame_id; let size = key.size; @@ -312,7 +308,7 @@ impl RenderTaskCache { if texture_cache.request(&cache_entry.handle, gpu_buffer_builder) { // Invoke user closure to get render task chain // to draw this into the texture cache. - let render_task_id = f(rg_builder, gpu_buffer_builder, gpu_cache); + let render_task_id = f(rg_builder, gpu_buffer_builder); cache_entry.user_data = None; cache_entry.is_opaque = is_opaque; diff --git a/gfx/wr/webrender/src/render_task_graph.rs b/gfx/wr/webrender/src/render_task_graph.rs @@ -1094,19 +1094,20 @@ impl RenderTaskGraphBuilder { total_surface_count: usize, unique_surfaces: &[(i32, i32, ImageFormat)], ) { - use crate::internal_types::FrameStamp; + use crate::{internal_types::FrameStamp, renderer::{GpuBufferBuilderF, GpuBufferBuilderI}}; use api::{DocumentId, IdNamespace}; let mut rc = ResourceCache::new_for_testing(); - let mut gc = GpuCache::new(); let mut frame_stamp = FrameStamp::first(DocumentId::new(IdNamespace(1), 1)); frame_stamp.advance(); - gc.prepare_for_frames(); - gc.begin_frame(frame_stamp); let frame_memory = FrameMemory::fallback(); - let g = self.end_frame(&mut rc, &mut gc, &mut frame_memory.new_vec(), 2048, &frame_memory); + let mut gpu_buffers = GpuBufferBuilder { + f32: GpuBufferBuilderF::new(&frame_memory), + i32: GpuBufferBuilderI::new(&frame_memory), + }; + let g = self.end_frame(&mut rc, &mut gpu_buffers, &mut frame_memory.new_vec(), 2048, &frame_memory); g.print(); assert_eq!(g.passes.len(), pass_count); diff --git a/gfx/wr/webrender/src/renderer/gpu_cache.rs b/gfx/wr/webrender/src/renderer/gpu_cache.rs @@ -1,534 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -use std::{cmp, mem}; -use api::units::*; -use malloc_size_of::MallocSizeOfOps; -use crate::{ - device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO}, - gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList}, - internal_types::{FrameId, RenderTargetInfo, Swizzle}, - profiler, - render_api::MemoryReport, -}; - -/// Enabling this toggle would force the GPU cache scattered texture to -/// be resized every frame, which enables GPU debuggers to see if this -/// is performed correctly. -const GPU_CACHE_RESIZE_TEST: bool = false; - -/// Tracks the state of each row in the GPU cache texture. -struct CacheRow { - /// Mirrored block data on CPU for this row. We store a copy of - /// the data on the CPU side to improve upload batching. - cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>, - /// The first offset in this row that is dirty. - min_dirty: u16, - /// The last offset in this row that is dirty. - max_dirty: u16, -} - -impl CacheRow { - fn new() -> Self { - CacheRow { - cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]), - min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as _, - max_dirty: 0, - } - } - - fn is_dirty(&self) -> bool { - return self.min_dirty < self.max_dirty; - } - - fn clear_dirty(&mut self) { - self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as _; - self.max_dirty = 0; - } - - fn add_dirty(&mut self, block_offset: usize, block_count: usize) { - self.min_dirty = self.min_dirty.min(block_offset as _); - self.max_dirty = self.max_dirty.max((block_offset + block_count) as _); - } - - fn dirty_blocks(&self) -> &[GpuBlockData] { - return &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize]; - } -} - -/// The bus over which CPU and GPU versions of the GPU cache -/// get synchronized. -enum GpuCacheBus { - /// PBO-based updates, currently operate on a row granularity. - /// Therefore, are subject to fragmentation issues. - PixelBuffer { - /// Per-row data. - rows: Vec<CacheRow>, - }, - /// Shader-based scattering updates. Currently rendered by a set - /// of points into the GPU texture, each carrying a `GpuBlockData`. - Scatter { - /// Special program to run the scattered update. - program: Program, - /// VAO containing the source vertex buffers. - vao: CustomVAO, - /// VBO for positional data, supplied as normalized `u16`. - buf_position: VBO<[u16; 2]>, - /// VBO for gpu block data. - buf_value: VBO<GpuBlockData>, - /// Currently stored block count. - count: usize, - }, -} - -/// The device-specific representation of the cache texture in gpu_cache.rs -pub struct GpuCacheTexture { - texture: Option<Texture>, - bus: GpuCacheBus, -} - -impl GpuCacheTexture { - /// Ensures that we have an appropriately-sized texture. - fn ensure_texture(&mut self, device: &mut Device, height: i32) { - // If we already have a texture that works, we're done. - if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) { - if GPU_CACHE_RESIZE_TEST { - // Special debug mode - resize the texture even though it's fine. - } else { - return; - } - } - - // Take the old texture, if any. - let blit_source = self.texture.take(); - - // Create the new texture. - assert!(height >= 2, "Height is too small for ANGLE"); - let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height); - // GpuCacheBus::Scatter always requires the texture to be a render target. For - // GpuCacheBus::PixelBuffer, we only create the texture with a render target if - // RGBAF32 render targets are actually supported, and only if glCopyImageSubData - // is not. glCopyImageSubData does not require a render target to copy the texture - // data, and if neither RGBAF32 render targets nor glCopyImageSubData is supported, - // we simply re-upload the entire contents rather than copying upon resize. - let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data; - let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float; - let rt_info = if matches!(self.bus, GpuCacheBus::PixelBuffer { .. }) - && (supports_copy_image_sub_data || !supports_color_buffer_float) - { - None - } else { - Some(RenderTargetInfo { has_depth: false }) - }; - let mut texture = device.create_texture( - api::ImageBufferKind::Texture2D, - api::ImageFormat::RGBAF32, - new_size.width, - new_size.height, - TextureFilter::Nearest, - rt_info, - ); - - // Copy the contents of the previous texture, if applicable. - if let Some(blit_source) = blit_source { - if !supports_copy_image_sub_data && !supports_color_buffer_float { - // Cannot copy texture, so must re-upload everything. - match self.bus { - GpuCacheBus::PixelBuffer { ref mut rows } => { - for row in rows { - row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH); - } - } - GpuCacheBus::Scatter { .. } => { - panic!("Texture must be copyable to use scatter GPU cache bus method"); - } - } - } else { - device.copy_entire_texture(&mut texture, &blit_source); - } - device.delete_texture(blit_source); - } - - self.texture = Some(texture); - } - - pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> { - use super::desc::GPU_CACHE_UPDATE; - - let bus = if use_scatter { - assert!( - device.get_capabilities().supports_color_buffer_float, - "GpuCache scatter method requires EXT_color_buffer_float", - ); - let program = device.create_program_linked( - "gpu_cache_update", - &[], - &GPU_CACHE_UPDATE, - )?; - let buf_position = device.create_vbo(); - let buf_value = device.create_vbo(); - //Note: the vertex attributes have to be supplied in the same order - // as for program creation, but each assigned to a different stream. - let vao = device.create_custom_vao(&[ - buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]), - buf_value .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]), - ]); - GpuCacheBus::Scatter { - program, - vao, - buf_position, - buf_value, - count: 0, - } - } else { - GpuCacheBus::PixelBuffer { - rows: Vec::new(), - } - }; - - Ok(GpuCacheTexture { - texture: None, - bus, - }) - } - - pub fn deinit(mut self, device: &mut Device) { - if let Some(t) = self.texture.take() { - device.delete_texture(t); - } - if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus { - device.delete_program(program); - device.delete_custom_vao(vao); - device.delete_vbo(buf_position); - device.delete_vbo(buf_value); - } - } - - pub fn get_height(&self) -> i32 { - self.texture.as_ref().map_or(0, |t| t.get_dimensions().height) - } - - #[cfg(feature = "capture")] - pub fn get_texture(&self) -> &Texture { - self.texture.as_ref().unwrap() - } - - fn prepare_for_updates( - &mut self, - device: &mut Device, - total_block_count: usize, - max_height: i32, - ) { - self.ensure_texture(device, max_height); - match self.bus { - GpuCacheBus::PixelBuffer { .. } => {}, - GpuCacheBus::Scatter { - ref mut buf_position, - ref mut buf_value, - ref mut count, - .. - } => { - *count = 0; - if total_block_count > buf_value.allocated_count() { - device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT); - device.allocate_vbo(buf_value, total_block_count, super::ONE_TIME_USAGE_HINT); - } - } - } - } - - pub fn invalidate(&mut self) { - match self.bus { - GpuCacheBus::PixelBuffer { ref mut rows, .. } => { - info!("Invalidating GPU caches"); - for row in rows { - row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH); - } - } - GpuCacheBus::Scatter { .. } => { - warn!("Unable to invalidate scattered GPU cache"); - } - } - } - - fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) { - match self.bus { - GpuCacheBus::PixelBuffer { ref mut rows, .. } => { - for update in &updates.updates { - match *update { - GpuCacheUpdate::Copy { - block_index, - block_count, - address, - } => { - let row = address.v as usize; - - // Ensure that the CPU-side shadow copy of the GPU cache data has enough - // rows to apply this patch. - while rows.len() <= row { - // Add a new row. - rows.push(CacheRow::new()); - } - - // Copy the blocks from the patch array in the shadow CPU copy. - let block_offset = address.u as usize; - let data = &mut rows[row].cpu_blocks; - for i in 0 .. block_count { - data[block_offset + i] = updates.blocks[block_index + i]; - } - - // This row is dirty (needs to be updated in GPU texture). - rows[row].add_dirty(block_offset, block_count); - } - } - } - } - GpuCacheBus::Scatter { - ref buf_position, - ref buf_value, - ref mut count, - .. - } => { - //TODO: re-use this heap allocation - // Unused positions will be left as 0xFFFF, which translates to - // (1.0, 1.0) in the vertex output position and gets culled out - let mut position_data = vec![[!0u16; 2]; updates.blocks.len()]; - let size = self.texture.as_ref().unwrap().get_dimensions().to_usize(); - - for update in &updates.updates { - match *update { - GpuCacheUpdate::Copy { - block_index, - block_count, - address, - } => { - // Convert the absolute texel position into normalized - let y = ((2*address.v as usize + 1) << 15) / size.height; - for i in 0 .. block_count { - let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width; - position_data[block_index + i] = [x as _, y as _]; - } - } - } - } - - device.fill_vbo(buf_value, &updates.blocks, *count); - device.fill_vbo(buf_position, &position_data, *count); - *count += position_data.len(); - } - } - } - - fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize { - let texture = self.texture.as_ref().unwrap(); - match self.bus { - GpuCacheBus::PixelBuffer { ref mut rows } => { - let rows_dirty = rows - .iter() - .filter(|row| row.is_dirty()) - .count(); - if rows_dirty == 0 { - return 0 - } - - let mut uploader = device.upload_texture(pbo_pool); - - for (row_index, row) in rows.iter_mut().enumerate() { - if !row.is_dirty() { - continue; - } - - let blocks = row.dirty_blocks(); - let rect = DeviceIntRect::from_origin_and_size( - DeviceIntPoint::new(row.min_dirty as i32, row_index as i32), - DeviceIntSize::new(blocks.len() as i32, 1), - ); - - uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len()); - - row.clear_dirty(); - } - - uploader.flush(device); - - rows_dirty - } - GpuCacheBus::Scatter { ref program, ref vao, count, .. } => { - device.disable_depth(); - device.set_blend(false); - device.bind_program(program); - device.bind_custom_vao(vao); - device.bind_draw_target( - DrawTarget::from_texture( - texture, - false, - ), - ); - device.draw_nonindexed_points(0, count as _); - 0 - } - } - } - - #[cfg(feature = "replay")] - pub fn remove_texture(&mut self, device: &mut Device) { - if let Some(t) = self.texture.take() { - device.delete_texture(t); - } - } - - #[cfg(feature = "replay")] - pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) { - assert!(self.texture.is_none()); - match self.bus { - GpuCacheBus::PixelBuffer { ref mut rows, .. } => { - let dim = texture.get_dimensions(); - let blocks = unsafe { - std::slice::from_raw_parts( - data.as_ptr() as *const GpuBlockData, - data.len() / mem::size_of::<GpuBlockData>(), - ) - }; - // fill up the CPU cache from the contents we just loaded - rows.clear(); - rows.extend((0 .. dim.height).map(|_| CacheRow::new())); - let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH); - debug_assert_eq!(chunks.len(), rows.len()); - for (row, chunk) in rows.iter_mut().zip(chunks) { - row.cpu_blocks.copy_from_slice(chunk); - } - } - GpuCacheBus::Scatter { .. } => {} - } - self.texture = Some(texture); - } - - pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) { - if let GpuCacheBus::PixelBuffer{ref rows, ..} = self.bus { - for row in rows.iter() { - report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _) }; - } - } - - // GPU cache GPU memory. - report.gpu_cache_textures += - self.texture.as_ref().map_or(0, |t| t.size_in_bytes()); - } - - pub fn gpu_size_in_bytes(&self) -> usize { - match &self.texture { - Some(tex) => tex.size_in_bytes(), - None => 0, - } - } -} - -impl super::Renderer { - pub fn update_gpu_cache(&mut self) { - let _gm = self.gpu_profiler.start_marker("gpu cache update"); - - // For an artificial stress test of GPU cache resizing, - // always pass an extra update list with at least one block in it. - let gpu_cache_height = self.gpu_cache_texture.get_height(); - if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST { - self.pending_gpu_cache_updates.push(GpuCacheUpdateList { - frame_id: FrameId::INVALID, - clear: false, - height: gpu_cache_height, - blocks: vec![[1f32; 4].into()], - updates: Vec::new(), - debug_commands: Vec::new(), - }); - } - - let (updated_blocks, max_requested_height) = self - .pending_gpu_cache_updates - .iter() - .fold((0, gpu_cache_height), |(count, height), list| { - (count + list.blocks.len(), cmp::max(height, list.height)) - }); - - if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow { - self.gpu_cache_overflow = true; - self.renderer_errors.push(super::RendererError::MaxTextureSize); - } - - // Note: if we decide to switch to scatter-style GPU cache update - // permanently, we can have this code nicer with `BufferUploader` kind - // of helper, similarly to how `TextureUploader` API is used. - self.gpu_cache_texture.prepare_for_updates( - &mut self.device, - updated_blocks, - max_requested_height, - ); - - for update_list in self.pending_gpu_cache_updates.drain(..) { - assert!(update_list.height <= max_requested_height); - if update_list.frame_id > self.gpu_cache_frame_id { - self.gpu_cache_frame_id = update_list.frame_id - } - self.gpu_cache_texture - .update(&mut self.device, &update_list); - } - - self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME); - let updated_rows = self.gpu_cache_texture.flush( - &mut self.device, - &mut self.texture_upload_pbo_pool - ); - self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME); - - self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows); - self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks); - } - - pub fn prepare_gpu_cache(&mut self) -> Result<(), super::RendererError> { - self.profile.start_time(profiler::GPU_CACHE_PREPARE_TIME); - - if self.pending_gpu_cache_clear { - let use_scatter = - matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. }); - let new_cache = match GpuCacheTexture::new(&mut self.device, use_scatter) { - Ok(cache) => cache, - Err(err) => { - self.profile.end_time(profiler::GPU_CACHE_PREPARE_TIME); - return Err(err); - } - }; - let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache); - old_cache.deinit(&mut self.device); - self.pending_gpu_cache_clear = false; - } - - self.update_gpu_cache(); - - // Note: the texture might have changed during the `update`, - // so we need to bind it here. - self.device.bind_texture( - super::TextureSampler::GpuCache, - self.gpu_cache_texture.texture.as_ref().unwrap(), - Swizzle::default(), - ); - - self.profile.end_time(profiler::GPU_CACHE_PREPARE_TIME); - - Ok(()) - } - - pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) { - let texture = self.gpu_cache_texture.texture.as_ref().unwrap(); - let size = device_size_as_framebuffer_size(texture.get_dimensions()); - let mut texels = vec![0; (size.width * size.height * 16) as usize]; - self.device.begin_frame(); - self.device.bind_read_target(ReadTarget::from_texture(texture)); - self.device.read_pixels_into( - size.into(), - api::ImageFormat::RGBAF32, - &mut texels, - ); - self.device.reset_read_target(); - self.device.end_frame(); - (texture.get_dimensions(), texels) - } -} diff --git a/gfx/wr/webrender/src/renderer/init.rs b/gfx/wr/webrender/src/renderer/init.rs @@ -19,7 +19,7 @@ use crate::frame_builder::FrameBuilderConfig; use crate::glyph_cache::GlyphCache; use glyph_rasterizer::{GlyphRasterThread, GlyphRasterizer, SharedFontResources}; use crate::gpu_types::PrimitiveInstanceData; -use crate::internal_types::{FastHashMap, FastHashSet, FrameId}; +use crate::internal_types::{FastHashMap, FastHashSet}; use crate::picture; use crate::profiler::{self, Profiler, TransactionProfile}; use crate::device::query::{GpuProfiler, GpuDebugMethod}; @@ -29,7 +29,7 @@ use crate::scene_builder_thread::{SceneBuilderThread, SceneBuilderThreadChannels use crate::texture_cache::{TextureCache, TextureCacheConfig}; use crate::picture_textures::PictureTextures; use crate::renderer::{ - debug, gpu_cache, vertex, gl, + debug, vertex, gl, Renderer, DebugOverlayState, BufferDamageTracker, PipelineInfo, TextureResolver, RendererError, ShaderPrecacheFlags, VERTEX_DATA_TEXTURE_COUNT, upload::UploadTexturePool, @@ -514,25 +514,8 @@ pub fn create_webrender_instance( vertex_data_textures.push(vertex::VertexDataTextures::new()); } - // On some (mostly older, integrated) GPUs, the normal GPU texture cache update path - // doesn't work well when running on ANGLE, causing CPU stalls inside D3D and/or the - // GPU driver. See https://bugzilla.mozilla.org/show_bug.cgi?id=1576637 for much - // more detail. To reduce the number of code paths we have active that require testing, - // we will enable the GPU cache scatter update path on all devices running with ANGLE. - // We want a better solution long-term, but for now this is a significant performance - // improvement on HD4600 era GPUs, and shouldn't hurt performance in a noticeable - // way on other systems running under ANGLE. let is_software = device.get_capabilities().renderer_name.starts_with("Software"); - // On other GL platforms, like macOS or Android, creating many PBOs is very inefficient. - // This is what happens in GPU cache updates in PBO path. Instead, we switch everything - // except software GL to use the GPU scattered updates. - let supports_scatter = device.get_capabilities().supports_color_buffer_float; - let gpu_cache_texture = gpu_cache::GpuCacheTexture::new( - &mut device, - supports_scatter && !is_software, - )?; - device.end_frame(); let backend_notifier = notifier.clone(); @@ -780,8 +763,6 @@ pub fn create_webrender_instance( pending_texture_updates: Vec::new(), pending_texture_cache_updates: false, pending_native_surface_updates: Vec::new(), - pending_gpu_cache_updates: Vec::new(), - pending_gpu_cache_clear: false, pending_shader_updates: Vec::new(), shaders, debug: debug::LazyInitializedDebugRenderer::new(), @@ -789,7 +770,6 @@ pub fn create_webrender_instance( profile: TransactionProfile::new(), frame_counter: 0, resource_upload_time: 0.0, - gpu_cache_upload_time: 0.0, profiler: Profiler::new(), max_recorded_profiles: options.max_recorded_profiles, clear_color: options.clear_color, @@ -808,10 +788,6 @@ pub fn create_webrender_instance( size_of_ops: make_size_of_ops(), cpu_profiles: VecDeque::new(), gpu_profiles: VecDeque::new(), - gpu_cache_texture, - gpu_cache_debug_chunks: Vec::new(), - gpu_cache_frame_id: FrameId::INVALID, - gpu_cache_overflow: false, texture_upload_pbo_pool, staging_texture_pool, texture_resolver, diff --git a/gfx/wr/webrender/src/renderer/mod.rs b/gfx/wr/webrender/src/renderer/mod.rs @@ -70,11 +70,9 @@ use crate::device::FBOId; use crate::debug_item::DebugItem; use crate::frame_builder::Frame; use glyph_rasterizer::GlyphFormat; -use crate::gpu_cache::GpuCacheUpdateList; -use crate::gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd}; use crate::gpu_types::{ScalingInstance, SvgFilterInstance, SVGFEFilterInstance, CopyInstance, PrimitiveInstanceData}; use crate::gpu_types::{BlurInstance, ClearInstance, CompositeInstance, ZBufferId}; -use crate::internal_types::{TextureSource, TextureSourceExternal, FrameId, FrameVec}; +use crate::internal_types::{TextureSource, TextureSourceExternal, FrameVec}; #[cfg(any(feature = "capture", feature = "replay"))] use crate::internal_types::DebugOutput; use crate::internal_types::{CacheTextureId, FastHashMap, FastHashSet, RenderedDocument, ResultMsg}; @@ -122,7 +120,6 @@ use std::collections::hash_map::Entry; mod debug; mod gpu_buffer; -mod gpu_cache; mod shade; mod vertex; mod upload; @@ -390,7 +387,6 @@ pub(crate) enum TextureSampler { Color0, Color1, Color2, - GpuCache, TransformPalette, RenderTasks, Dither, @@ -420,15 +416,14 @@ impl Into<TextureSlot> for TextureSampler { TextureSampler::Color0 => TextureSlot(0), TextureSampler::Color1 => TextureSlot(1), TextureSampler::Color2 => TextureSlot(2), - TextureSampler::GpuCache => TextureSlot(3), - TextureSampler::TransformPalette => TextureSlot(4), - TextureSampler::RenderTasks => TextureSlot(5), - TextureSampler::Dither => TextureSlot(6), - TextureSampler::PrimitiveHeadersF => TextureSlot(7), - TextureSampler::PrimitiveHeadersI => TextureSlot(8), - TextureSampler::ClipMask => TextureSlot(9), - TextureSampler::GpuBufferF => TextureSlot(10), - TextureSampler::GpuBufferI => TextureSlot(11), + TextureSampler::TransformPalette => TextureSlot(3), + TextureSampler::RenderTasks => TextureSlot(4), + TextureSampler::Dither => TextureSlot(5), + TextureSampler::PrimitiveHeadersF => TextureSlot(6), + TextureSampler::PrimitiveHeadersI => TextureSlot(7), + TextureSampler::ClipMask => TextureSlot(8), + TextureSampler::GpuBufferF => TextureSlot(9), + TextureSampler::GpuBufferI => TextureSlot(10), } } } @@ -825,8 +820,6 @@ pub struct Renderer { /// True if there are any TextureCacheUpdate pending. pending_texture_cache_updates: bool, pending_native_surface_updates: Vec<NativeSurfaceOperation>, - pending_gpu_cache_updates: Vec<GpuCacheUpdateList>, - pending_gpu_cache_clear: bool, pending_shader_updates: Vec<PathBuf>, active_documents: FastHashMap<DocumentId, RenderedDocument>, @@ -845,7 +838,6 @@ pub struct Renderer { profile: TransactionProfile, frame_counter: u64, resource_upload_time: f64, - gpu_cache_upload_time: f64, profiler: Profiler, #[cfg(feature = "debugger")] debugger: Debugger, @@ -855,18 +847,9 @@ pub struct Renderer { pub gpu_profiler: GpuProfiler, vaos: vertex::RendererVAOs, - gpu_cache_texture: gpu_cache::GpuCacheTexture, vertex_data_textures: Vec<vertex::VertexDataTextures>, current_vertex_data_textures: usize, - /// When the GPU cache debugger is enabled, we keep track of the live blocks - /// in the GPU cache so that we can use them for the debug display. This - /// member stores those live blocks, indexed by row. - gpu_cache_debug_chunks: Vec<Vec<GpuCacheDebugChunk>>, - - gpu_cache_frame_id: FrameId, - gpu_cache_overflow: bool, - pipeline_info: PipelineInfo, // Manages and resolves source textures IDs to real texture IDs. @@ -1119,32 +1102,6 @@ impl Renderer { self.pending_native_surface_updates.extend(resource_update_list.native_surface_updates); self.documents_seen.insert(document_id); } - ResultMsg::UpdateGpuCache(mut list) => { - if list.clear { - self.pending_gpu_cache_clear = true; - } - if list.clear { - self.gpu_cache_debug_chunks = Vec::new(); - } - for cmd in mem::replace(&mut list.debug_commands, Vec::new()) { - match cmd { - GpuCacheDebugCmd::Alloc(chunk) => { - let row = chunk.address.v as usize; - if row >= self.gpu_cache_debug_chunks.len() { - self.gpu_cache_debug_chunks.resize(row + 1, Vec::new()); - } - self.gpu_cache_debug_chunks[row].push(chunk); - }, - GpuCacheDebugCmd::Free(address) => { - let chunks = &mut self.gpu_cache_debug_chunks[address.v as usize]; - let pos = chunks.iter() - .position(|x| x.address == address).unwrap(); - chunks.remove(pos); - }, - } - } - self.pending_gpu_cache_updates.push(list); - } ResultMsg::UpdateResources { resource_updates, memory_pressure, @@ -1369,9 +1326,6 @@ impl Renderer { | DebugCommand::SimulateLongSceneBuild(_) | DebugCommand::EnableNativeCompositor(_) | DebugCommand::SetBatchingLookback(_) => {} - DebugCommand::InvalidateGpuCache => { - self.gpu_cache_texture.invalidate(); - } DebugCommand::SetFlags(flags) => { self.set_debug_flags(flags); } @@ -1507,7 +1461,6 @@ impl Renderer { DebugFlags::RENDER_TARGET_DBG | DebugFlags::TEXTURE_CACHE_DBG | DebugFlags::EPOCHS | - DebugFlags::GPU_CACHE_DBG | DebugFlags::PICTURE_CACHING_DBG | DebugFlags::PICTURE_BORDERS | DebugFlags::ZOOM_DBG | @@ -1750,40 +1703,29 @@ impl Renderer { self.update_deferred_resolves(&frame.deferred_resolves, &mut frame.gpu_buffer_f); - match self.prepare_gpu_cache() { - Ok(..) => { - assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id, - "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})", - frame.gpu_cache_frame_id, self.gpu_cache_frame_id); - - self.draw_frame( - frame, - device_size, - buffer_age, - &mut results, - ); - - // TODO(nical): do this automatically by selecting counters in the wr profiler - // Profile marker for the number of invalidated picture cache - if thread_is_being_profiled() { - let duration = Duration::new(0,0); - if let Some(n) = self.profile.get(profiler::RENDERED_PICTURE_TILES) { - let message = (n as usize).to_string(); - add_text_marker("NumPictureCacheInvalidated", &message, duration); - } - } - - if device_size.is_some() { - self.draw_frame_debug_items(&frame.debug_items); - } + self.draw_frame( + frame, + device_size, + buffer_age, + &mut results, + ); - self.profile.merge(profile); - } - Err(e) => { - self.renderer_errors.push(e); + // TODO(nical): do this automatically by selecting counters in the wr profiler + // Profile marker for the number of invalidated picture cache + if thread_is_being_profiled() { + let duration = Duration::new(0,0); + if let Some(n) = self.profile.get(profiler::RENDERED_PICTURE_TILES) { + let message = (n as usize).to_string(); + add_text_marker("NumPictureCacheInvalidated", &message, duration); } } + if device_size.is_some() { + self.draw_frame_debug_items(&frame.debug_items); + } + + self.profile.merge(profile); + self.unlock_external_images(&frame.deferred_resolves); let _gm = self.gpu_profiler.start_marker("end frame"); @@ -1803,7 +1745,6 @@ impl Renderer { self.bind_debug_overlay(device_size).map(|draw_target| { self.draw_render_target_debug(&draw_target); self.draw_texture_cache_debug(&draw_target); - self.draw_gpu_cache_debug(device_size); self.draw_zoom_debug(device_size); self.draw_epoch_debug(); self.draw_window_visibility_debug(); @@ -1851,8 +1792,6 @@ impl Renderer { self.frame_counter += 1; results.stats.resource_upload_time = self.resource_upload_time; self.resource_upload_time = 0.0; - results.stats.gpu_cache_upload_time = self.gpu_cache_upload_time; - self.gpu_cache_upload_time = 0.0; if let Some(stats) = active_doc.frame_stats.take() { // Copy the full frame stats to RendererStats @@ -5313,10 +5252,6 @@ impl Renderer { let gpu_buffer_mb = (gpu_buffer_bytes_f + gpu_buffer_bytes_i) as f32 * bytes_to_mb; self.profile.set(profiler::GPU_BUFFER_MEM, gpu_buffer_mb); - let gpu_cache_bytes = self.gpu_cache_texture.gpu_size_in_bytes(); - let gpu_cache_mb = gpu_cache_bytes as f32 * bytes_to_mb; - self.profile.set(profiler::GPU_CACHE_MEM, gpu_cache_mb); - // Determine the present mode and dirty rects, if device_size // is Some(..). If it's None, no composite will occur and only // picture cache and texture cache targets will be updated. @@ -6024,42 +5959,6 @@ impl Renderer { } } - fn draw_gpu_cache_debug(&mut self, device_size: DeviceIntSize) { - if !self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) { - return; - } - - let debug_renderer = match self.debug.get_mut(&mut self.device) { - Some(render) => render, - None => return, - }; - - let (x_off, y_off) = (30f32, 30f32); - let height = self.gpu_cache_texture.get_height() - .min(device_size.height - (y_off as i32) * 2) as usize; - debug_renderer.add_quad( - x_off, - y_off, - x_off + MAX_VERTEX_TEXTURE_WIDTH as f32, - y_off + height as f32, - ColorU::new(80, 80, 80, 80), - ColorU::new(80, 80, 80, 80), - ); - - let upper = self.gpu_cache_debug_chunks.len().min(height); - for chunk in self.gpu_cache_debug_chunks[0..upper].iter().flatten() { - let color = ColorU::new(250, 0, 0, 200); - debug_renderer.add_quad( - x_off + chunk.address.u as f32, - y_off + chunk.address.v as f32, - x_off + chunk.address.u as f32 + chunk.size as f32, - y_off + chunk.address.v as f32 + 1.0, - color, - color, - ); - } - } - /// Pass-through to `Device::read_pixels_into`, used by Gecko's WR bindings. pub fn read_pixels_into(&mut self, rect: FramebufferIntRect, format: ImageFormat, output: &mut [u8]) { self.device.read_pixels_into(rect, format, output); @@ -6087,7 +5986,6 @@ impl Renderer { } compositor.deinit(&mut self.device); } - self.gpu_cache_texture.deinit(&mut self.device); if let Some(dither_matrix_texture) = self.dither_matrix_texture { self.device.delete_texture(dither_matrix_texture); } @@ -6128,9 +6026,6 @@ impl Renderer { pub fn report_memory(&self, swgl: *mut c_void) -> MemoryReport { let mut report = MemoryReport::default(); - // GPU cache CPU memory. - self.gpu_cache_texture.report_memory_to(&mut report, self.size_of_ops.as_ref().unwrap()); - self.staging_texture_pool.report_memory_to(&mut report, self.size_of_ops.as_ref().unwrap()); // Render task CPU memory. @@ -6247,7 +6142,6 @@ pub struct RendererStats { pub color_target_count: usize, pub texture_upload_mb: f64, pub resource_upload_time: f64, - pub gpu_cache_upload_time: f64, pub gecko_display_list_time: f64, pub wr_display_list_time: f64, pub scene_build_time: f64, @@ -6307,8 +6201,6 @@ struct PlainTexture { #[cfg_attr(feature = "replay", derive(Deserialize))] struct PlainRenderer { device_size: Option<DeviceIntSize>, - gpu_cache: PlainTexture, - gpu_cache_frame_id: FrameId, textures: FastHashMap<CacheTextureId, PlainTexture>, } @@ -6541,15 +6433,8 @@ impl Renderer { fs::create_dir(&path_textures).unwrap(); } - info!("saving GPU cache"); - self.update_gpu_cache(); // flush pending updates let mut plain_self = PlainRenderer { device_size: self.device_size, - gpu_cache: Self::save_texture( - self.gpu_cache_texture.get_texture(), - None, "gpu", &root, &mut self.device, - ), - gpu_cache_frame_id: self.gpu_cache_frame_id, textures: FastHashMap::default(), }; @@ -6658,7 +6543,6 @@ impl Renderer { } self.device.begin_frame(); - self.gpu_cache_texture.remove_texture(&mut self.device); if let Some(renderer) = config.deserialize_for_resource::<PlainRenderer, _>("renderer") { info!("loading cached textures"); @@ -6682,17 +6566,6 @@ impl Renderer { category: texture.category.unwrap_or(TextureCacheCategory::Standalone), }); } - - info!("loading gpu cache"); - let (t, gpu_cache_data) = Self::load_texture( - ImageBufferKind::Texture2D, - &renderer.gpu_cache, - Some(RenderTargetInfo { has_depth: false }), - &root, - &mut self.device, - ); - self.gpu_cache_texture.load_from_data(t, gpu_cache_data); - self.gpu_cache_frame_id = renderer.gpu_cache_frame_id; } else { info!("loading cached textures"); self.device.begin_frame(); diff --git a/gfx/wr/webrender/src/renderer/shade.rs b/gfx/wr/webrender/src/renderer/shade.rs @@ -287,7 +287,6 @@ impl LazilyCompiledShader { ("sColor0", TextureSampler::Color0), ("sTransformPalette", TextureSampler::TransformPalette), ("sRenderTasks", TextureSampler::RenderTasks), - ("sGpuCache", TextureSampler::GpuCache), ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF), ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI), ("sGpuBufferF", TextureSampler::GpuBufferF), @@ -305,7 +304,6 @@ impl LazilyCompiledShader { ("sDither", TextureSampler::Dither), ("sTransformPalette", TextureSampler::TransformPalette), ("sRenderTasks", TextureSampler::RenderTasks), - ("sGpuCache", TextureSampler::GpuCache), ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF), ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI), ("sClipMask", TextureSampler::ClipMask), diff --git a/gfx/wr/webrender/src/renderer/vertex.rs b/gfx/wr/webrender/src/renderer/vertex.rs @@ -505,22 +505,6 @@ pub mod desc { ], }; - pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor { - vertex_attributes: &[ - VertexAttribute { - name: "aPosition", - count: 2, - kind: VertexAttributeKind::U16Norm, - }, - VertexAttribute { - name: "aValue", - count: 4, - kind: VertexAttributeKind::F32, - }, - ], - instance_attributes: &[], - }; - pub const RESOLVE: VertexDescriptor = VertexDescriptor { vertex_attributes: &[VertexAttribute { name: "aPosition", diff --git a/gfx/wr/webrender/src/resource_cache.rs b/gfx/wr/webrender/src/resource_cache.rs @@ -27,7 +27,6 @@ use crate::glyph_cache::{GlyphCache, CachedGlyphInfo}; use crate::glyph_cache::GlyphCacheEntry; use glyph_rasterizer::{GLYPH_FLASHING, FontInstance, GlyphFormat, GlyphKey, GlyphRasterizer, GlyphRasterJob}; use glyph_rasterizer::{SharedFontResources, BaseFontInstance}; -use crate::gpu_cache::GpuCache; use crate::gpu_types::UvRectKind; use crate::internal_types::{ CacheTextureId, FastHashMap, FastHashSet, TextureSource, ResourceUpdateList, @@ -632,18 +631,16 @@ impl ResourceCache { key: Option<RenderTaskCacheKey>, is_opaque: bool, parent: RenderTaskParent, - gpu_cache: &mut GpuCache, gpu_buffer_builder: &mut GpuBufferBuilderF, rg_builder: &mut RenderTaskGraphBuilder, surface_builder: &mut SurfaceBuilder, - f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF, &mut GpuCache) -> RenderTaskId, + f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF) -> RenderTaskId, ) -> RenderTaskId { self.cached_render_tasks.request_render_task( key.clone(), &mut self.texture_cache, is_opaque, parent, - gpu_cache, gpu_buffer_builder, rg_builder, surface_builder, @@ -657,13 +654,12 @@ impl ResourceCache { size: DeviceIntSize, rg_builder: &mut RenderTaskGraphBuilder, gpu_buffer_builder: &mut GpuBufferBuilderF, - gpu_cache: &mut GpuCache, is_opaque: bool, adjustment: &AdjustedImageSource, - f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF, &mut GpuCache) -> RenderTaskId, + f: &mut dyn FnMut(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilderF) -> RenderTaskId, ) -> RenderTaskId { - let task_id = f(rg_builder, gpu_buffer_builder, gpu_cache); + let task_id = f(rg_builder, gpu_buffer_builder); let render_task = rg_builder.get_task_mut(task_id); diff --git a/gfx/wr/webrender/src/texture_cache.rs b/gfx/wr/webrender/src/texture_cache.rs @@ -550,11 +550,9 @@ impl TextureCacheConfig { /// frame in which they are requested, and may be evicted. The API supports /// querying whether an entry is still available. /// -/// The TextureCache is different from the GpuCache in that the former stores -/// images, whereas the latter stores data and parameters for use in the shaders. -/// This means that the texture cache can be visualized, which is a good way to -/// understand how it works. Enabling gfx.webrender.debug.texture-cache shows a -/// live view of its contents in Firefox. +/// The texture cache can be visualized, which is a good way to understand how +/// it works. Enabling gfx.webrender.debug.texture-cache shows a live view of +/// its contents in Firefox. #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct TextureCache { @@ -1649,6 +1647,8 @@ impl TextureCacheUpdate { #[cfg(test)] mod test_texture_cache { + use crate::renderer::GpuBufferBuilderF; + #[test] fn check_allocation_size_balance() { // Allocate some glyphs, observe the total allocation size, and free @@ -1658,11 +1658,13 @@ mod test_texture_cache { use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction, TargetShader}; use crate::device::TextureFilter; use crate::gpu_types::UvRectKind; + use crate::frame_allocator::FrameMemory; use api::{ImageDescriptor, ImageDescriptorFlags, ImageFormat, DirtyRect}; use api::units::*; use euclid::size2; let mut texture_cache = TextureCache::new_for_testing(2048, ImageFormat::BGRA8); - let mut gpu_cache = GpuCache::new_for_testing(); + let memory = FrameMemory::fallback(); + let mut gpu_buffer = GpuBufferBuilderF::new(&memory); let sizes: &[DeviceIntSize] = &[ size2(23, 27), @@ -1683,7 +1685,7 @@ mod test_texture_cache { let handles: Vec<TextureCacheHandle> = sizes.iter().map(|size| { let mut texture_cache_handle = TextureCacheHandle::invalid(); - texture_cache.request(&texture_cache_handle, &mut gpu_cache); + texture_cache.request(&texture_cache_handle, &mut gpu_buffer); texture_cache.update( &mut texture_cache_handle, ImageDescriptor { @@ -1697,7 +1699,7 @@ mod test_texture_cache { None, [0.0; 4], DirtyRect::All, - &mut gpu_cache, + &mut gpu_buffer, None, UvRectKind::Rect, Eviction::Manual, diff --git a/gfx/wr/webrender/src/visibility.rs b/gfx/wr/webrender/src/visibility.rs @@ -17,7 +17,6 @@ use crate::renderer::GpuBufferBuilder; use crate::spatial_tree::{SpatialTree, SpatialNodeIndex}; use crate::clip::{ClipChainInstance, ClipTree}; use crate::frame_builder::FrameBuilderConfig; -use crate::gpu_cache::GpuCache; use crate::picture::{PictureCompositeMode, ClusterFlags, SurfaceInfo, TileCacheInstance}; use crate::picture::{SurfaceIndex, RasterConfig, SubSliceIndex}; use crate::prim_store::{ClipTaskIndex, PictureIndex, PrimitiveInstanceKind}; @@ -42,7 +41,6 @@ pub struct FrameVisibilityContext<'a> { pub struct FrameVisibilityState<'a> { pub clip_store: &'a mut ClipStore, pub resource_cache: &'a mut ResourceCache, - pub gpu_cache: &'a mut GpuCache, pub frame_gpu_data: &'a mut GpuBufferBuilder, pub data_stores: &'a mut DataStores, pub clip_tree: &'a mut ClipTree, diff --git a/gfx/wr/webrender_build/src/shader.rs b/gfx/wr/webrender_build/src/shader.rs @@ -195,8 +195,8 @@ pub fn build_shader_prefix_string<F: FnMut(&str)>( // detect which platform we're targeting let is_macos = match std::env::var("CARGO_CFG_TARGET_OS") { Ok(os) => os == "macos", - // if this is not called from build.rs (e.g. the gpu_cache_update shader or - // if the optimized shader pref is disabled) we want to use the runtime value + // if this is not called from build.rs (e.g. if the optimized shader + // pref is disabled) we want to use the runtime value Err(_) => cfg!(target_os = "macos"), }; let is_android = match std::env::var("CARGO_CFG_TARGET_OS") { diff --git a/gfx/wr/wrench/src/main.rs b/gfx/wr/wrench/src/main.rs @@ -787,7 +787,6 @@ pub fn main() { } else if let Some(subargs) = args.subcommand_matches("png") { let surface = match subargs.value_of("surface") { Some("screen") | None => png::ReadSurface::Screen, - Some("gpu-cache") => png::ReadSurface::GpuCache, _ => panic!("Unknown surface argument value") }; let output_path = subargs.value_of("OUTPUT").map(PathBuf::from); diff --git a/gfx/wr/wrench/src/png.rs b/gfx/wr/wrench/src/png.rs @@ -14,7 +14,6 @@ use crate::yaml_frame_reader::YamlFrameReader; pub enum ReadSurface { Screen, - GpuCache, } pub struct SaveSettings { @@ -98,14 +97,6 @@ pub fn png( try_crop: true, }) } - ReadSurface::GpuCache => { - let (size, data) = wrench.renderer - .read_gpu_cache(); - (size, data, SaveSettings { - flip_vertical: false, - try_crop: false, - }) - } }; let out_path = out_path.unwrap_or_else(|| {