tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 5202e780e1906ff18568c035bd76a5552bc751a6
parent f236619b2c964a9650bb6dc9a0f4f4e28be23bd2
Author: Nicolas Silva <nical@fastmail.com>
Date:   Tue,  9 Dec 2025 08:19:02 +0000

Bug 1996818 - Convert remaining uses of the gpu cache to gpu buffer. r=gw

Differential Revision: https://phabricator.services.mozilla.com/D270683

Diffstat:
M gfx/wr/webrender/res/blend.glsl | 6 +++---
M gfx/wr/webrender/res/brush.glsl | 10 +++++-----
M gfx/wr/webrender/res/brush_image.glsl | 2 +-
M gfx/wr/webrender/res/brush_linear_gradient.glsl | 2 +-
M gfx/wr/webrender/res/brush_solid.glsl | 2 +-
M gfx/wr/webrender/res/brush_yuv_image.glsl | 2 +-
M gfx/wr/webrender/res/clip_shared.glsl | 2 +-
M gfx/wr/webrender/res/cs_clip_box_shadow.glsl | 6 +++---
M gfx/wr/webrender/res/cs_conic_gradient.glsl | 2 +-
M gfx/wr/webrender/res/cs_linear_gradient.glsl | 2 +-
M gfx/wr/webrender/res/cs_radial_gradient.glsl | 2 +-
M gfx/wr/webrender/res/cs_svg_filter.glsl | 22 +++++++++++++---------
M gfx/wr/webrender/res/cs_svg_filter_node.glsl | 30 +++++++++++++++++-------------
M gfx/wr/webrender/res/gpu_cache.glsl | 2 --
M gfx/wr/webrender/res/image_source.glsl | 2 ++
M gfx/wr/webrender/res/prim_shared.glsl | 2 +-
M gfx/wr/webrender/res/ps_quad.glsl | 4 ++--
M gfx/wr/webrender/res/ps_split_composite.glsl | 13 +++++--------
M gfx/wr/webrender/res/ps_text_run.glsl | 4 ++--
M gfx/wr/webrender/src/batch.rs | 76 ++++++++++++++++++++++++++++++++++++----------------------------------------
M gfx/wr/webrender/src/command_buffer.rs | 8 ++++----
M gfx/wr/webrender/src/frame_builder.rs | 61 ++++++++++++++++++++++++++++---------------------------------
M gfx/wr/webrender/src/gpu_types.rs | 19 +++++++++----------
M gfx/wr/webrender/src/picture.rs | 118 +++++++++++++++++++++++++++++++++++++------------------------------------------
M gfx/wr/webrender/src/prepare.rs | 75 ++++++++++++++++++++++++++-------------------------------------------------
M gfx/wr/webrender/src/prim_store/borders.rs | 60 ++++++++++++++++++++++++++----------------------------------
M gfx/wr/webrender/src/prim_store/gradient/conic.rs | 38 +++++++++++++++++---------------------
M gfx/wr/webrender/src/prim_store/gradient/linear.rs | 75 ++++++++++++++++++++++++++++++++++++---------------------------------------
M gfx/wr/webrender/src/prim_store/gradient/mod.rs | 6 +++---
M gfx/wr/webrender/src/prim_store/gradient/radial.rs | 39 ++++++++++++++++++---------------------
M gfx/wr/webrender/src/prim_store/image.rs | 33 ++++++++++++++++-----------------
M gfx/wr/webrender/src/prim_store/line_dec.rs | 20 ++++++++++----------
M gfx/wr/webrender/src/prim_store/mod.rs | 34 +++++++++++++++++-----------------
M gfx/wr/webrender/src/prim_store/text_run.rs | 48 ++++++++++++++++++++++++------------------------
M gfx/wr/webrender/src/quad.rs | 4 ++--
M gfx/wr/webrender/src/render_target.rs | 9 ++-------
M gfx/wr/webrender/src/renderer/mod.rs | 2 +-
M gfx/wr/webrender/src/renderer/vertex.rs | 12 ++++++------
M gfx/wr/webrender/src/resource_cache.rs | 1 -
M gfx/wr/webrender/src/texture_cache.rs | 2 +-
40 files changed, 398 insertions(+), 459 deletions(-)

diff --git a/gfx/wr/webrender/res/blend.glsl b/gfx/wr/webrender/res/blend.glsl @@ -77,14 +77,14 @@ void SetupFilterParams( ); color_offset = vec4(0.0); } else if (op == FILTER_COLOR_MATRIX) { - vec4 mat_data[4] = fetch_from_gpu_cache_4(gpu_data_address); - vec4 offset_data = fetch_from_gpu_cache_1(gpu_data_address + 4); + vec4 mat_data[4] = fetch_from_gpu_buffer_4f(gpu_data_address); + vec4 offset_data = fetch_from_gpu_buffer_1f(gpu_data_address + 4); color_mat = mat4(mat_data[0], mat_data[1], mat_data[2], mat_data[3]); color_offset = offset_data; } else if (op == FILTER_COMPONENT_TRANSFER) { table_address = gpu_data_address; } else if (op == FILTER_FLOOD) { - color_offset = fetch_from_gpu_cache_1(gpu_data_address); + color_offset = fetch_from_gpu_buffer_1f(gpu_data_address); } } #endif diff --git a/gfx/wr/webrender/res/brush.glsl b/gfx/wr/webrender/res/brush.glsl @@ -24,15 +24,15 @@ /// | z: flags | | | | local_clip_rect | +-----------------------+ | | /// | segment_index | | | +---------------------+ | | /// | w: resource_address +--+ | | | | -/// +----------------------------+ | | | (sGpuCache) | | -/// | | | (sGpuCache) +------------+ | | +/// +----------------------------+ | | | (float gpu buffer) | | +/// | | | (float gpu buffer) +------------+ | | /// | | | +---------------+ | Transform | <--------+ | -/// (sGpuCache) | | +-> | Picture task | +------------+ | +/// (float gpu buffer) | | +-> | Picture task | +------------+ | /// +-------------+ | | | | | /// | Resource | <---+ | | ... 
| | /// | | | +---------------+ +--------------------------------+ /// | | | | -/// +-------------+ | (sGpuCache) v (sGpuCache) +/// +-------------+ | (float gpu buffer) v (float gpu buffer) /// | +---------------+ +--------------+---------------+-+-+ /// +-----> | Clip area | | Brush data | Segment data | | | /// | | | | | | | @@ -113,7 +113,7 @@ void brush_shader_main_vs( VECS_PER_SPECIFIC_BRUSH + instance.segment_index * VECS_PER_SEGMENT; - vec4[2] segment_info = fetch_from_gpu_cache_2(segment_address); + vec4[2] segment_info = fetch_from_gpu_buffer_2f(segment_address); segment_rect = RectWithEndpoint(segment_info[0].xy, segment_info[0].zw); segment_rect.p0 += ph.local_rect.p0; segment_rect.p1 += ph.local_rect.p0; diff --git a/gfx/wr/webrender/res/brush_image.glsl b/gfx/wr/webrender/res/brush_image.glsl @@ -38,7 +38,7 @@ struct ImageBrushData { }; ImageBrushData fetch_image_data(int address) { - vec4[3] raw_data = fetch_from_gpu_cache_3(address); + vec4[3] raw_data = fetch_from_gpu_buffer_3f(address); ImageBrushData data = ImageBrushData( raw_data[0], raw_data[1], diff --git a/gfx/wr/webrender/res/brush_linear_gradient.glsl b/gfx/wr/webrender/res/brush_linear_gradient.glsl @@ -20,7 +20,7 @@ struct Gradient { }; Gradient fetch_gradient(int address) { - vec4 data[2] = fetch_from_gpu_cache_2(address); + vec4 data[2] = fetch_from_gpu_buffer_2f(address); return Gradient( data[0], int(data[1].x), diff --git a/gfx/wr/webrender/res/brush_solid.glsl b/gfx/wr/webrender/res/brush_solid.glsl @@ -15,7 +15,7 @@ struct SolidBrush { }; SolidBrush fetch_solid_primitive(int address) { - vec4 data = fetch_from_gpu_cache_1(address); + vec4 data = fetch_from_gpu_buffer_1f(address); return SolidBrush(data); } diff --git a/gfx/wr/webrender/res/brush_yuv_image.glsl b/gfx/wr/webrender/res/brush_yuv_image.glsl @@ -28,7 +28,7 @@ flat varying mediump int vRescaleFactor; #ifdef WR_VERTEX_SHADER YuvPrimitive fetch_yuv_primitive(int address) { - vec4 data = fetch_from_gpu_cache_1(address); + 
vec4 data = fetch_from_gpu_buffer_1f(address); // From YuvImageData.write_prim_gpu_blocks: int channel_bit_depth = int(data.x); int color_space = int(data.y); diff --git a/gfx/wr/webrender/res/clip_shared.glsl b/gfx/wr/webrender/res/clip_shared.glsl @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include rect,render_task,gpu_cache,transform +#include rect,render_task,transform #ifdef WR_VERTEX_SHADER diff --git a/gfx/wr/webrender/res/cs_clip_box_shadow.glsl b/gfx/wr/webrender/res/cs_clip_box_shadow.glsl @@ -17,7 +17,7 @@ flat varying mediump vec2 vClipMode; #ifdef WR_VERTEX_SHADER -PER_INSTANCE in ivec2 aClipDataResourceAddress; +PER_INSTANCE in int aClipDataResourceAddress; PER_INSTANCE in vec2 aClipSrcRectSize; PER_INSTANCE in int aClipMode; PER_INSTANCE in ivec2 aStretchMode; @@ -25,7 +25,7 @@ PER_INSTANCE in vec4 aClipDestRect; struct ClipMaskInstanceBoxShadow { ClipMaskInstanceCommon base; - ivec2 resource_address; + int resource_address; }; ClipMaskInstanceBoxShadow fetch_clip_item() { @@ -61,7 +61,7 @@ void main(void) { Transform clip_transform = fetch_transform(cmi.base.clip_transform_id); Transform prim_transform = fetch_transform(cmi.base.prim_transform_id); BoxShadowData bs_data = fetch_data(); - ImageSource res = fetch_image_source_direct(cmi.resource_address); + ImageSource res = fetch_image_source(cmi.resource_address); RectWithEndpoint dest_rect = bs_data.dest_rect; diff --git a/gfx/wr/webrender/res/cs_conic_gradient.glsl b/gfx/wr/webrender/res/cs_conic_gradient.glsl @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include shared,rect,render_task,gpu_cache,gpu_buffer,gradient +#include shared,rect,render_task,gpu_buffer,gradient #define PI 3.141592653589793 diff --git a/gfx/wr/webrender/res/cs_linear_gradient.glsl b/gfx/wr/webrender/res/cs_linear_gradient.glsl @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include shared,rect,render_task,gpu_cache,gpu_buffer,gradient +#include shared,rect,render_task,gpu_buffer,gradient varying highp vec2 v_pos; diff --git a/gfx/wr/webrender/res/cs_radial_gradient.glsl b/gfx/wr/webrender/res/cs_radial_gradient.glsl @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include shared,rect,render_task,gpu_cache,gpu_buffer,gradient +#include shared,rect,render_task,gpu_buffer,gradient varying highp vec2 v_pos; diff --git a/gfx/wr/webrender/res/cs_svg_filter.glsl b/gfx/wr/webrender/res/cs_svg_filter.glsl @@ -53,7 +53,7 @@ PER_INSTANCE in int aFilterInput2TaskAddress; PER_INSTANCE in int aFilterKind; PER_INSTANCE in int aFilterInputCount; PER_INSTANCE in int aFilterGenericInt; -PER_INSTANCE in ivec2 aFilterExtraDataAddress; +PER_INSTANCE in int aFilterExtraDataAddress; struct FilterTask { RectWithEndpoint task_rect; @@ -126,18 +126,20 @@ void main(void) { vData = ivec4(aFilterGenericInt, 0, 0, 0); break; case FILTER_FLOOD: - vFilterData0 = fetch_from_gpu_buffer_1f_direct(aFilterExtraDataAddress); + vFilterData0 = fetch_from_gpu_buffer_1f(aFilterExtraDataAddress); break; case FILTER_OPACITY: vFloat0.x = filter_task.user_data.x; break; - case FILTER_COLOR_MATRIX: - vec4 mat_data[4] = fetch_from_gpu_buffer_4f_direct(aFilterExtraDataAddress); + case FILTER_COLOR_MATRIX: { + ivec2 buffer_uv = get_gpu_buffer_uv(aFilterExtraDataAddress); + vec4 mat_data[4] = fetch_from_gpu_buffer_4f_direct(buffer_uv); vColorMat = mat4(mat_data[0], mat_data[1], 
mat_data[2], mat_data[3]); - vFilterData0 = fetch_from_gpu_buffer_1f_direct(aFilterExtraDataAddress + ivec2(4, 0)); + vFilterData0 = fetch_from_gpu_buffer_1f_direct(buffer_uv + ivec2(4, 0)); break; + } case FILTER_DROP_SHADOW: - vFilterData0 = fetch_from_gpu_buffer_1f_direct(aFilterExtraDataAddress); + vFilterData0 = fetch_from_gpu_buffer_1f(aFilterExtraDataAddress); break; case FILTER_OFFSET: vec2 texture_size = vec2(TEX_SIZE(sColor0).xy); @@ -148,13 +150,15 @@ void main(void) { clipRect /= texture_size.xyxy; vFilterData1 = clipRect; break; - case FILTER_COMPONENT_TRANSFER: - vData = ivec4(aFilterExtraDataAddress, 0, 0); + case FILTER_COMPONENT_TRANSFER: { + ivec2 buffer_uv = get_gpu_buffer_uv(aFilterExtraDataAddress); + vData = ivec4(buffer_uv, 0, 0); break; + } case FILTER_COMPOSITE: vData = ivec4(aFilterGenericInt, 0, 0, 0); if (aFilterGenericInt == COMPOSITE_ARITHMETIC) { - vFilterData0 = fetch_from_gpu_buffer_1f_direct(aFilterExtraDataAddress); + vFilterData0 = fetch_from_gpu_buffer_1f(aFilterExtraDataAddress); } break; default: diff --git a/gfx/wr/webrender/res/cs_svg_filter_node.glsl b/gfx/wr/webrender/res/cs_svg_filter_node.glsl @@ -172,7 +172,7 @@ PER_INSTANCE in int aFilterInput1TaskAddress; PER_INSTANCE in int aFilterInput2TaskAddress; PER_INSTANCE in int aFilterKind; PER_INSTANCE in int aFilterInputCount; -PER_INSTANCE in ivec2 aFilterExtraDataAddress; +PER_INSTANCE in int aFilterExtraDataAddress; // used for feFlood and feDropShadow colors // this is based on SrgbToLinear below, but that version hits SWGL compile @@ -270,19 +270,23 @@ void main(void) { case FILTER_BLEND_SOFT_LIGHT_CONVERTSRGB: break; case FILTER_COLOR_MATRIX: - case FILTER_COLOR_MATRIX_CONVERTSRGB: - vec4 mat_data[4] = fetch_from_gpu_buffer_4f_direct(aFilterExtraDataAddress); + case FILTER_COLOR_MATRIX_CONVERTSRGB: { + ivec2 gpu_buffer_uv = get_gpu_buffer_uv(aFilterExtraDataAddress); + vec4 mat_data[4] = fetch_from_gpu_buffer_4f_direct(gpu_buffer_uv); vColorMat = mat4(mat_data[0], 
mat_data[1], mat_data[2], mat_data[3]); - vFilterData0 = fetch_from_gpu_buffer_1f_direct(aFilterExtraDataAddress + ivec2(4, 0)); + vFilterData0 = fetch_from_gpu_buffer_1f_direct(gpu_buffer_uv + ivec2(4, 0)); break; + } case FILTER_COMPONENT_TRANSFER: - case FILTER_COMPONENT_TRANSFER_CONVERTSRGB: - vData = ivec4(aFilterExtraDataAddress, 0, 0); + case FILTER_COMPONENT_TRANSFER_CONVERTSRGB: { + ivec2 gpu_buffer_uv = get_gpu_buffer_uv(aFilterExtraDataAddress); + vData = ivec4(gpu_buffer_uv, 0, 0); break; + } case FILTER_COMPOSITE_ARITHMETIC: case FILTER_COMPOSITE_ARITHMETIC_CONVERTSRGB: // arithmetic parameters - vFilterData0 = fetch_from_gpu_buffer_1f_direct(aFilterExtraDataAddress); + vFilterData0 = fetch_from_gpu_buffer_1f(aFilterExtraDataAddress); break; case FILTER_COMPOSITE_ATOP: case FILTER_COMPOSITE_ATOP_CONVERTSRGB: @@ -326,12 +330,12 @@ void main(void) { // TODO break; case FILTER_DROP_SHADOW: - vFilterData0 = fetch_from_gpu_buffer_1f_direct(aFilterExtraDataAddress); + vFilterData0 = fetch_from_gpu_buffer_1f(aFilterExtraDataAddress); // premultiply the color vFilterData0.rgb = vFilterData0.rgb * vFilterData0.a; break; case FILTER_DROP_SHADOW_CONVERTSRGB: - vFilterData0 = fetch_from_gpu_buffer_1f_direct(aFilterExtraDataAddress); + vFilterData0 = fetch_from_gpu_buffer_1f(aFilterExtraDataAddress); // convert from sRGB to linearRGB and premultiply by alpha vFilterData0.rgb = vertexSrgbToLinear(vFilterData0.rgb); vFilterData0.rgb = vFilterData0.rgb * vFilterData0.a; @@ -682,10 +686,10 @@ void main(void) { result = floor(clamp(Ns * 255.0, vec4(0.0), vec4(255.0))); // SWGL doesn't have an intrinsic for ivec4(vec4) k = ivec4(int(result.r), int(result.g), int(result.b), int(result.a)); - result.r = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(k.r, 0)).r; - result.g = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(k.g, 0)).g; - result.b = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(k.b, 0)).b; - result.a = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(k.a, 0)).a; 
+ result.r = fetch_from_gpu_buffer_1f_direct(vData.xy + ivec2(k.r, 0)).r; + result.g = fetch_from_gpu_buffer_1f_direct(vData.xy + ivec2(k.g, 0)).g; + result.b = fetch_from_gpu_buffer_1f_direct(vData.xy + ivec2(k.b, 0)).b; + result.a = fetch_from_gpu_buffer_1f_direct(vData.xy + ivec2(k.a, 0)).a; result.rgb = result.rgb * result.a; } else if (vFilterKind == FILTER_COMPOSITE_ARITHMETIC || vFilterKind == FILTER_COMPOSITE_ARITHMETIC_CONVERTSRGB) { result = Rs * Rb * vFilterData0.x + Rs * vFilterData0.y + Rb * vFilterData0.z + vec4(vFilterData0.w); diff --git a/gfx/wr/webrender/res/gpu_cache.glsl b/gfx/wr/webrender/res/gpu_cache.glsl @@ -4,8 +4,6 @@ uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuCache; -#define VECS_PER_IMAGE_RESOURCE 2 - // TODO(gw): This is here temporarily while we have // both GPU store and cache. When the GPU // store code is removed, we can change the diff --git a/gfx/wr/webrender/res/image_source.glsl b/gfx/wr/webrender/res/image_source.glsl @@ -4,6 +4,8 @@ #include gpu_buffer +#define VECS_PER_IMAGE_RESOURCE 2 + #ifdef WR_VERTEX_SHADER #include rect diff --git a/gfx/wr/webrender/res/prim_shared.glsl b/gfx/wr/webrender/res/prim_shared.glsl @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include rect,render_task,gpu_cache,transform,image_source +#include rect,render_task,transform,image_source #define EXTEND_MODE_CLAMP 0 #define EXTEND_MODE_REPEAT 1 diff --git a/gfx/wr/webrender/res/ps_quad.glsl b/gfx/wr/webrender/res/ps_quad.glsl @@ -10,14 +10,14 @@ /// ///```ascii /// (int gpu buffer) -/// +---------------+ (sGpuCache) +/// +---------------+ (float gpu buffer) /// (instance-step vertex attr) | Int header | +-----------+ /// +-----------------------------+ | | | Transform | /// | Quad instance (uvec4) | +--> | transform id +--> +-----------+ /// | | | | z id | /// | x: int prim address +---+ +---------------+ (float gpu buffer) /// | y: float prim address +--------------------------> +-----------+--------------+-+-+ -/// | z: quad flags | (sGpuCache) | Quad Prim | Quad Segment | | | +/// | z: quad flags | (float gpu buffer) | Quad Prim | Quad Segment | | | /// | edge flags | +--------------------+ | | | | | /// | part index | | Picture task | | bounds | rect | | | /// | segment index | | | | clip | uv rect | | | diff --git a/gfx/wr/webrender/res/ps_split_composite.glsl b/gfx/wr/webrender/res/ps_split_composite.glsl @@ -21,17 +21,14 @@ struct SplitGeometry { }; SplitGeometry fetch_split_geometry(int address) { - ivec2 uv = get_gpu_cache_uv(address); - - vec4 data0 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)); - vec4 data1 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)); + vec4[2] data = fetch_from_gpu_buffer_2f(address); SplitGeometry geo; geo.local = vec2[4]( - data0.xy, - data0.zw, - data1.xy, - data1.zw + data[0].xy, + data[0].zw, + data[1].xy, + data[1].zw ); return geo; diff --git a/gfx/wr/webrender/res/ps_text_run.glsl b/gfx/wr/webrender/res/ps_text_run.glsl @@ -45,7 +45,7 @@ Glyph fetch_glyph(int specific_prim_address, int glyph_address = specific_prim_address + VECS_PER_TEXT_RUN + int(uint(glyph_index) / GLYPHS_PER_GPU_BLOCK); - vec4 data = fetch_from_gpu_cache_1(glyph_address); + vec4 data = fetch_from_gpu_buffer_1f(glyph_address); // 
Select XY or ZW based on glyph index. vec2 glyph = mix(data.xy, data.zw, bvec2(uint(glyph_index) % GLYPHS_PER_GPU_BLOCK == 1U)); @@ -69,7 +69,7 @@ struct TextRun { }; TextRun fetch_text_run(int address) { - vec4 data = fetch_from_gpu_cache_1(address); + vec4 data = fetch_from_gpu_buffer_1f(address); return TextRun(data); } diff --git a/gfx/wr/webrender/src/batch.rs b/gfx/wr/webrender/src/batch.rs @@ -11,7 +11,6 @@ use crate::composite::CompositorSurfaceKind; use crate::pattern::PatternKind; use crate::spatial_tree::{SpatialTree, SpatialNodeIndex, CoordinateSystemId}; use glyph_rasterizer::{GlyphFormat, SubpixelDirection}; -use crate::gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress}; use crate::gpu_types::{BrushFlags, BrushInstance, ImageSource, PrimitiveHeaders, UvRectKind, ZBufferId, ZBufferIdGenerator}; use crate::gpu_types::SplitCompositeInstance; use crate::gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance}; @@ -28,7 +27,7 @@ use crate::quad; use crate::render_target::RenderTargetContext; use crate::render_task_graph::{RenderTaskId, RenderTaskGraph}; use crate::render_task::{RenderTaskAddress, RenderTaskKind, SubPass}; -use crate::renderer::{BlendMode, GpuBufferBuilder, ShaderColorMode}; +use crate::renderer::{BlendMode, GpuBufferAddress, GpuBufferBlockF, GpuBufferBuilder, ShaderColorMode}; use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH; use crate::resource_cache::{GlyphFetchResult, ImageProperties}; use crate::space::SpaceMapper; @@ -38,6 +37,7 @@ use std::{f32, i32, usize}; use crate::util::{project_rect, MaxRect, TransformedRectKind, ScaleOffset}; use crate::segment::EdgeAaSegmentMask; + // Special sentinel value recognized by the shader. It is considered to be // a dummy task that doesn't mask out anything. 
const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(0x7fffffff); @@ -820,7 +820,6 @@ impl BatchBuilder { cmd: &PrimitiveCommand, prim_spatial_node_index: SpatialNodeIndex, ctx: &RenderTargetContext, - gpu_cache: &mut GpuCache, render_tasks: &RenderTaskGraph, prim_headers: &mut PrimitiveHeaders, transforms: &mut TransformPalette, @@ -992,7 +991,7 @@ impl BatchBuilder { } let blend_mode = BlendMode::PremultipliedAlpha; - let prim_cache_address = gpu_cache.get_address(&ctx.globals.default_image_handle); + let prim_cache_address = ctx.globals.default_image_data; match picture.raster_config { Some(ref raster_config) => { @@ -1040,7 +1039,7 @@ impl BatchBuilder { let picture_prim_header = PrimitiveHeader { local_rect: prim_rect, local_clip_rect, - specific_prim_address: prim_cache_address, + specific_prim_address: prim_cache_address.as_int(), transform_id, z: z_id, render_task_address: self.batcher.render_task_address, @@ -1160,15 +1159,12 @@ impl BatchBuilder { let shadow_key = BatchKey::new(kind, blend_mode, shadow_textures); let content_key = BatchKey::new(kind, blend_mode, content_textures); - for (shadow, shadow_gpu_data) in shadows.iter().zip(picture.extra_gpu_data_handles.iter()) { - // Get the GPU cache address of the extra data handle. 
- let shadow_prim_address = gpu_cache.get_address(shadow_gpu_data); - + for (shadow, shadow_prim_address) in shadows.iter().zip(picture.extra_gpu_data.iter()) { let shadow_rect = picture_prim_header.local_rect.translate(shadow.offset); let shadow_prim_header = PrimitiveHeader { local_rect: shadow_rect, - specific_prim_address: shadow_prim_address, + specific_prim_address: shadow_prim_address.as_int(), z: z_id, user_data: ImageBrushData { color_mode: ShaderColorMode::Alpha, @@ -1243,10 +1239,10 @@ impl BatchBuilder { (0.01745329251 * angle * 65536.0) as i32 } Filter::ColorMatrix(_) => { - picture.extra_gpu_data_handles[0].as_int(gpu_cache) + picture.extra_gpu_data[0].as_int() } Filter::Flood(_) => { - picture.extra_gpu_data_handles[0].as_int(gpu_cache) + picture.extra_gpu_data[0].as_int() } // These filters are handled via different paths. @@ -1493,7 +1489,7 @@ impl BatchBuilder { ); let prim_header = PrimitiveHeader { - specific_prim_address: prim_cache_address, + specific_prim_address: prim_cache_address.as_int(), user_data: batch_params.prim_user_data, ..picture_prim_header }; @@ -1600,7 +1596,7 @@ impl BatchBuilder { transform_id, z: z_id, render_task_address: self.batcher.render_task_address, - specific_prim_address: GpuCacheAddress::INVALID, // Will be overridden by most uses + specific_prim_address: GpuBufferAddress::INVALID.as_int(), // Will be overridden by most uses user_data: [0; 4], // Will be overridden by most uses }; @@ -1624,11 +1620,11 @@ impl BatchBuilder { }; let (prim_cache_address, segments) = if segment_instance_index == SegmentInstanceIndex::UNUSED { - (gpu_cache.try_get_address(&common_data.gpu_cache_handle), None) + (common_data.gpu_buffer_address, None) } else { let segment_instance = &ctx.scratch.segment_instances[segment_instance_index]; let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]); - (Some(gpu_cache.get_address(&segment_instance.gpu_cache_handle)), segments) + (segment_instance.gpu_data, segments) }; // The 
following primitives lower to the image brush shader in the same way. @@ -1672,7 +1668,7 @@ impl BatchBuilder { }.encode(); let prim_header = PrimitiveHeader { - specific_prim_address: gpu_cache.get_address(&common_data.gpu_cache_handle), + specific_prim_address: common_data.gpu_buffer_address.as_int(), user_data: prim_user_data, ..base_prim_header }; @@ -1775,7 +1771,7 @@ impl BatchBuilder { // use of interning. let prim_header = PrimitiveHeader { - specific_prim_address: prim_cache_address.unwrap(), + specific_prim_address: prim_cache_address.as_int(), user_data: [get_shader_opacity(1.0), 0, 0, 0], ..base_prim_header }; @@ -1834,7 +1830,7 @@ impl BatchBuilder { ); let prim_header = PrimitiveHeader { - specific_prim_address: prim_cache_address.unwrap(), + specific_prim_address: prim_cache_address.as_int(), user_data: batch_params.prim_user_data, ..base_prim_header }; @@ -1882,7 +1878,7 @@ impl BatchBuilder { min: prim_rect.min - run.reference_frame_relative_offset, max: run.snapped_reference_frame_relative_offset.to_point(), }, - specific_prim_address: prim_cache_address.unwrap(), + specific_prim_address: prim_cache_address.as_int(), user_data: [ (run.raster_scale * 65535.0).round() as i32, 0, @@ -2115,7 +2111,7 @@ impl BatchBuilder { }; let prim_header = PrimitiveHeader { - specific_prim_address: prim_cache_address.unwrap(), + specific_prim_address: prim_cache_address.as_int(), user_data: prim_user_data, ..base_prim_header }; @@ -2150,7 +2146,7 @@ impl BatchBuilder { ); let prim_header = PrimitiveHeader { - specific_prim_address: prim_cache_address.unwrap(), + specific_prim_address: prim_cache_address.as_int(), user_data: batch_params.prim_user_data, ..base_prim_header }; @@ -2183,7 +2179,6 @@ impl BatchBuilder { z_id, bounding_rect, ctx, - gpu_cache, render_tasks, prim_headers, ); @@ -2245,7 +2240,7 @@ impl BatchBuilder { debug_assert_ne!(segment_instance_index, SegmentInstanceIndex::INVALID); let prim_header = PrimitiveHeader { - specific_prim_address: 
prim_cache_address.unwrap(), + specific_prim_address: prim_cache_address.as_int(), user_data: batch_params.prim_user_data, ..base_prim_header }; @@ -2278,7 +2273,6 @@ impl BatchBuilder { z_id, bounding_rect, ctx, - gpu_cache, render_tasks, prim_headers, ); @@ -2330,11 +2324,11 @@ impl BatchBuilder { debug_assert_ne!(image_instance.segment_instance_index, SegmentInstanceIndex::INVALID); let (prim_cache_address, segments) = if image_instance.segment_instance_index == SegmentInstanceIndex::UNUSED { - (prim_cache_address.unwrap(), None) + (prim_cache_address, None) } else { let segment_instance = &ctx.scratch.segment_instances[image_instance.segment_instance_index]; let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]); - (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments) + (segment_instance.gpu_data, segments) }; let local_rect = image_instance.adjustment.map_local_rect(&prim_rect); @@ -2344,7 +2338,7 @@ impl BatchBuilder { let prim_header = PrimitiveHeader { local_rect, local_clip_rect, - specific_prim_address: prim_cache_address, + specific_prim_address: prim_cache_address.as_int(), user_data: batch_params.prim_user_data, ..base_prim_header }; @@ -2382,7 +2376,7 @@ impl BatchBuilder { ).unwrap(); // use temporary block storage since we don't know the number of visible tiles beforehand - let mut gpu_blocks = Vec::<GpuBlockData>::with_capacity(3 + max_tiles_per_header * 2); + let mut gpu_blocks = Vec::<GpuBufferBlockF>::with_capacity(3 + max_tiles_per_header * 2); for chunk in image_instance.visible_tiles.chunks(max_tiles_per_header) { gpu_blocks.clear(); gpu_blocks.push(image_data.color.premultiplied().into()); //color @@ -2392,13 +2386,18 @@ impl BatchBuilder { for tile in chunk { let tile_rect = tile.local_rect.translate(-prim_rect.min.to_vector()); gpu_blocks.push(tile_rect.into()); - gpu_blocks.push(GpuBlockData::EMPTY); + gpu_blocks.push([0.0; 4].into()); } - let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks); 
+ let mut writer = gpu_buffer_builder.f32.write_blocks(gpu_blocks.len()); + for block in &gpu_blocks { + writer.push_one(*block); + } + let specific_prim_address = writer.finish(); + let prim_header = PrimitiveHeader { local_clip_rect: image_instance.tight_local_clip_rect, - specific_prim_address: gpu_cache.get_address(&gpu_handle), + specific_prim_address: specific_prim_address.as_int(), user_data: prim_user_data, ..base_prim_header }; @@ -2454,7 +2453,7 @@ impl BatchBuilder { let prim_header = PrimitiveHeader { user_data: user_data, - specific_prim_address: gpu_cache.get_address(&prim_data.gpu_cache_handle), + specific_prim_address: prim_data.gpu_buffer_address.as_int(), ..base_prim_header }; let prim_header_index = prim_headers.push(&prim_header); @@ -2496,7 +2495,7 @@ impl BatchBuilder { for tile in visible_tiles { let tile_prim_header = PrimitiveHeader { - specific_prim_address: gpu_cache.get_address(&tile.handle), + specific_prim_address: tile.address.as_int(), local_rect: tile.local_rect, local_clip_rect: tile.local_clip_rect, user_data: user_data, @@ -2544,7 +2543,7 @@ impl BatchBuilder { ); let prim_header = PrimitiveHeader { - specific_prim_address: gpu_cache.get_address(&ctx.globals.default_image_handle), + specific_prim_address: ctx.globals.default_image_data.as_int(), user_data: ImageBrushData { color_mode: ShaderColorMode::Image, alpha_type: AlphaType::PremultipliedAlpha, @@ -2633,12 +2632,9 @@ impl BatchBuilder { z_id: ZBufferId, bounding_rect: &PictureRect, ctx: &RenderTargetContext, - gpu_cache: &mut GpuCache, render_tasks: &RenderTaskGraph, prim_headers: &mut PrimitiveHeaders, ) { - let prim_cache_address = gpu_cache.get_address(&ctx.globals.default_black_rect_handle); - let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture( clip_task_index, render_tasks, @@ -2647,7 +2643,7 @@ impl BatchBuilder { let prim_header = PrimitiveHeader { local_rect: prim_rect, local_clip_rect, - specific_prim_address: prim_cache_address, + 
specific_prim_address: ctx.globals.default_black_rect_address.as_int(), transform_id, z: z_id, render_task_address: self.batcher.render_task_address, @@ -3201,7 +3197,7 @@ impl ClipBatcher { .or_insert_with(|| ctx.frame_memory.new_vec()) .push(ClipMaskInstanceBoxShadow { common, - resource_address: uv_rect_address, + resource_address: uv_rect_address.as_int(), shadow_data: BoxShadowData { src_rect_size: source.original_alloc_size, clip_mode: source.clip_mode as i32, diff --git a/gfx/wr/webrender/src/command_buffer.rs b/gfx/wr/webrender/src/command_buffer.rs @@ -4,7 +4,7 @@ use api::units::PictureRect; use crate::pattern::{PatternKind, PatternShaderInput}; -use crate::{spatial_tree::SpatialNodeIndex, render_task_graph::RenderTaskId, surface::SurfaceTileDescriptor, picture::TileKey, renderer::GpuBufferAddress, FastHashMap, prim_store::PrimitiveInstanceIndex, gpu_cache::GpuCacheAddress}; +use crate::{spatial_tree::SpatialNodeIndex, render_task_graph::RenderTaskId, surface::SurfaceTileDescriptor, picture::TileKey, renderer::GpuBufferAddress, FastHashMap, prim_store::PrimitiveInstanceIndex}; use crate::gpu_types::{QuadSegment, TransformPaletteId}; use crate::segment::EdgeAaSegmentMask; @@ -112,7 +112,7 @@ pub enum PrimitiveCommand { }, Complex { prim_instance_index: PrimitiveInstanceIndex, - gpu_address: GpuCacheAddress, + gpu_address: GpuBufferAddress, }, Instance { prim_instance_index: PrimitiveInstanceIndex, @@ -142,7 +142,7 @@ impl PrimitiveCommand { pub fn complex( prim_instance_index: PrimitiveInstanceIndex, - gpu_address: GpuCacheAddress, + gpu_address: GpuBufferAddress, ) -> Self { PrimitiveCommand::Complex { prim_instance_index, @@ -284,7 +284,7 @@ impl CommandBuffer { Command::CMD_DRAW_COMPLEX_PRIM => { let prim_instance_index = PrimitiveInstanceIndex(param); let data = cmd_iter.next().unwrap(); - let gpu_address = GpuCacheAddress { + let gpu_address = GpuBufferAddress { u: (data.0 >> 16) as u16, v: (data.0 & 0xffff) as u16, }; diff --git 
a/gfx/wr/webrender/src/frame_builder.rs b/gfx/wr/webrender/src/frame_builder.rs @@ -13,7 +13,7 @@ use crate::spatial_node::SpatialNodeType; use crate::spatial_tree::{SpatialTree, SpatialNodeIndex}; use crate::composite::{CompositorKind, CompositeState, CompositeStatePreallocator}; use crate::debug_item::DebugItem; -use crate::gpu_cache::{GpuCache, GpuCacheHandle}; +use crate::gpu_cache::GpuCache; use crate::gpu_types::{PrimitiveHeaders, TransformPalette, ZBufferIdGenerator}; use crate::gpu_types::{QuadSegment, TransformData}; use crate::internal_types::{FastHashMap, PlaneSplitter, FrameId, FrameStamp}; @@ -25,7 +25,7 @@ use crate::prim_store::{PictureIndex, PrimitiveScratchBuffer}; use crate::prim_store::{DeferredResolve, PrimitiveInstance}; use crate::profiler::{self, TransactionProfile}; use crate::render_backend::{DataStores, ScratchBuffer}; -use crate::renderer::{GpuBufferF, GpuBufferBuilderF, GpuBufferI, GpuBufferBuilderI, GpuBufferBuilder}; +use crate::renderer::{GpuBufferAddress, GpuBufferBuilder, GpuBufferBuilderF, GpuBufferBuilderI, GpuBufferF, GpuBufferI}; use crate::render_target::{PictureCacheTarget, PictureCacheTargetKind}; use crate::render_target::{RenderTargetContext, RenderTargetKind, RenderTarget}; use crate::render_task_graph::{Pass, RenderTaskGraph, RenderTaskId, SubPassSurface}; @@ -81,40 +81,40 @@ pub struct FrameBuilderConfig { pub struct FrameGlobalResources { /// The image shader block for the most common / default /// set of image parameters (color white, stretch == rect.size). - pub default_image_handle: GpuCacheHandle, + pub default_image_data: GpuBufferAddress, /// A GPU cache config for drawing cut-out rectangle primitives. /// This is used to 'cut out' overlay tiles where a compositor /// surface exists. 
- pub default_black_rect_handle: GpuCacheHandle, + pub default_black_rect_address: GpuBufferAddress, } impl FrameGlobalResources { pub fn empty() -> Self { FrameGlobalResources { - default_image_handle: GpuCacheHandle::new(), - default_black_rect_handle: GpuCacheHandle::new(), + default_image_data: GpuBufferAddress::INVALID, + default_black_rect_address: GpuBufferAddress::INVALID, } } pub fn update( &mut self, - gpu_cache: &mut GpuCache, + gpu_buffers: &mut GpuBufferBuilder, ) { - if let Some(mut request) = gpu_cache.request(&mut self.default_image_handle) { - request.push(PremultipliedColorF::WHITE); - request.push(PremultipliedColorF::WHITE); - request.push([ - -1.0, // -ve means use prim rect for stretch size - 0.0, - 0.0, - 0.0, - ]); - } - - if let Some(mut request) = gpu_cache.request(&mut self.default_black_rect_handle) { - request.push(PremultipliedColorF::BLACK); - } + let mut writer = gpu_buffers.f32.write_blocks(3); + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one([ + -1.0, // -ve means use prim rect for stretch size + 0.0, + 0.0, + 0.0, + ]); + self.default_image_data = writer.finish(); + + let mut writer = gpu_buffers.f32.write_blocks(1); + writer.push_one(PremultipliedColorF::BLACK); + self.default_black_rect_address = writer.finish(); } } @@ -663,6 +663,11 @@ impl FrameBuilder { profile_marker!("BuildFrame"); let mut frame_memory = FrameMemory::new(chunk_pool, stamp.frame_id()); + // TODO(gw): Recycle backing vec buffers for gpu buffer builder between frames + let mut gpu_buffer_builder = GpuBufferBuilder { + f32: GpuBufferBuilderF::new(&frame_memory), + i32: GpuBufferBuilderI::new(&frame_memory), + }; profile.set(profiler::PRIMITIVES, scene.prim_instances.len()); profile.set(profiler::PICTURE_CACHE_SLICES, scene.tile_cache_config.picture_cache_slice_count); @@ -674,7 +679,7 @@ impl FrameBuilder { // statically during scene building. 
scene.surfaces.clear(); - self.globals.update(gpu_cache); + self.globals.update(&mut gpu_buffer_builder); spatial_tree.update_tree(scene_properties); let mut transform_palette = spatial_tree.build_transform_palette(&frame_memory); @@ -700,12 +705,6 @@ impl FrameBuilder { let mut cmd_buffers = CommandBufferList::new(); - // TODO(gw): Recycle backing vec buffers for gpu buffer builder between frames - let mut gpu_buffer_builder = GpuBufferBuilder { - f32: GpuBufferBuilderF::new(&frame_memory), - i32: GpuBufferBuilderI::new(&frame_memory), - }; - self.build_layer_screen_rects_and_cull_layers( scene, present, @@ -1057,7 +1056,7 @@ pub fn build_render_pass( src_pass: &Pass, screen_size: DeviceIntSize, ctx: &mut RenderTargetContext, - gpu_cache: &mut GpuCache, + _gpu_cache: &mut GpuCache, gpu_buffer_builder: &mut GpuBufferBuilder, render_tasks: &RenderTaskGraph, clip_store: &ClipStore, @@ -1167,7 +1166,6 @@ pub fn build_render_pass( cmd, spatial_node_index, ctx, - gpu_cache, render_tasks, prim_headers, transforms, @@ -1271,7 +1269,6 @@ pub fn build_render_pass( pass.color.build( ctx, - gpu_cache, render_tasks, prim_headers, transforms, @@ -1282,7 +1279,6 @@ pub fn build_render_pass( ); pass.alpha.build( ctx, - gpu_cache, render_tasks, prim_headers, transforms, @@ -1295,7 +1291,6 @@ pub fn build_render_pass( for target in &mut pass.texture_cache.values_mut() { target.build( ctx, - gpu_cache, render_tasks, prim_headers, transforms, diff --git a/gfx/wr/webrender/src/gpu_types.rs b/gfx/wr/webrender/src/gpu_types.rs @@ -8,7 +8,6 @@ use euclid::HomogeneousVector; use crate::composite::{CompositeFeatures, CompositorClip}; use crate::segment::EdgeAaSegmentMask; use crate::spatial_tree::{SpatialTree, SpatialNodeIndex}; -use crate::gpu_cache::GpuCacheAddress; use crate::internal_types::{FastHashMap, FrameVec, FrameMemory}; use crate::prim_store::ClipData; use crate::render_task::RenderTaskAddress; @@ -173,7 +172,7 @@ pub struct SvgFilterInstance { pub input_count: u16, pub 
generic_int: u16, pub padding: u16, - pub extra_data_address: GpuBufferAddress, + pub extra_data_address: i32, } #[derive(Clone, Debug)] @@ -188,7 +187,7 @@ pub struct SVGFEFilterInstance { pub input_2_task_address: RenderTaskAddress, pub kind: u16, pub input_count: u16, - pub extra_data_address: GpuBufferAddress, + pub extra_data_address: i32, } #[derive(Copy, Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)] @@ -262,7 +261,7 @@ pub struct BoxShadowData { #[repr(C)] pub struct ClipMaskInstanceBoxShadow { pub common: ClipMaskInstanceCommon, - pub resource_address: GpuBufferAddress, + pub resource_address: i32, pub shadow_data: BoxShadowData, } @@ -506,7 +505,7 @@ impl PrimitiveHeaders { self.headers_int.push(PrimitiveHeaderI { z: prim_header.z, render_task_address: prim_header.render_task_address, - specific_prim_address: prim_header.specific_prim_address.as_int(), + specific_prim_address: prim_header.specific_prim_address, transform_id: prim_header.transform_id, user_data: prim_header.user_data, }); @@ -521,7 +520,7 @@ impl PrimitiveHeaders { pub struct PrimitiveHeader { pub local_rect: LayoutRect, pub local_clip_rect: LayoutRect, - pub specific_prim_address: GpuCacheAddress, + pub specific_prim_address: i32, pub transform_id: TransformPaletteId, pub z: ZBufferId, pub render_task_address: RenderTaskAddress, @@ -613,8 +612,8 @@ impl From<SplitCompositeInstance> for PrimitiveInstanceData { #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct QuadInstance { pub dst_task_address: RenderTaskAddress, - pub prim_address_i: GpuBufferAddress, - pub prim_address_f: GpuBufferAddress, + pub prim_address_i: i32, + pub prim_address_f: i32, pub quad_flags: u8, pub edge_flags: u8, pub part_index: u8, @@ -632,8 +631,8 @@ impl From<QuadInstance> for PrimitiveInstanceData { PrimitiveInstanceData { data: [ - instance.prim_address_i.as_int(), - instance.prim_address_f.as_int(), + instance.prim_address_i, + instance.prim_address_f, ((instance.quad_flags as i32) << 24) | 
((instance.edge_flags as i32) << 16) | diff --git a/gfx/wr/webrender/src/picture.rs b/gfx/wr/webrender/src/picture.rs @@ -116,7 +116,7 @@ use crate::intern::ItemUid; use crate::internal_types::{FastHashMap, FastHashSet, PlaneSplitter, FilterGraphOp, FilterGraphNode, Filter, FrameId}; use crate::internal_types::{PlaneSplitterIndex, PlaneSplitAnchor, TextureSource}; use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState, PictureContext}; -use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle}; +use crate::gpu_cache::GpuCache; use crate::gpu_types::{UvRectKind, ZBufferId, BlurEdgeMode}; use peek_poke::{PeekPoke, poke_into_vec, peek_from_slice, ensure_red_zone}; use plane_split::{Clipper, Polygon}; @@ -128,7 +128,7 @@ use crate::render_task_graph::RenderTaskId; use crate::render_target::RenderTargetKind; use crate::render_task::{BlurTask, RenderTask, RenderTaskLocation, BlurTaskCache}; use crate::render_task::{StaticRenderTaskSurface, RenderTaskKind}; -use crate::renderer::BlendMode; +use crate::renderer::{BlendMode, GpuBufferAddress}; use crate::resource_cache::{ResourceCache, ImageGeneration, ImageRequest}; use crate::space::SpaceMapper; use crate::scene::SceneProperties; @@ -4957,7 +4957,7 @@ pub enum Picture3DContext<C> { #[cfg_attr(feature = "capture", derive(Serialize))] pub struct OrderedPictureChild { pub anchor: PlaneSplitAnchor, - pub gpu_address: GpuCacheAddress, + pub gpu_address: GpuBufferAddress, } bitflags! { @@ -5218,7 +5218,7 @@ pub struct PicturePrimitive { // Optional cache handles for storing extra data // in the GPU cache, depending on the type of // picture. - pub extra_gpu_data_handles: SmallVec<[GpuCacheHandle; 1]>, + pub extra_gpu_data: SmallVec<[GpuBufferAddress; 1]>, /// The spatial node index of this picture when it is /// composited into the parent picture. 
@@ -5332,7 +5332,7 @@ impl PicturePrimitive { composite_mode, raster_config: None, context_3d, - extra_gpu_data_handles: SmallVec::new(), + extra_gpu_data: SmallVec::new(), is_backface_visible: prim_flags.contains(PrimitiveFlags::IS_BACKFACE_VISIBLE), spatial_node_index, prev_local_rect: LayoutRect::zero(), @@ -6028,14 +6028,6 @@ impl PicturePrimitive { // use of the conservative picture rect for segmenting (which should // be done during scene building). if local_rect != self.prev_local_rect { - match raster_config.composite_mode { - PictureCompositeMode::Filter(Filter::DropShadows(..)) => { - for handle in &self.extra_gpu_data_handles { - frame_state.gpu_cache.invalidate(handle); - } - } - _ => {} - } // Invalidate any segments built for this picture, since the local // rect has changed. self.segments_are_valid = false; @@ -6188,7 +6180,7 @@ impl PicturePrimitive { let mut blur_tasks = BlurTaskCache::default(); - self.extra_gpu_data_handles.resize(shadows.len(), GpuCacheHandle::new()); + self.extra_gpu_data.resize(shadows.len(), GpuBufferAddress::INVALID); let mut blur_render_task_id = picture_task_id; for shadow in shadows { @@ -6923,12 +6915,11 @@ impl PicturePrimitive { let p1 = local_points[1].unwrap(); let p2 = local_points[2].unwrap(); let p3 = local_points[3].unwrap(); - let gpu_blocks = [ - [p0.x, p0.y, p1.x, p1.y].into(), - [p2.x, p2.y, p3.x, p3.y].into(), - ]; - let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks); - let gpu_address = gpu_cache.get_address(&gpu_handle); + + let mut writer = gpu_buffer.write_blocks(2); + writer.push_one([p0.x, p0.y, p1.x, p1.y]); + writer.push_one([p2.x, p2.y, p3.x, p3.y]); + let gpu_address = writer.finish(); ordered.push(OrderedPictureChild { anchor: poly.anchor, @@ -7263,60 +7254,61 @@ impl PicturePrimitive { PictureCompositeMode::TileCache { .. } => {} PictureCompositeMode::Filter(Filter::Blur { .. 
}) => {} PictureCompositeMode::Filter(Filter::DropShadows(ref shadows)) => { - self.extra_gpu_data_handles.resize(shadows.len(), GpuCacheHandle::new()); - for (shadow, extra_handle) in shadows.iter().zip(self.extra_gpu_data_handles.iter_mut()) { - if let Some(mut request) = frame_state.gpu_cache.request(extra_handle) { - let surface = &frame_state.surfaces[raster_config.surface_index.0]; - let prim_rect = surface.clipped_local_rect.cast_unit(); - - // Basic brush primitive header is (see end of prepare_prim_for_render_inner in prim_store.rs) - // [brush specific data] - // [segment_rect, segment data] - let (blur_inflation_x, blur_inflation_y) = surface.clamp_blur_radius( - shadow.blur_radius, - shadow.blur_radius, - ); + self.extra_gpu_data.resize(shadows.len(), GpuBufferAddress::INVALID); + for (shadow, extra_handle) in shadows.iter().zip(self.extra_gpu_data.iter_mut()) { + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(5); + let surface = &frame_state.surfaces[raster_config.surface_index.0]; + let prim_rect = surface.clipped_local_rect.cast_unit(); - let shadow_rect = prim_rect.inflate( - blur_inflation_x * BLUR_SAMPLE_SCALE, - blur_inflation_y * BLUR_SAMPLE_SCALE, - ).translate(shadow.offset); - - // ImageBrush colors - request.push(shadow.color.premultiplied()); - request.push(PremultipliedColorF::WHITE); - request.push([ - shadow_rect.width(), - shadow_rect.height(), - 0.0, - 0.0, - ]); - - // segment rect / extra data - request.push(shadow_rect); - request.push([0.0, 0.0, 0.0, 0.0]); - } + // Basic brush primitive header is (see end of prepare_prim_for_render_inner in prim_store.rs) + // [brush specific data] + // [segment_rect, segment data] + let (blur_inflation_x, blur_inflation_y) = surface.clamp_blur_radius( + shadow.blur_radius, + shadow.blur_radius, + ); + + let shadow_rect = prim_rect.inflate( + blur_inflation_x * BLUR_SAMPLE_SCALE, + blur_inflation_y * BLUR_SAMPLE_SCALE, + ).translate(shadow.offset); + + // ImageBrush colors + 
writer.push_one(shadow.color.premultiplied()); + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one([ + shadow_rect.width(), + shadow_rect.height(), + 0.0, + 0.0, + ]); + + // segment rect / extra data + writer.push_one(shadow_rect); + writer.push_one([0.0, 0.0, 0.0, 0.0]); + + *extra_handle = writer.finish(); } } PictureCompositeMode::Filter(ref filter) => { match *filter { Filter::ColorMatrix(ref m) => { - if self.extra_gpu_data_handles.is_empty() { - self.extra_gpu_data_handles.push(GpuCacheHandle::new()); + if self.extra_gpu_data.is_empty() { + self.extra_gpu_data.push(GpuBufferAddress::INVALID); } - if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handles[0]) { - for i in 0..5 { - request.push([m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]]); - } + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(5); + for i in 0..5 { + writer.push_one([m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]]); } + self.extra_gpu_data[0] = writer.finish(); } Filter::Flood(ref color) => { - if self.extra_gpu_data_handles.is_empty() { - self.extra_gpu_data_handles.push(GpuCacheHandle::new()); - } - if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handles[0]) { - request.push(color.to_array()); + if self.extra_gpu_data.is_empty() { + self.extra_gpu_data.push(GpuBufferAddress::INVALID); } + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(1); + writer.push_one(color.to_array()); + self.extra_gpu_data[0] = writer.finish(); } _ => {} } diff --git a/gfx/wr/webrender/src/prepare.rs b/gfx/wr/webrender/src/prepare.rs @@ -6,7 +6,7 @@ //! //! TODO: document this! 
-use api::{ColorF, DebugFlags, PropertyBinding}; +use api::{ColorF, DebugFlags}; use api::{BoxShadowClipMode, BorderStyle, ClipMode}; use api::units::*; use euclid::Scale; @@ -17,10 +17,10 @@ use crate::image_tiling::{self, Repetition}; use crate::border::{get_max_scale_for_border, build_border_instances}; use crate::clip::{ClipStore, ClipNodeRange}; use crate::pattern::Pattern; +use crate::renderer::{GpuBufferAddress, GpuBufferBuilderF, GpuBufferWriterF}; use crate::spatial_tree::{SpatialNodeIndex, SpatialTree}; use crate::clip::{ClipDataStore, ClipNodeFlags, ClipChainInstance, ClipItemKind}; use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, PictureContext, PictureState}; -use crate::gpu_cache::{GpuCacheHandle, GpuDataRequest}; use crate::gpu_types::BrushFlags; use crate::internal_types::{FastHashMap, PlaneSplitAnchor, Filter}; use crate::picture::{ClusterFlags, PictureCompositeMode, PicturePrimitive, SliceId}; @@ -526,8 +526,6 @@ fn prepare_interned_prim_for_render( scratch, ); - // Update the template this instane references, which may refresh the GPU - // cache with any shared template data. prim_data.update(frame_state); } PrimitiveInstanceKind::Clear { data_handle, .. } => { @@ -637,35 +635,13 @@ fn prepare_interned_prim_for_render( frame_state ); } - PrimitiveInstanceKind::Rectangle { data_handle, segment_instance_index, color_binding_index, use_legacy_path, .. } => { + PrimitiveInstanceKind::Rectangle { data_handle, segment_instance_index, use_legacy_path, .. } => { profile_scope!("Rectangle"); if *use_legacy_path { let prim_data = &mut data_stores.prim[*data_handle]; prim_data.common.may_need_repetition = false; - // TODO(gw): Legacy rect rendering path - remove once we support masks on quad prims - if *color_binding_index != ColorBindingIndex::INVALID { - match store.color_bindings[*color_binding_index] { - PropertyBinding::Binding(..) => { - // We explicitly invalidate the gpu cache - // if the color is animating. 
- let gpu_cache_handle = - if *segment_instance_index == SegmentInstanceIndex::INVALID { - None - } else if *segment_instance_index == SegmentInstanceIndex::UNUSED { - Some(&prim_data.common.gpu_cache_handle) - } else { - Some(&scratch.segment_instances[*segment_instance_index].gpu_cache_handle) - }; - if let Some(gpu_cache_handle) = gpu_cache_handle { - frame_state.gpu_cache.invalidate(gpu_cache_handle); - } - } - PropertyBinding::Value(..) => {}, - } - } - // Update the template this instane references, which may refresh the GPU // cache with any shared template data. prim_data.update( @@ -728,8 +704,8 @@ fn prepare_interned_prim_for_render( frame_state, &mut scratch.segments, &mut scratch.segment_instances, - |request| { - yuv_image_data.write_prim_gpu_blocks(request); + |writer| { + yuv_image_data.write_prim_gpu_blocks(writer); } ); } @@ -811,19 +787,22 @@ fn prepare_interned_prim_for_render( frame_state, &mut scratch.gradient_tiles, &frame_context.spatial_tree, - Some(&mut |_, mut request| { - request.push([ + Some(&mut |_, gpu_buffer| { + let mut writer = gpu_buffer.write_blocks(2); + writer.push_one([ prim_data.start_point.x, prim_data.start_point.y, prim_data.end_point.x, prim_data.end_point.y, ]); - request.push([ + writer.push_one([ pack_as_float(prim_data.extend_mode as u32), prim_data.stretch_size.width, prim_data.stretch_size.height, 0.0, ]); + + writer.finish() }), ); @@ -1240,23 +1219,22 @@ fn write_segment<F>( segments: &mut SegmentStorage, segment_instances: &mut SegmentInstanceStorage, f: F, -) where F: Fn(&mut GpuDataRequest) { +) where F: Fn(&mut GpuBufferWriterF) { debug_assert_ne!(segment_instance_index, SegmentInstanceIndex::INVALID); if segment_instance_index != SegmentInstanceIndex::UNUSED { let segment_instance = &mut segment_instances[segment_instance_index]; - if let Some(mut request) = frame_state.gpu_cache.request(&mut segment_instance.gpu_cache_handle) { - let segments = &segments[segment_instance.segments_range]; + let segments = 
&segments[segment_instance.segments_range]; + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(3 + segments.len() * VECS_PER_SEGMENT); - f(&mut request); + f(&mut writer); - for segment in segments { - request.write_segment( - segment.local_rect, - [0.0; 4], - ); - } + for segment in segments { + writer.push_one(segment.local_rect); + writer.push_one([0.0; 4]); } + + segment_instance.gpu_data = writer.finish(); } } @@ -1269,7 +1247,7 @@ fn decompose_repeated_gradient( frame_state: &mut FrameBuildingState, gradient_tiles: &mut GradientTileStorage, spatial_tree: &SpatialTree, - mut callback: Option<&mut dyn FnMut(&LayoutRect, GpuDataRequest)>, + mut callback: Option<&mut dyn FnMut(&LayoutRect, &mut GpuBufferBuilderF) -> GpuBufferAddress>, ) -> GradientTileRange { let tile_range = gradient_tiles.open_range(); @@ -1293,22 +1271,21 @@ fn decompose_repeated_gradient( let repetitions = image_tiling::repetitions(prim_local_rect, &visible_rect, stride); gradient_tiles.reserve(repetitions.num_repetitions()); for Repetition { origin, .. 
} in repetitions { - let mut handle = GpuCacheHandle::new(); let rect = LayoutRect::from_origin_and_size( origin, *stretch_size, ); + let mut address = GpuBufferAddress::INVALID; + if let Some(callback) = &mut callback { - if let Some(request) = frame_state.gpu_cache.request(&mut handle) { - callback(&rect, request); - } + address = callback(&rect, &mut frame_state.frame_gpu_data.f32); } gradient_tiles.push(VisibleGradientTile { local_rect: rect, local_clip_rect: tight_clip_rect, - handle + address, }); } } @@ -1861,7 +1838,7 @@ fn build_segments_if_needed( let instance = SegmentedInstance { segments_range, - gpu_cache_handle: GpuCacheHandle::new(), + gpu_data: GpuBufferAddress::INVALID, }; *segment_instance_index = segment_instances_store.push(instance); diff --git a/gfx/wr/webrender/src/prim_store/borders.rs b/gfx/wr/webrender/src/prim_store/borders.rs @@ -6,16 +6,13 @@ use api::{NormalBorder, PremultipliedColorF, Shadow, RasterSpace}; use api::units::*; use crate::border::create_border_segments; use crate::border::NormalBorderAu; +use crate::renderer::GpuBufferWriterF; use crate::scene_building::{CreateShadow, IsVisible}; use crate::frame_builder::FrameBuildingState; -use crate::gpu_cache::GpuDataRequest; use crate::intern; use crate::internal_types::{LayoutPrimitiveInfo, FrameId}; use crate::prim_store::{ - BorderSegmentInfo, BrushSegment, NinePatchDescriptor, PrimKey, - PrimTemplate, PrimTemplateCommonData, - PrimitiveInstanceKind, PrimitiveOpacity, - PrimitiveStore, InternablePrimitive, + BorderSegmentInfo, BrushSegment, InternablePrimitive, NinePatchDescriptor, PrimKey, PrimTemplate, PrimTemplateCommonData, PrimitiveInstanceKind, PrimitiveOpacity, PrimitiveStore, VECS_PER_SEGMENT }; use crate::resource_cache::ImageRequest; use crate::render_task::RenderTask; @@ -67,25 +64,24 @@ impl NormalBorderData { common: &mut PrimTemplateCommonData, frame_state: &mut FrameBuildingState, ) { - if let Some(ref mut request) = frame_state.gpu_cache.request(&mut 
common.gpu_cache_handle) { - self.write_prim_gpu_blocks(request, common.prim_rect.size()); - self.write_segment_gpu_blocks(request); - } - + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(3 + self.brush_segments.len() * VECS_PER_SEGMENT); + self.write_prim_gpu_blocks(&mut writer, common.prim_rect.size()); + self.write_segment_gpu_blocks(&mut writer); + common.gpu_buffer_address = writer.finish(); common.opacity = PrimitiveOpacity::translucent(); } fn write_prim_gpu_blocks( &self, - request: &mut GpuDataRequest, + writer: &mut GpuBufferWriterF, prim_size: LayoutSize ) { // Border primitives currently used for // image borders, and run through the // normal brush_image shader. - request.push(PremultipliedColorF::WHITE); - request.push(PremultipliedColorF::WHITE); - request.push([ + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one([ prim_size.width, prim_size.height, 0.0, @@ -95,14 +91,12 @@ impl NormalBorderData { fn write_segment_gpu_blocks( &self, - request: &mut GpuDataRequest, + writer: &mut GpuBufferWriterF, ) { for segment in &self.brush_segments { // has to match VECS_PER_SEGMENT - request.write_segment( - segment.local_rect, - segment.extra_data, - ); + writer.push_one(segment.local_rect); + writer.push_one(segment.extra_data); } } } @@ -245,10 +239,10 @@ impl ImageBorderData { common: &mut PrimTemplateCommonData, frame_state: &mut FrameBuildingState, ) { - if let Some(ref mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) { - self.write_prim_gpu_blocks(request, &common.prim_rect.size()); - self.write_segment_gpu_blocks(request); - } + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(3 + self.brush_segments.len() * VECS_PER_SEGMENT); + self.write_prim_gpu_blocks(&mut writer, &common.prim_rect.size()); + self.write_segment_gpu_blocks(&mut writer); + common.gpu_buffer_address = writer.finish(); let frame_id = frame_state.rg_builder.frame_id(); if 
self.frame_id != frame_id { @@ -279,15 +273,15 @@ impl ImageBorderData { fn write_prim_gpu_blocks( &self, - request: &mut GpuDataRequest, + writer: &mut GpuBufferWriterF, prim_size: &LayoutSize, ) { // Border primitives currently used for // image borders, and run through the // normal brush_image shader. - request.push(PremultipliedColorF::WHITE); - request.push(PremultipliedColorF::WHITE); - request.push([ + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one([ prim_size.width, prim_size.height, 0.0, @@ -297,14 +291,12 @@ impl ImageBorderData { fn write_segment_gpu_blocks( &self, - request: &mut GpuDataRequest, + writer: &mut GpuBufferWriterF, ) { for segment in &self.brush_segments { // has to match VECS_PER_SEGMENT - request.write_segment( - segment.local_rect, - segment.extra_data, - ); + writer.push_one(segment.local_rect); + writer.push_one(segment.extra_data); } } } @@ -377,9 +369,9 @@ fn test_struct_sizes() { // (b) You made a structure larger. This is not necessarily a problem, but should only // be done with care, and after checking if talos performance regresses badly. 
assert_eq!(mem::size_of::<NormalBorderPrim>(), 84, "NormalBorderPrim size changed"); - assert_eq!(mem::size_of::<NormalBorderTemplate>(), 216, "NormalBorderTemplate size changed"); + assert_eq!(mem::size_of::<NormalBorderTemplate>(), 208, "NormalBorderTemplate size changed"); assert_eq!(mem::size_of::<NormalBorderKey>(), 104, "NormalBorderKey size changed"); assert_eq!(mem::size_of::<ImageBorder>(), 68, "ImageBorder size changed"); - assert_eq!(mem::size_of::<ImageBorderTemplate>(), 104, "ImageBorderTemplate size changed"); + assert_eq!(mem::size_of::<ImageBorderTemplate>(), 96, "ImageBorderTemplate size changed"); assert_eq!(mem::size_of::<ImageBorderKey>(), 88, "ImageBorderKey size changed"); } diff --git a/gfx/wr/webrender/src/prim_store/gradient/conic.rs b/gfx/wr/webrender/src/prim_store/gradient/conic.rs @@ -17,7 +17,7 @@ use crate::scene_building::IsVisible; use crate::frame_builder::FrameBuildingState; use crate::intern::{Internable, InternDebug, Handle as InternHandle}; use crate::internal_types::LayoutPrimitiveInfo; -use crate::prim_store::{BrushSegment, GradientTileRange}; +use crate::prim_store::{BrushSegment, GradientTileRange, VECS_PER_SEGMENT}; use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity, FloatKey}; use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore}; use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, InternablePrimitive}; @@ -261,27 +261,23 @@ impl ConicGradientTemplate { &mut self, frame_state: &mut FrameBuildingState, ) { - if let Some(mut request) = - frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) { - // write_prim_gpu_blocks - request.push(PremultipliedColorF::WHITE); - request.push(PremultipliedColorF::WHITE); - request.push([ - self.stretch_size.width, - self.stretch_size.height, - 0.0, - 0.0, - ]); - - // write_segment_gpu_blocks - for segment in &self.brush_segments { - // has to match VECS_PER_SEGMENT - request.write_segment( - segment.local_rect, - 
segment.extra_data, - ); - } + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(3 + self.brush_segments.len() * VECS_PER_SEGMENT); + // write_prim_gpu_blocks + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one([ + self.stretch_size.width, + self.stretch_size.height, + 0.0, + 0.0, + ]); + // write_segment_gpu_blocks + for segment in &self.brush_segments { + // has to match VECS_PER_SEGMENT + writer.push_one(segment.local_rect); + writer.push_one(segment.extra_data); } + self.common.gpu_buffer_address = writer.finish(); let cache_key = ConicGradientCacheKey { size: self.task_size, diff --git a/gfx/wr/webrender/src/prim_store/gradient/linear.rs b/gfx/wr/webrender/src/prim_store/gradient/linear.rs @@ -19,7 +19,7 @@ use crate::frame_builder::FrameBuildingState; use crate::intern::{Internable, InternDebug, Handle as InternHandle}; use crate::internal_types::LayoutPrimitiveInfo; use crate::image_tiling::simplify_repeated_primitive; -use crate::prim_store::{BrushSegment, GradientTileRange}; +use crate::prim_store::{BrushSegment, GradientTileRange, VECS_PER_SEGMENT}; use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity}; use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore}; use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, InternablePrimitive}; @@ -494,47 +494,44 @@ impl LinearGradientTemplate { &mut self, frame_state: &mut FrameBuildingState, ) { - if let Some(mut request) = frame_state.gpu_cache.request( - &mut self.common.gpu_cache_handle - ) { - - // Write_prim_gpu_blocks - if self.cached { - // We are using the image brush. - request.push(PremultipliedColorF::WHITE); - request.push(PremultipliedColorF::WHITE); - request.push([ - self.stretch_size.width, - self.stretch_size.height, - 0.0, - 0.0, - ]); - } else { - // We are using the gradient brush. 
- request.push([ - self.start_point.x, - self.start_point.y, - self.end_point.x, - self.end_point.y, - ]); - request.push([ - pack_as_float(self.extend_mode as u32), - self.stretch_size.width, - self.stretch_size.height, - 0.0, - ]); - } + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(3 + self.brush_segments.len() * VECS_PER_SEGMENT); + + // Write_prim_gpu_blocks + if self.cached { + // We are using the image brush. + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one([ + self.stretch_size.width, + self.stretch_size.height, + 0.0, + 0.0, + ]); + } else { + // We are using the gradient brush. + writer.push_one([ + self.start_point.x, + self.start_point.y, + self.end_point.x, + self.end_point.y, + ]); + writer.push_one([ + pack_as_float(self.extend_mode as u32), + self.stretch_size.width, + self.stretch_size.height, + 0.0, + ]); + } - // write_segment_gpu_blocks - for segment in &self.brush_segments { - // has to match VECS_PER_SEGMENT - request.write_segment( - segment.local_rect, - segment.extra_data, - ); - } + // write_segment_gpu_blocks + for segment in &self.brush_segments { + // has to match VECS_PER_SEGMENT + writer.push_one(segment.local_rect); + writer.push_one(segment.extra_data); } + self.common.gpu_buffer_address = writer.finish(); + // Tile spacing is always handled by decomposing into separate draw calls so the // primitive opacity is equivalent to stops opacity. This might change to being // set to non-opaque in the presence of tile spacing if/when tile spacing is handled diff --git a/gfx/wr/webrender/src/prim_store/gradient/mod.rs b/gfx/wr/webrender/src/prim_store/gradient/mod.rs @@ -544,14 +544,14 @@ fn test_struct_sizes() { // (b) You made a structure larger. This is not necessarily a problem, but should only // be done with care, and after checking if talos performance regresses badly. 
assert_eq!(mem::size_of::<LinearGradient>(), 72, "LinearGradient size changed"); - assert_eq!(mem::size_of::<LinearGradientTemplate>(), 144, "LinearGradientTemplate size changed"); + assert_eq!(mem::size_of::<LinearGradientTemplate>(), 136, "LinearGradientTemplate size changed"); assert_eq!(mem::size_of::<LinearGradientKey>(), 96, "LinearGradientKey size changed"); assert_eq!(mem::size_of::<RadialGradient>(), 72, "RadialGradient size changed"); - assert_eq!(mem::size_of::<RadialGradientTemplate>(), 144, "RadialGradientTemplate size changed"); + assert_eq!(mem::size_of::<RadialGradientTemplate>(), 136, "RadialGradientTemplate size changed"); assert_eq!(mem::size_of::<RadialGradientKey>(), 96, "RadialGradientKey size changed"); assert_eq!(mem::size_of::<ConicGradient>(), 72, "ConicGradient size changed"); - assert_eq!(mem::size_of::<ConicGradientTemplate>(), 144, "ConicGradientTemplate size changed"); + assert_eq!(mem::size_of::<ConicGradientTemplate>(), 136, "ConicGradientTemplate size changed"); assert_eq!(mem::size_of::<ConicGradientKey>(), 96, "ConicGradientKey size changed"); } diff --git a/gfx/wr/webrender/src/prim_store/gradient/radial.rs b/gfx/wr/webrender/src/prim_store/gradient/radial.rs @@ -17,7 +17,7 @@ use crate::scene_building::IsVisible; use crate::frame_builder::FrameBuildingState; use crate::intern::{Internable, InternDebug, Handle as InternHandle}; use crate::internal_types::LayoutPrimitiveInfo; -use crate::prim_store::{BrushSegment, GradientTileRange, InternablePrimitive}; +use crate::prim_store::{BrushSegment, GradientTileRange, InternablePrimitive, VECS_PER_SEGMENT}; use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity}; use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore}; use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, FloatKey}; @@ -228,27 +228,24 @@ impl RadialGradientTemplate { &mut self, frame_state: &mut FrameBuildingState, ) { - if let Some(mut request) = - 
frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) { - // write_prim_gpu_blocks - request.push(PremultipliedColorF::WHITE); - request.push(PremultipliedColorF::WHITE); - request.push([ - self.stretch_size.width, - self.stretch_size.height, - 0.0, - 0.0, - ]); - - // write_segment_gpu_blocks - for segment in &self.brush_segments { - // has to match VECS_PER_SEGMENT - request.write_segment( - segment.local_rect, - segment.extra_data, - ); - } + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(3 + self.brush_segments.len() * VECS_PER_SEGMENT); + + // write_prim_gpu_blocks + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one([ + self.stretch_size.width, + self.stretch_size.height, + 0.0, + 0.0, + ]); + // write_segment_gpu_blocks + for segment in &self.brush_segments { + // has to match VECS_PER_SEGMENT + writer.push_one(segment.local_rect); + writer.push_one(segment.extra_data); } + self.common.gpu_buffer_address = writer.finish(); let task_size = self.task_size; let cache_key = RadialGradientCacheKey { diff --git a/gfx/wr/webrender/src/prim_store/image.rs b/gfx/wr/webrender/src/prim_store/image.rs @@ -10,10 +10,9 @@ use api::{ use api::units::*; use euclid::point2; use crate::composite::CompositorSurfaceKind; -use crate::renderer::GpuBufferBuilderF; +use crate::renderer::{GpuBufferBuilderF, GpuBufferWriterF}; use crate::scene_building::{CreateShadow, IsVisible}; use crate::frame_builder::{FrameBuildingContext, FrameBuildingState}; -use crate::gpu_cache::{GpuDataRequest}; use crate::intern::{Internable, InternDebug, Handle as InternHandle}; use crate::internal_types::LayoutPrimitiveInfo; use crate::prim_store::{ @@ -391,19 +390,19 @@ impl ImageData { ); } - if let Some(mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) { - self.write_prim_gpu_blocks(&image_instance.adjustment, &mut request); - } + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(3); 
+ self.write_prim_gpu_blocks(&image_instance.adjustment, &mut writer); + common.gpu_buffer_address = writer.finish(); } - pub fn write_prim_gpu_blocks(&self, adjustment: &AdjustedImageSource, request: &mut GpuDataRequest) { + pub fn write_prim_gpu_blocks(&self, adjustment: &AdjustedImageSource, writer: &mut GpuBufferWriterF) { let stretch_size = adjustment.map_stretch_size(self.stretch_size); // Images are drawn as a white color, modulated by the total // opacity coming from any collapsed property bindings. // Size has to match `VECS_PER_SPECIFIC_BRUSH` from `brush_image.glsl` exactly. - request.push(self.color.premultiplied()); - request.push(PremultipliedColorF::WHITE); - request.push([ + writer.push_one(self.color.premultiplied()); + writer.push_one(PremultipliedColorF::WHITE); + writer.push_one([ stretch_size.width + self.tile_spacing.width, stretch_size.height + self.tile_spacing.height, 0.0, @@ -688,11 +687,11 @@ impl YuvImageData { self.src_yuv[channel] = Some(task_id); } - if let Some(mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) { - self.write_prim_gpu_blocks(&mut request); - }; + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(1); + self.write_prim_gpu_blocks(&mut writer); + common.gpu_buffer_address = writer.finish(); - // YUV images never have transparency + // YUV images never have transparency common.opacity = PrimitiveOpacity::opaque(); } @@ -715,9 +714,9 @@ impl YuvImageData { } } - pub fn write_prim_gpu_blocks(&self, request: &mut GpuDataRequest) { + pub fn write_prim_gpu_blocks(&self, writer: &mut GpuBufferWriterF) { let ranged_color_space = self.color_space.with_range(self.color_range); - request.push([ + writer.push_one([ pack_as_float(self.color_depth.bit_depth()), pack_as_float(ranged_color_space as u32), pack_as_float(self.format as u32), @@ -786,9 +785,9 @@ fn test_struct_sizes() { // (b) You made a structure larger. 
This is not necessarily a problem, but should only // be done with care, and after checking if talos performance regresses badly. assert_eq!(mem::size_of::<Image>(), 32, "Image size changed"); - assert_eq!(mem::size_of::<ImageTemplate>(), 72, "ImageTemplate size changed"); + assert_eq!(mem::size_of::<ImageTemplate>(), 68, "ImageTemplate size changed"); assert_eq!(mem::size_of::<ImageKey>(), 52, "ImageKey size changed"); assert_eq!(mem::size_of::<YuvImage>(), 32, "YuvImage size changed"); - assert_eq!(mem::size_of::<YuvImageTemplate>(), 84, "YuvImageTemplate size changed"); + assert_eq!(mem::size_of::<YuvImageTemplate>(), 80, "YuvImageTemplate size changed"); assert_eq!(mem::size_of::<YuvImageKey>(), 52, "YuvImageKey size changed"); } diff --git a/gfx/wr/webrender/src/prim_store/line_dec.rs b/gfx/wr/webrender/src/prim_store/line_dec.rs @@ -7,9 +7,9 @@ use api::{ LineOrientation, LineStyle, PremultipliedColorF, Shadow, }; use api::units::*; +use crate::renderer::GpuBufferWriterF; use crate::scene_building::{CreateShadow, IsVisible}; use crate::frame_builder::FrameBuildingState; -use crate::gpu_cache::GpuDataRequest; use crate::intern; use crate::internal_types::LayoutPrimitiveInfo; use crate::prim_store::{ @@ -78,20 +78,20 @@ impl LineDecorationData { common: &mut PrimTemplateCommonData, frame_state: &mut FrameBuildingState, ) { - if let Some(ref mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) { - self.write_prim_gpu_blocks(request); - } + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(3); + self.write_prim_gpu_blocks(&mut writer); + common.gpu_buffer_address = writer.finish(); } fn write_prim_gpu_blocks( &self, - request: &mut GpuDataRequest + writer: &mut GpuBufferWriterF ) { match self.cache_key.as_ref() { Some(cache_key) => { - request.push(self.color.premultiplied()); - request.push(PremultipliedColorF::WHITE); - request.push([ + writer.push_one(self.color.premultiplied()); + writer.push_one(PremultipliedColorF::WHITE); + 
writer.push_one([ cache_key.size.width.to_f32_px(), cache_key.size.height.to_f32_px(), 0.0, @@ -99,7 +99,7 @@ impl LineDecorationData { ]); } None => { - request.push(self.color.premultiplied()); + writer.push_one(self.color.premultiplied()); } } } @@ -251,6 +251,6 @@ fn test_struct_sizes() { // (b) You made a structure larger. This is not necessarily a problem, but should only // be done with care, and after checking if talos performance regresses badly. assert_eq!(mem::size_of::<LineDecoration>(), 20, "LineDecoration size changed"); - assert_eq!(mem::size_of::<LineDecorationTemplate>(), 60, "LineDecorationTemplate size changed"); + assert_eq!(mem::size_of::<LineDecorationTemplate>(), 56, "LineDecorationTemplate size changed"); assert_eq!(mem::size_of::<LineDecorationKey>(), 40, "LineDecorationKey size changed"); } diff --git a/gfx/wr/webrender/src/prim_store/mod.rs b/gfx/wr/webrender/src/prim_store/mod.rs @@ -13,7 +13,7 @@ use crate::composite::CompositorSurfaceKind; use crate::clip::ClipLeafId; use crate::pattern::{Pattern, PatternBuilder, PatternBuilderContext, PatternBuilderState}; use crate::quad::QuadTileClassifier; -use crate::renderer::GpuBufferAddress; +use crate::renderer::{GpuBufferAddress, GpuBufferWriterF}; use crate::segment::EdgeAaSegmentMask; use crate::border::BorderSegmentCacheKey; use crate::debug_item::{DebugItem, DebugMessage}; @@ -21,7 +21,6 @@ use crate::debug_colors; use crate::scene_building::{CreateShadow, IsVisible}; use crate::frame_builder::FrameBuildingState; use glyph_rasterizer::GlyphKey; -use crate::gpu_cache::{GpuCacheHandle, GpuDataRequest}; use crate::gpu_types::{BrushFlags, QuadSegment}; use crate::intern; use crate::picture::PicturePrimitive; @@ -489,16 +488,16 @@ impl PrimitiveTemplateKind { /// Write any GPU blocks for the primitive template to the given request object. 
pub fn write_prim_gpu_blocks( &self, - request: &mut GpuDataRequest, + writer: &mut GpuBufferWriterF, scene_properties: &SceneProperties, ) { match *self { PrimitiveTemplateKind::Clear => { // Opaque black with operator dest out - request.push(PremultipliedColorF::BLACK); + writer.push_one(PremultipliedColorF::BLACK); } PrimitiveTemplateKind::Rectangle { ref color, .. } => { - request.push(scene_properties.resolve_color(color).premultiplied()) + writer.push_one(scene_properties.resolve_color(color).premultiplied()) } } } @@ -531,11 +530,12 @@ pub struct PrimTemplateCommonData { pub may_need_repetition: bool, pub prim_rect: LayoutRect, pub opacity: PrimitiveOpacity, - /// The GPU cache handle for a primitive template. Since this structure - /// is retained across display lists by interning, this GPU cache handle - /// also remains valid, which reduces the number of updates to the GPU - /// cache when a new display list is processed. - pub gpu_cache_handle: GpuCacheHandle, + /// Address of the per-primitive data in the GPU cache. + /// + /// TODO: This is only valid during the current frame and must + /// be overwritten each frame. We should move this out of the + /// common data to avoid accidental reuse. + pub gpu_buffer_address: GpuBufferAddress, /// Specifies the edges that are *allowed* to have anti-aliasing. /// In other words EdgeAaSegmentFlags::all() does not necessarily mean all edges will /// be anti-aliased, only that they could be. 
@@ -550,7 +550,7 @@ impl PrimTemplateCommonData { flags: common.flags, may_need_repetition: true, prim_rect: common.prim_rect.into(), - gpu_cache_handle: GpuCacheHandle::new(), + gpu_buffer_address: GpuBufferAddress::INVALID, opacity: PrimitiveOpacity::translucent(), edge_aa_mask: EdgeAaSegmentMask::all(), } @@ -640,9 +640,9 @@ impl PrimitiveTemplate { frame_state: &mut FrameBuildingState, scene_properties: &SceneProperties, ) { - if let Some(mut request) = frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) { - self.kind.write_prim_gpu_blocks(&mut request, scene_properties); - } + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(1); + self.kind.write_prim_gpu_blocks(&mut writer, scene_properties); + self.common.gpu_buffer_address = writer.finish(); self.opacity = match self.kind { PrimitiveTemplateKind::Clear => { @@ -713,7 +713,7 @@ pub struct VisibleMaskImageTile { #[derive(Debug)] #[cfg_attr(feature = "capture", derive(Serialize))] pub struct VisibleGradientTile { - pub handle: GpuCacheHandle, + pub address: GpuBufferAddress, pub local_rect: LayoutRect, pub local_clip_rect: LayoutRect, } @@ -1203,7 +1203,7 @@ impl PrimitiveInstance { #[cfg_attr(feature = "capture", derive(Serialize))] #[derive(Debug)] pub struct SegmentedInstance { - pub gpu_cache_handle: GpuCacheHandle, + pub gpu_data: GpuBufferAddress, pub segments_range: SegmentsRange, } @@ -1556,7 +1556,7 @@ fn test_struct_sizes() { // be done with care, and after checking if talos performance regresses badly. 
assert_eq!(mem::size_of::<PrimitiveInstance>(), 88, "PrimitiveInstance size changed"); assert_eq!(mem::size_of::<PrimitiveInstanceKind>(), 24, "PrimitiveInstanceKind size changed"); - assert_eq!(mem::size_of::<PrimitiveTemplate>(), 56, "PrimitiveTemplate size changed"); + assert_eq!(mem::size_of::<PrimitiveTemplate>(), 52, "PrimitiveTemplate size changed"); assert_eq!(mem::size_of::<PrimitiveTemplateKind>(), 28, "PrimitiveTemplateKind size changed"); assert_eq!(mem::size_of::<PrimitiveKey>(), 36, "PrimitiveKey size changed"); assert_eq!(mem::size_of::<PrimitiveKeyKind>(), 16, "PrimitiveKeyKind size changed"); diff --git a/gfx/wr/webrender/src/prim_store/text_run.rs b/gfx/wr/webrender/src/prim_store/text_run.rs @@ -135,32 +135,32 @@ impl TextRunTemplate { &mut self, frame_state: &mut FrameBuildingState, ) { - // corresponds to `fetch_glyph` in the shaders - if let Some(mut request) = frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) { - request.push(ColorF::from(self.font.color).premultiplied()); - - let mut gpu_block = [0.0; 4]; - for (i, src) in self.glyphs.iter().enumerate() { - // Two glyphs are packed per GPU block. - - if (i & 1) == 0 { - gpu_block[0] = src.point.x; - gpu_block[1] = src.point.y; - } else { - gpu_block[2] = src.point.x; - gpu_block[3] = src.point.y; - request.push(gpu_block); - } - } - - // Ensure the last block is added in the case - // of an odd number of glyphs. - if (self.glyphs.len() & 1) != 0 { - request.push(gpu_block); + // Corresponds to `fetch_glyph` in the shaders. + let num_blocks = (self.glyphs.len() + 1) / 2 + 1; + assert!(num_blocks <= MAX_VERTEX_TEXTURE_WIDTH); + let mut writer = frame_state.frame_gpu_data.f32.write_blocks(num_blocks); + writer.push_one(ColorF::from(self.font.color).premultiplied()); + + let mut gpu_block = [0.0; 4]; + for (i, src) in self.glyphs.iter().enumerate() { + // Two glyphs are packed per GPU block. 
+ if (i & 1) == 0 { + gpu_block[0] = src.point.x; + gpu_block[1] = src.point.y; + } else { + gpu_block[2] = src.point.x; + gpu_block[3] = src.point.y; + writer.push_one(gpu_block); } + } - assert!(request.current_used_block_num() <= MAX_VERTEX_TEXTURE_WIDTH); + // Ensure the last block is added in the case + // of an odd number of glyphs. + if (self.glyphs.len() & 1) != 0 { + writer.push_one(gpu_block); } + + self.common.gpu_buffer_address = writer.finish(); } } @@ -523,7 +523,7 @@ fn test_struct_sizes() { // (b) You made a structure larger. This is not necessarily a problem, but should only // be done with care, and after checking if talos performance regresses badly. assert_eq!(mem::size_of::<TextRun>(), 88, "TextRun size changed"); - assert_eq!(mem::size_of::<TextRunTemplate>(), 96, "TextRunTemplate size changed"); + assert_eq!(mem::size_of::<TextRunTemplate>(), 88, "TextRunTemplate size changed"); assert_eq!(mem::size_of::<TextRunKey>(), 104, "TextRunKey size changed"); assert_eq!(mem::size_of::<TextRunPrimitive>(), 80, "TextRunPrimitive size changed"); } diff --git a/gfx/wr/webrender/src/quad.rs b/gfx/wr/webrender/src/quad.rs @@ -1219,8 +1219,8 @@ pub fn add_to_batch<F>( let mut instance = QuadInstance { dst_task_address, - prim_address_i, - prim_address_f, + prim_address_i: prim_address_i.as_int(), + prim_address_f: prim_address_f.as_int(), edge_flags: edge_flags_bits, quad_flags: quad_flags.bits(), part_index: PartIndex::All as u8, diff --git a/gfx/wr/webrender/src/render_target.rs b/gfx/wr/webrender/src/render_target.rs @@ -13,7 +13,6 @@ use crate::segment::EdgeAaSegmentMask; use crate::spatial_tree::SpatialTree; use crate::clip::{ClipStore, ClipItemKind}; use crate::frame_builder::FrameGlobalResources; -use crate::gpu_cache::GpuCache; use crate::gpu_types::{BorderInstance, SvgFilterInstance, SVGFEFilterInstance, BlurDirection, BlurInstance, PrimitiveHeaders, ScalingInstance}; use crate::gpu_types::{TransformPalette, ZBufferIdGenerator, MaskInstance, 
ClipSpace, BlurEdgeMode}; use crate::gpu_types::{ZBufferId, QuadSegment, PrimitiveInstanceData, TransformPaletteId}; @@ -108,7 +107,6 @@ impl RenderTargetList { pub fn build( &mut self, ctx: &mut RenderTargetContext, - gpu_cache: &mut GpuCache, render_tasks: &RenderTaskGraph, prim_headers: &mut PrimitiveHeaders, transforms: &mut TransformPalette, @@ -124,7 +122,6 @@ impl RenderTargetList { for target in &mut self.targets { target.build( ctx, - gpu_cache, render_tasks, prim_headers, transforms, @@ -256,7 +253,6 @@ impl RenderTarget { pub fn build( &mut self, ctx: &mut RenderTargetContext, - gpu_cache: &mut GpuCache, render_tasks: &RenderTaskGraph, prim_headers: &mut PrimitiveHeaders, transforms: &mut TransformPalette, @@ -313,7 +309,6 @@ impl RenderTarget { cmd, spatial_node_index, ctx, - gpu_cache, render_tasks, prim_headers, transforms, @@ -751,7 +746,7 @@ fn add_svg_filter_instances( input_count, generic_int, padding: 0, - extra_data_address: extra_data_address.unwrap_or(GpuBufferAddress::INVALID), + extra_data_address: extra_data_address.unwrap_or(GpuBufferAddress::INVALID).as_int(), }; for (ref mut batch_textures, ref mut batch) in instances.iter_mut() { @@ -806,7 +801,7 @@ fn add_svg_filter_node_instances( input_2_task_address: RenderTaskId::INVALID.into(), kind: 0, input_count: node.inputs.len() as u16, - extra_data_address: extra_data_address.unwrap_or(GpuBufferAddress::INVALID), + extra_data_address: extra_data_address.unwrap_or(GpuBufferAddress::INVALID).as_int(), }; // Must match FILTER_* in cs_svg_filter_node.glsl diff --git a/gfx/wr/webrender/src/renderer/mod.rs b/gfx/wr/webrender/src/renderer/mod.rs @@ -132,7 +132,7 @@ pub use debug::DebugRenderer; pub use shade::{PendingShadersToPrecache, Shaders, SharedShaders}; pub use vertex::{desc, VertexArrayKind, MAX_VERTEX_TEXTURE_WIDTH}; pub use gpu_buffer::{GpuBuffer, GpuBufferF, GpuBufferBuilderF, GpuBufferI, GpuBufferBuilderI}; -pub use gpu_buffer::{GpuBufferAddress, GpuBufferBuilder, GpuBufferWriterF}; 
+pub use gpu_buffer::{GpuBufferAddress, GpuBufferBuilder, GpuBufferWriterF, GpuBufferBlockF}; /// The size of the array of each type of vertex data texture that /// is round-robin-ed each frame during bind_frame_data. Doing this diff --git a/gfx/wr/webrender/src/renderer/vertex.rs b/gfx/wr/webrender/src/renderer/vertex.rs @@ -479,8 +479,8 @@ pub mod desc { // specific clip attributes VertexAttribute { name: "aClipDataResourceAddress", - count: 2, - kind: VertexAttributeKind::U16, + count: 1, + kind: VertexAttributeKind::I32, }, VertexAttribute { name: "aClipSrcRectSize", @@ -578,8 +578,8 @@ pub mod desc { }, VertexAttribute { name: "aFilterExtraDataAddress", - count: 2, - kind: VertexAttributeKind::U16, + count: 1, + kind: VertexAttributeKind::I32, }, ], }; @@ -628,8 +628,8 @@ pub mod desc { }, VertexAttribute { name: "aFilterExtraDataAddress", - count: 2, - kind: VertexAttributeKind::U16, + count: 1, + kind: VertexAttributeKind::I32, }, ], }; diff --git a/gfx/wr/webrender/src/resource_cache.rs b/gfx/wr/webrender/src/resource_cache.rs @@ -1290,7 +1290,6 @@ impl ResourceCache { if let Some(entry) = glyph_key_cache.try_get(key) { match entry { GlyphCacheEntry::Cached(ref glyph) => { - // Skip the glyph if it is already has a valid texture cache handle. if !texture_cache.request(&glyph.texture_cache_handle, gpu_buffer) { return false; } diff --git a/gfx/wr/webrender/src/texture_cache.rs b/gfx/wr/webrender/src/texture_cache.rs @@ -849,7 +849,7 @@ impl TextureCache { }; entry.map_or(true, |entry| { // If an image is requested that is already in the cache, - // refresh the GPU cache data associated with this item. + // refresh the GPU buffer data associated with this item. entry.last_access = now; entry.write_gpu_blocks(gpu_buffer); false