render_task_graph.rs (48550B)
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//! This module contains the render task graph.
//!
//! Code associated with creating specific render tasks is in the render_task
//! module.

use api::units::*;
use api::ImageFormat;
use crate::gpu_types::ImageSource;
use crate::internal_types::{TextureSource, CacheTextureId, FastHashMap, FastHashSet, FrameId};
use crate::internal_types::size_of_frame_vec;
use crate::render_task::{StaticRenderTaskSurface, RenderTaskLocation, RenderTask};
use crate::render_target::RenderTargetKind;
use crate::render_task::{RenderTaskData, RenderTaskKind};
use crate::renderer::GpuBufferAddress;
use crate::renderer::GpuBufferBuilder;
use crate::resource_cache::ResourceCache;
use crate::texture_pack::GuillotineAllocator;
use crate::prim_store::DeferredResolve;
use crate::image_source::{resolve_image, resolve_cached_render_task};
use smallvec::SmallVec;
use topological_sort::TopologicalSort;

use crate::render_target::{RenderTargetList, PictureCacheTarget, RenderTarget};
use crate::util::{Allocation, VecHelper};
use std::{usize, f32};

use crate::internal_types::{FrameVec, FrameMemory};

#[cfg(test)]
use crate::frame_allocator::FrameAllocator;

/// If we ever need a larger texture than the ideal, we better round it up to a
/// reasonable number in order to have a bit of leeway in case the size of this
/// target is changing each frame.
const TEXTURE_DIMENSION_MASK: i32 = 0xFF;

/// Allows initializing a render task directly into the render task buffer.
///
/// See util::VecHelper. RenderTask is fairly large so avoiding the move when
/// pushing into the vector can save a lot of expensive memcpys on pages with many
/// render tasks.
46 pub struct RenderTaskAllocation<'a> { 47 pub alloc: Allocation<'a, RenderTask>, 48 } 49 50 impl<'l> RenderTaskAllocation<'l> { 51 #[inline(always)] 52 pub fn init(self, value: RenderTask) -> RenderTaskId { 53 RenderTaskId { 54 index: self.alloc.init(value) as u32, 55 } 56 } 57 } 58 59 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] 60 #[derive(MallocSizeOf)] 61 #[cfg_attr(feature = "capture", derive(Serialize))] 62 #[cfg_attr(feature = "replay", derive(Deserialize))] 63 pub struct RenderTaskId { 64 pub index: u32, 65 } 66 67 impl RenderTaskId { 68 pub const INVALID: RenderTaskId = RenderTaskId { 69 index: u32::MAX, 70 }; 71 } 72 73 #[cfg_attr(feature = "capture", derive(Serialize))] 74 #[cfg_attr(feature = "replay", derive(Deserialize))] 75 #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq, PartialOrd, Ord)] 76 pub struct PassId(usize); 77 78 impl PassId { 79 pub const MIN: PassId = PassId(0); 80 pub const MAX: PassId = PassId(!0 - 1); 81 pub const INVALID: PassId = PassId(!0 - 2); 82 } 83 84 /// An internal representation of a dynamic surface that tasks can be 85 /// allocated into. Maintains some extra metadata about each surface 86 /// during the graph build. 87 #[cfg_attr(feature = "capture", derive(Serialize))] 88 #[cfg_attr(feature = "replay", derive(Deserialize))] 89 struct Surface { 90 /// Whether this is a color or alpha render target 91 kind: RenderTargetKind, 92 /// Allocator for this surface texture 93 allocator: GuillotineAllocator, 94 /// We can only allocate into this for reuse if it's a shared surface 95 is_shared: bool, 96 /// The pass that we can free this surface after (guaranteed 97 /// to be the same for all tasks assigned to this surface) 98 free_after: PassId, 99 } 100 101 impl Surface { 102 /// Allocate a rect within a shared surfce. Returns None if the 103 /// format doesn't match, or allocation fails. 
104 fn alloc_rect( 105 &mut self, 106 size: DeviceIntSize, 107 kind: RenderTargetKind, 108 is_shared: bool, 109 free_after: PassId, 110 ) -> Option<DeviceIntPoint> { 111 if self.kind == kind && self.is_shared == is_shared && self.free_after == free_after { 112 self.allocator 113 .allocate(&size) 114 .map(|(_slice, origin)| origin) 115 } else { 116 None 117 } 118 } 119 } 120 121 /// A sub-pass can draw to either a dynamic (temporary render target) surface, 122 /// or a persistent surface (texture or picture cache). 123 #[cfg_attr(feature = "capture", derive(Serialize))] 124 #[cfg_attr(feature = "replay", derive(Deserialize))] 125 #[derive(Debug)] 126 pub enum SubPassSurface { 127 /// A temporary (intermediate) surface. 128 Dynamic { 129 /// The renderer texture id 130 texture_id: CacheTextureId, 131 /// Color / alpha render target 132 target_kind: RenderTargetKind, 133 /// The rectangle occupied by tasks in this surface. Used as a clear 134 /// optimization on some GPUs. 135 used_rect: DeviceIntRect, 136 }, 137 Persistent { 138 /// Reference to the texture or picture cache surface being drawn to. 139 surface: StaticRenderTaskSurface, 140 }, 141 } 142 143 /// A subpass is a specific render target, and a list of tasks to draw to it. 144 #[cfg_attr(feature = "capture", derive(Serialize))] 145 #[cfg_attr(feature = "replay", derive(Deserialize))] 146 pub struct SubPass { 147 /// The surface this subpass draws to 148 pub surface: SubPassSurface, 149 /// The tasks assigned to this subpass. 150 pub task_ids: FrameVec<RenderTaskId>, 151 } 152 153 /// A pass expresses dependencies between tasks. Each pass consists of a number 154 /// of subpasses. 
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct Pass {
    /// The tasks assigned to this render pass
    pub task_ids: FrameVec<RenderTaskId>,
    /// The subpasses that make up this dependency pass
    pub sub_passes: FrameVec<SubPass>,
    /// A list of intermediate surfaces that can be invalidated after
    /// this pass completes.
    pub textures_to_invalidate: FrameVec<CacheTextureId>,
}

/// The RenderTaskGraph is the immutable representation of the render task graph. It is
/// built by the RenderTaskGraphBuilder, and is constructed once per frame.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct RenderTaskGraph {
    /// List of tasks added to the graph
    pub tasks: FrameVec<RenderTask>,

    /// The passes that were created, based on dependencies between tasks
    pub passes: FrameVec<Pass>,

    /// Current frame id, used for debug validation
    frame_id: FrameId,

    /// GPU specific data for each task that is made available to shaders
    pub task_data: FrameVec<RenderTaskData>,

    /// Total number of intermediate surfaces that will be drawn to, used for test validation.
    #[cfg(test)]
    surface_count: usize,

    /// Total number of real allocated textures that will be drawn to, used for test validation.
    #[cfg(test)]
    unique_surfaces: FastHashSet<CacheTextureId>,
}

/// The persistent interface that is used during frame building to construct the
/// frame graph.
pub struct RenderTaskGraphBuilder {
    /// List of tasks added to the builder
    tasks: Vec<RenderTask>,

    /// List of task roots (tasks that no other task uses as an input)
    roots: FastHashSet<RenderTaskId>,

    /// Current frame id, used for debug validation
    frame_id: FrameId,

    /// A list of texture surfaces that can be freed at the end of a pass. Retained
    /// here to reduce heap allocations.
    textures_to_free: FastHashSet<CacheTextureId>,

    // Keep a map of `texture_id` to metadata about surfaces that are currently
    // borrowed from the render target pool.
    active_surfaces: FastHashMap<CacheTextureId, Surface>,
}

impl RenderTaskGraphBuilder {
    /// Construct a new graph builder. Typically constructed once and maintained
    /// over many frames, to avoid extra heap allocations where possible.
    pub fn new() -> Self {
        RenderTaskGraphBuilder {
            tasks: Vec::new(),
            roots: FastHashSet::default(),
            frame_id: FrameId::INVALID,
            textures_to_free: FastHashSet::default(),
            active_surfaces: FastHashMap::default(),
        }
    }

    /// Return the id of the frame currently being built.
    pub fn frame_id(&self) -> FrameId {
        self.frame_id
    }

    /// Begin a new frame. Clears the root set but keeps allocations around for reuse.
    pub fn begin_frame(&mut self, frame_id: FrameId) {
        self.frame_id = frame_id;
        self.roots.clear();
    }

    /// Get immutable access to a task
    // TODO(gw): There's only a couple of places that existing code needs to access
    //           a task during the building step. Perhaps we can remove this?
    pub fn get_task(
        &self,
        task_id: RenderTaskId,
    ) -> &RenderTask {
        &self.tasks[task_id.index as usize]
    }

    /// Get mutable access to a task
    // TODO(gw): There's only a couple of places that existing code needs to access
    //           a task during the building step. Perhaps we can remove this?
    pub fn get_task_mut(
        &mut self,
        task_id: RenderTaskId,
    ) -> &mut RenderTask {
        &mut self.tasks[task_id.index as usize]
    }

    /// Add a new task to the graph.
    pub fn add(&mut self) -> RenderTaskAllocation {
        // Assume every task is a root to start with; `add_dependency` removes
        // tasks from the root set as they become inputs to other tasks.
        self.roots.insert(
            RenderTaskId { index: self.tasks.len() as u32 }
        );

        RenderTaskAllocation {
            alloc: self.tasks.alloc(),
        }
    }

    /// Express a dependency, such that `task_id` depends on `input` as a texture source.
    pub fn add_dependency(
        &mut self,
        task_id: RenderTaskId,
        input: RenderTaskId,
    ) {
        self.tasks[task_id.index as usize].children.push(input);

        // Once a task is an input, it's no longer a root
        self.roots.remove(&input);
    }

    /// End the graph building phase and produce the immutable task graph for this frame.
    ///
    /// This performs the pass assignment (via topological sort), surface
    /// allocation (from the render target pool, with sharing/aliasing), and
    /// finally writes out the per-task GPU data.
    pub fn end_frame(
        &mut self,
        resource_cache: &mut ResourceCache,
        gpu_buffers: &mut GpuBufferBuilder,
        deferred_resolves: &mut FrameVec<DeferredResolve>,
        max_shared_surface_size: i32,
        memory: &FrameMemory,
    ) -> RenderTaskGraph {
        // Copy the render tasks over to the immutable graph output
        let task_count = self.tasks.len();

        // Copy from the frame_builder's task vector to the frame's instead of stealing it
        // because they use different memory allocators. TODO: The builder should use the
        // frame allocator, however since the builder lives longer than the frame, it's a
        // bit more risky to do so.
        let mut tasks = memory.new_vec_with_capacity(task_count);
        for task in self.tasks.drain(..) {
            tasks.push(task)
        }

        let mut graph = RenderTaskGraph {
            tasks,
            passes: memory.new_vec(),
            task_data: memory.new_vec_with_capacity(task_count),
            frame_id: self.frame_id,
            #[cfg(test)]
            surface_count: 0,
            #[cfg(test)]
            unique_surfaces: FastHashSet::default(),
        };

        // First, use a topological sort of the dependency graph to split the task set in to
        // a list of passes. This is necessary because when we have a complex graph (e.g. due
        // to a large number of sibling backdrop-filter primitives) traversing it via a simple
        // recursion can be too slow. The second pass determines when the last time a render task
        // is used as an input, and assigns what pass the surface backing that render task can
        // be freed (the surface is then returned to the render target pool and may be aliased
        // or reused during subsequent passes).

        let mut pass_count = 0;
        let mut passes = memory.new_vec();
        let mut task_sorter = TopologicalSort::<RenderTaskId>::new();

        // Iterate the task list, and add all the dependencies to the topo sort
        for (parent_id, task) in graph.tasks.iter().enumerate() {
            let parent_id = RenderTaskId { index: parent_id as u32 };

            for child_id in &task.children {
                task_sorter.add_dependency(
                    parent_id,
                    *child_id,
                );
            }
        }

        // Pop the sorted passes off the topological sort
        loop {
            // Get the next set of tasks that can be drawn
            let tasks = task_sorter.pop_all();

            // If there are no tasks left, we're done
            if tasks.is_empty() {
                // If the task sorter itself isn't empty but we couldn't pop off any
                // tasks, that implies a circular dependency in the task graph
                assert!(task_sorter.is_empty());
                break;
            } else {
                // Assign the `render_on` field to the task
                for task_id in &tasks {
                    graph.tasks[task_id.index as usize].render_on = PassId(pass_count);
                }

                // Store the task list for this pass, used later for `assign_free_pass`.
                passes.push(tasks);
                pass_count += 1;
            }
        }

        // Always create at least one pass for root tasks
        pass_count = pass_count.max(1);

        // Determine which pass each task can be freed on, which depends on which is
        // the last task that has this as an input. This must be done in top-down
        // pass order to ensure that RenderTaskLocation::Existing references are
        // visited in the correct order
        for pass in passes {
            for task_id in pass {
                assign_free_pass(
                    task_id,
                    &mut graph,
                );
            }
        }

        // Construct passes array for tasks to be assigned to below
        for _ in 0 .. pass_count {
            graph.passes.push(Pass {
                task_ids: memory.new_vec(),
                sub_passes: memory.new_vec(),
                textures_to_invalidate: memory.new_vec(),
            });
        }

        // Assign tasks to each pass based on their `render_on` attribute
        for (index, task) in graph.tasks.iter().enumerate() {
            if task.kind.is_a_rendering_operation() {
                let id = RenderTaskId { index: index as u32 };
                graph.passes[task.render_on.0].task_ids.push(id);
            }
        }

        // At this point, tasks are assigned to each dependency pass. Now we
        // can go through each pass and create sub-passes, assigning each task
        // to a target and destination rect. Note: passes are visited in
        // *reverse* order so surfaces are allocated before their consumers.
        assert!(self.active_surfaces.is_empty());

        for (pass_id, pass) in graph.passes.iter_mut().enumerate().rev() {
            assert!(self.textures_to_free.is_empty());

            for task_id in &pass.task_ids {

                let task_location = graph.tasks[task_id.index as usize].location.clone();

                match task_location {
                    RenderTaskLocation::Unallocated { size } => {
                        let task = &mut graph.tasks[task_id.index as usize];

                        let mut location = None;
                        let kind = task.kind.target_kind();

                        // If a task is used as part of an existing-chain then we can't
                        // safely share it (nor would we want to).
                        let can_use_shared_surface =
                            task.kind.can_use_shared_surface() &&
                            task.free_after != PassId::INVALID;

                        if can_use_shared_surface {
                            // If we can use a shared surface, step through the existing shared
                            // surfaces for this subpass, and see if we can allocate the task
                            // to one of these targets.
                            for sub_pass in &mut pass.sub_passes {
                                if let SubPassSurface::Dynamic { texture_id, ref mut used_rect, .. } = sub_pass.surface {
                                    let surface = self.active_surfaces.get_mut(&texture_id).unwrap();
                                    if let Some(p) = surface.alloc_rect(size, kind, true, task.free_after) {
                                        location = Some((texture_id, p));
                                        *used_rect = used_rect.union(&DeviceIntRect::from_origin_and_size(p, size));
                                        sub_pass.task_ids.push(*task_id);
                                        break;
                                    }
                                }
                            }
                        }

                        if location.is_none() {
                            // If it wasn't possible to allocate the task to a shared surface, get a new
                            // render target from the resource cache pool.

                            // If this is a really large task, don't bother allocating it as a potential
                            // shared surface for other tasks.

                            let can_use_shared_surface = can_use_shared_surface &&
                                size.width <= max_shared_surface_size &&
                                size.height <= max_shared_surface_size;

                            let surface_size = if can_use_shared_surface {
                                DeviceIntSize::new(
                                    max_shared_surface_size,
                                    max_shared_surface_size,
                                )
                            } else {
                                // Round up size here to avoid constant re-allocs during resizing
                                DeviceIntSize::new(
                                    (size.width + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
                                    (size.height + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
                                )
                            };

                            if surface_size.is_empty() {
                                // We would panic in the guillotine allocator. Instead, panic here
                                // with some context.
                                let task_name = graph.tasks[task_id.index as usize].kind.as_str();
                                panic!("{} render task has invalid size {:?}", task_name, surface_size);
                            }

                            let format = match kind {
                                RenderTargetKind::Color => ImageFormat::RGBA8,
                                RenderTargetKind::Alpha => ImageFormat::R8,
                            };

                            // Get render target of appropriate size and format from resource cache
                            let texture_id = resource_cache.get_or_create_render_target_from_pool(
                                surface_size,
                                format,
                            );

                            // Allocate metadata we need about this surface while it's active
                            let mut surface = Surface {
                                kind,
                                allocator: GuillotineAllocator::new(Some(surface_size)),
                                is_shared: can_use_shared_surface,
                                free_after: task.free_after,
                            };

                            // Allocation of the task must fit in this new surface!
                            let p = surface.alloc_rect(
                                size,
                                kind,
                                can_use_shared_surface,
                                task.free_after,
                            ).expect("bug: alloc must succeed!");

                            location = Some((texture_id, p));

                            // Store the metadata about this newly active surface. We should never
                            // get a target surface with the same texture_id as a currently active surface.
                            let _prev_surface = self.active_surfaces.insert(texture_id, surface);
                            assert!(_prev_surface.is_none());

                            // Store some information about surface allocations if in test mode
                            #[cfg(test)]
                            {
                                graph.surface_count += 1;
                                graph.unique_surfaces.insert(texture_id);
                            }

                            let mut task_ids = memory.new_vec();
                            task_ids.push(*task_id);

                            // Add the target as a new subpass for this render pass.
                            pass.sub_passes.push(SubPass {
                                surface: SubPassSurface::Dynamic {
                                    texture_id,
                                    target_kind: kind,
                                    used_rect: DeviceIntRect::from_origin_and_size(p, size),
                                },
                                task_ids,
                            });
                        }

                        // By now, we must have allocated a surface and rect for this task, so assign it!
                        assert!(location.is_some());
                        task.location = RenderTaskLocation::Dynamic {
                            texture_id: location.unwrap().0,
                            rect: DeviceIntRect::from_origin_and_size(location.unwrap().1, size),
                        };
                    }
                    RenderTaskLocation::Existing { parent_task_id, size: existing_size, .. } => {
                        let parent_task_location = graph.tasks[parent_task_id.index as usize].location.clone();

                        match parent_task_location {
                            RenderTaskLocation::Unallocated { .. } |
                            RenderTaskLocation::CacheRequest { .. } |
                            RenderTaskLocation::Existing { .. } => {
                                panic!("bug: reference to existing task must be allocated by now");
                            }
                            RenderTaskLocation::Dynamic { texture_id, rect, .. } => {
                                assert_eq!(existing_size, rect.size());

                                let kind = graph.tasks[parent_task_id.index as usize].kind.target_kind();
                                let mut task_ids = memory.new_vec();
                                task_ids.push(*task_id);
                                // A sub-pass is always created in this case, as existing tasks by definition can't be shared.
                                pass.sub_passes.push(SubPass {
                                    surface: SubPassSurface::Dynamic {
                                        texture_id,
                                        target_kind: kind,
                                        used_rect: rect, // clear will be skipped due to no-op check anyway
                                    },
                                    task_ids,
                                });

                                // The task inherits the location of the task it re-renders into.
                                let task = &mut graph.tasks[task_id.index as usize];
                                task.location = parent_task_location;
                            }
                            RenderTaskLocation::Static { .. } => {
                                unreachable!("bug: not possible since we don't dup static locations");
                            }
                        }
                    }
                    RenderTaskLocation::Static { ref surface, .. } => {
                        // No need to allocate for this surface, since it's a persistent
                        // target. Instead, just create a new sub-pass for it.
                        let mut task_ids = memory.new_vec();
                        task_ids.push(*task_id);
                        pass.sub_passes.push(SubPass {
                            surface: SubPassSurface::Persistent {
                                surface: surface.clone(),
                            },
                            task_ids,
                        });
                    }
                    RenderTaskLocation::CacheRequest { .. } => {
                        // No need to allocate nor to create a sub-pass for read-only locations.
                    }
                    RenderTaskLocation::Dynamic { .. } => {
                        // Dynamic tasks shouldn't be allocated by this point
                        panic!("bug: encountered an already allocated task");
                    }
                }

                // Return the shared surfaces from this pass
                let task = &graph.tasks[task_id.index as usize];
                for child_id in &task.children {
                    let child_task = &graph.tasks[child_id.index as usize];
                    match child_task.location {
                        RenderTaskLocation::Unallocated { .. } |
                        RenderTaskLocation::Existing { .. } => panic!("bug: must be allocated"),
                        RenderTaskLocation::Dynamic { texture_id, .. } => {
                            // If this task can be freed after this pass, include it in the
                            // unique set of textures to be returned to the render target pool below.
                            if child_task.free_after == PassId(pass_id) {
                                self.textures_to_free.insert(texture_id);
                            }
                        }
                        RenderTaskLocation::Static { .. } => {}
                        RenderTaskLocation::CacheRequest { .. } => {}
                    }
                }
            }

            // Return no longer used textures to the pool, so that they can be reused / aliased
            // by later passes.
            for texture_id in self.textures_to_free.drain() {
                resource_cache.return_render_target_to_pool(texture_id);
                self.active_surfaces.remove(&texture_id).unwrap();
                pass.textures_to_invalidate.push(texture_id);
            }
        }

        if !self.active_surfaces.is_empty() {
            // Dump the graph first, to make diagnosing the leak below easier.
            graph.print();
            // By now, all surfaces that were borrowed from the render target pool must
            // be returned to the resource cache, or we are leaking intermediate surfaces!
            assert!(self.active_surfaces.is_empty());
        }

        // Each task is now allocated to a surface and target rect. Write that to the
        // GPU blocks and task_data. After this point, the graph is returned and is
        // considered to be immutable for the rest of the frame building process.

        for task in &mut graph.tasks {
            // Check whether the render task texture and uv rects are managed externally.
            // This is the case for image tasks and cached tasks. In both cases it
            // results in a finding the information in the texture cache.
            let cache_item = if let Some(ref cache_handle) = task.cache_handle {
                Some(resolve_cached_render_task(
                    cache_handle,
                    resource_cache,
                ))
            } else if let RenderTaskKind::Image(info) = &task.kind {
                Some(resolve_image(
                    info.request,
                    resource_cache,
                    &mut gpu_buffers.f32,
                    deferred_resolves,
                    info.is_composited,
                ))
            } else {
                // General case (non-cached non-image tasks).
                None
            };

            if let Some(cache_item) = &cache_item {
                task.uv_rect_handle = gpu_buffers.f32.resolve_handle(cache_item.uv_rect_handle);

                // Update the render task even if the item is invalid.
                // We'll handle it later and it's easier to not have to
                // deal with unexpected location variants like
                // RenderTaskLocation::CacheRequest when we do.
                if let RenderTaskLocation::CacheRequest { .. } = &task.location {
                    let source = cache_item.texture_id;
                    task.location = RenderTaskLocation::Static {
                        surface: StaticRenderTaskSurface::ReadOnly { source },
                        rect: cache_item.uv_rect,
                    };
                }
            }

            // This has to be done after we do the task location fixup above.
            let target_rect = task.get_target_rect();

            // If the uv rect is not managed externally, generate it now.
            if cache_item.is_none() {
                let image_source = ImageSource {
                    p0: target_rect.min.to_f32(),
                    p1: target_rect.max.to_f32(),
                    user_data: [0.0; 4],
                    uv_rect_kind: task.uv_rect_kind,
                };

                let uv_rect_handle = image_source.write_gpu_blocks(&mut gpu_buffers.f32);
                task.uv_rect_handle = gpu_buffers.f32.resolve_handle(uv_rect_handle);
            }

            // Give the render task an opportunity to add any
            // information to the GPU cache, if appropriate.
            task.kind.write_gpu_blocks(gpu_buffers);

            graph.task_data.push(
                task.kind.write_task_data(target_rect)
            );
        }

        graph
    }
}

impl RenderTaskGraph {
    /// Print the render task graph to console
    #[allow(dead_code)]
    pub fn print(
        &self,
    ) {
        print!("-- RenderTaskGraph --\n");

        for (i, task) in self.tasks.iter().enumerate() {
            print!("Task {} [{}]: render_on={} free_after={} children={:?} target_size={:?}\n",
                i,
                task.kind.as_str(),
                task.render_on.0,
                task.free_after.0,
                task.children,
                task.get_target_size(),
            );
        }

        for (p, pass) in self.passes.iter().enumerate() {
            print!("Pass {}:\n", p);

            for (s, sub_pass) in pass.sub_passes.iter().enumerate() {
                print!("\tSubPass {}: {:?}\n",
                    s,
                    sub_pass.surface,
                );

                for task_id in &sub_pass.task_ids {
                    print!("\t\tTask {:?}\n", task_id.index);
                }
            }
        }
    }

    /// Resolve the texture source a task renders to, or `None` for invalid /
    /// absent tasks.
    pub fn resolve_texture(
        &self,
        task_id: impl Into<Option<RenderTaskId>>,
    ) -> Option<TextureSource> {
        let task_id = task_id.into()?;
        let task = &self[task_id];

        match task.get_texture_source() {
            TextureSource::Invalid => None,
            source => Some(source),
        }
    }

    /// Resolve both the GPU uv-rect address and the texture source for a task.
    pub fn resolve_location(
        &self,
        task_id: impl Into<Option<RenderTaskId>>,
    ) -> Option<(GpuBufferAddress, TextureSource)> {
        self.resolve_impl(task_id.into()?)
    }

    // Shared implementation for `resolve_location`.
    fn resolve_impl(
        &self,
        task_id: RenderTaskId,
    ) -> Option<(GpuBufferAddress, TextureSource)> {
        let task = &self[task_id];
        let texture_source = task.get_texture_source();

        if let TextureSource::Invalid = texture_source {
            return None;
        }

        let uv_address = task.get_texture_address();
        assert!(uv_address.is_valid());

        Some((uv_address, texture_source))
    }

    /// Report the approximate heap footprint of the graph, in bytes.
    pub fn report_memory(&self) -> usize {
        // We can't use wr_malloc_sizeof here because the render task
        // graph's memory is mainly backed by frame's custom allocator.
        // So we calculate the memory footprint manually.

        let mut mem = size_of_frame_vec(&self.tasks)
            + size_of_frame_vec(&self.task_data)
            + size_of_frame_vec(&self.passes);

        for pass in &self.passes {
            mem += size_of_frame_vec(&pass.task_ids)
                + size_of_frame_vec(&pass.sub_passes)
                + size_of_frame_vec(&pass.textures_to_invalidate);
            for sub_pass in &pass.sub_passes {
                mem += size_of_frame_vec(&sub_pass.task_ids);
            }
        }

        mem
    }

    /// Construct an empty graph backed by the fallback allocator, for tests only.
    #[cfg(test)]
    pub fn new_for_testing() -> Self {
        let allocator = FrameAllocator::fallback();
        RenderTaskGraph {
            tasks: allocator.clone().new_vec(),
            passes: allocator.clone().new_vec(),
            frame_id: FrameId::INVALID,
            task_data: allocator.clone().new_vec(),
            surface_count: 0,
            unique_surfaces: FastHashSet::default(),
        }
    }

    /// Return the surface and texture counts, used for testing
    #[cfg(test)]
    pub fn surface_counts(&self) -> (usize, usize) {
        (self.surface_count, self.unique_surfaces.len())
    }

    /// Return current frame id, used for validation
    #[cfg(debug_assertions)]
    pub fn frame_id(&self) -> FrameId {
        self.frame_id
    }
}

/// Batching uses index access to read information about tasks
impl std::ops::Index<RenderTaskId> for RenderTaskGraph {
    type Output = RenderTask;
    fn index(&self, id: RenderTaskId) -> &RenderTask {
        &self.tasks[id.index as usize]
    }
}

/// Propagate `free_after` information from the task `id` to its children: each
/// child's backing surface must live at least until the pass `id` renders on.
fn assign_free_pass(
    id: RenderTaskId,
    graph: &mut RenderTaskGraph,
) {
    let task = &mut graph.tasks[id.index as usize];
    let render_on = task.render_on;

    // Collect children into a local SmallVec so that `task`'s borrow is
    // released before mutating other tasks in the graph below.
    let mut child_task_ids: SmallVec<[RenderTaskId; 8]> = SmallVec::new();
    child_task_ids.extend_from_slice(&task.children);

    for child_id in child_task_ids {
        let child_location = graph.tasks[child_id.index as usize].location.clone();

        // Each dynamic child task can free its backing surface after the last
        // task that references it as an input. Using min here ensures the
        // safe time to free this surface in the presence of multiple paths
        // to this task from the root(s).
        match child_location {
            RenderTaskLocation::CacheRequest { .. } => {}
            RenderTaskLocation::Static { .. } => {
                // never get freed anyway, so can leave untouched
                // (could validate that they remain at PassId::MIN)
            }
            RenderTaskLocation::Dynamic { .. } => {
                panic!("bug: should not be allocated yet");
            }
            RenderTaskLocation::Unallocated { .. } => {
                let child_task = &mut graph.tasks[child_id.index as usize];

                if child_task.free_after != PassId::INVALID {
                    child_task.free_after = child_task.free_after.min(render_on);
                }
            }
            RenderTaskLocation::Existing { parent_task_id, .. } => {
                // The parent of an existing-chain must never be freed early;
                // mark it INVALID so later min() updates leave it alone.
                let parent_task = &mut graph.tasks[parent_task_id.index as usize];
                parent_task.free_after = PassId::INVALID;

                let child_task = &mut graph.tasks[child_id.index as usize];

                if child_task.free_after != PassId::INVALID {
                    child_task.free_after = child_task.free_after.min(render_on);
                }
            }
        }
    }
}

/// A render pass represents a set of rendering operations that don't depend on one
/// another.
865 /// 866 /// A render pass can have several render targets if there wasn't enough space in one 867 /// target to do all of the rendering for that pass. See `RenderTargetList`. 868 #[cfg_attr(feature = "capture", derive(Serialize))] 869 #[cfg_attr(feature = "replay", derive(Deserialize))] 870 pub struct RenderPass { 871 /// The subpasses that describe targets being rendered to in this pass 872 pub alpha: RenderTargetList, 873 pub color: RenderTargetList, 874 pub texture_cache: FastHashMap<CacheTextureId, RenderTarget>, 875 pub picture_cache: FrameVec<PictureCacheTarget>, 876 pub textures_to_invalidate: FrameVec<CacheTextureId>, 877 } 878 879 impl RenderPass { 880 /// Creates an intermediate off-screen pass. 881 pub fn new(src: &Pass, memory: &mut FrameMemory) -> Self { 882 RenderPass { 883 color: RenderTargetList::new(memory.allocator()), 884 alpha: RenderTargetList::new(memory.allocator()), 885 texture_cache: FastHashMap::default(), 886 picture_cache: memory.allocator().new_vec(), 887 textures_to_invalidate: src.textures_to_invalidate.clone(), 888 } 889 } 890 } 891 892 // Dump an SVG visualization of the render graph for debugging purposes 893 #[cfg(feature = "capture")] 894 pub fn dump_render_tasks_as_svg( 895 render_tasks: &RenderTaskGraph, 896 output: &mut dyn std::io::Write, 897 ) -> std::io::Result<()> { 898 use svg_fmt::*; 899 900 let node_width = 80.0; 901 let node_height = 30.0; 902 let vertical_spacing = 8.0; 903 let horizontal_spacing = 20.0; 904 let margin = 10.0; 905 let text_size = 10.0; 906 907 let mut pass_rects = Vec::new(); 908 let mut nodes = vec![None; render_tasks.tasks.len()]; 909 910 let mut x = margin; 911 let mut max_y: f32 = 0.0; 912 913 #[derive(Clone)] 914 struct Node { 915 rect: Rectangle, 916 label: Text, 917 size: Text, 918 } 919 920 for pass in render_tasks.passes.iter().rev() { 921 let mut layout = VerticalLayout::new(x, margin, node_width); 922 923 for task_id in &pass.task_ids { 924 let task_index = task_id.index as usize; 925 
            let task = &render_tasks.tasks[task_index];

            let rect = layout.push_rectangle(node_height);

            // Center the labels horizontally within the node box.
            let tx = rect.x + rect.w / 2.0;
            let ty = rect.y + 10.0;

            let label = text(tx, ty, format!("{}", task.kind.as_str()));
            let size = text(tx, ty + 12.0, format!("{:?}", task.location.size()));

            nodes[task_index] = Some(Node { rect, label, size });

            layout.advance(vertical_spacing);
        }

        pass_rects.push(layout.total_rectangle());

        // Advance to the next column and track the tallest column so far.
        x += node_width + horizontal_spacing;
        max_y = max_y.max(layout.y + margin);
    }

    // Collect one line segment per task -> child dependency, running from the
    // right edge of the child (dependency) node to the left edge of the
    // consuming node, both anchored at the vertical center.
    let mut links = Vec::new();
    for node_index in 0..nodes.len() {
        if nodes[node_index].is_none() {
            continue;
        }

        let task = &render_tasks.tasks[node_index];
        for dep in &task.children {
            let dep_index = dep.index as usize;

            if let (&Some(ref node), &Some(ref dep_node)) = (&nodes[node_index], &nodes[dep_index]) {
                links.push((
                    dep_node.rect.x + dep_node.rect.w,
                    dep_node.rect.y + dep_node.rect.h / 2.0,
                    node.rect.x,
                    node.rect.y + node.rect.h / 2.0,
                ));
            }
        }
    }

    let svg_w = x + margin;
    let svg_h = max_y + margin;
    writeln!(output, "{}", BeginSvg { w: svg_w, h: svg_h })?;

    // Background.
    writeln!(output,
        " {}",
        rectangle(0.0, 0.0, svg_w, svg_h)
            .inflate(1.0, 1.0)
            .fill(rgb(50, 50, 50))
    )?;

    // Passes.
    for rect in pass_rects {
        writeln!(output,
            " {}",
            rect.inflate(3.0, 3.0)
                .border_radius(4.0)
                .opacity(0.4)
                .fill(black())
        )?;
    }

    // Links.
    for (x1, y1, x2, y2) in links {
        dump_task_dependency_link(output, x1, y1, x2, y2);
    }

    // Tasks.
    for node in &nodes {
        if let Some(node) = node {
            // Drop shadow behind the node box.
            writeln!(output,
                " {}",
                node.rect
                    .clone()
                    .fill(black())
                    .border_radius(3.0)
                    .opacity(0.5)
                    .offset(0.0, 2.0)
            )?;
            // Node box itself.
            writeln!(output,
                " {}",
                node.rect
                    .clone()
                    .fill(rgb(200, 200, 200))
                    .border_radius(3.0)
                    .opacity(0.8)
            )?;

            // Task kind label.
            writeln!(output,
                " {}",
                node.label
                    .clone()
                    .size(text_size)
                    .align(Align::Center)
                    .color(rgb(50, 50, 50))
            )?;
            // Task size label, rendered smaller below the kind label.
            writeln!(output,
                " {}",
                node.size
                    .clone()
                    .size(text_size * 0.7)
                    .align(Align::Center)
                    .color(rgb(50, 50, 50))
            )?;
        }
    }

    writeln!(output, "{}", EndSvg)
}

/// Write an SVG path element for a single dependency link between two nodes,
/// from (x1, y1) on the dependency to (x2, y2) on the consumer.
#[allow(dead_code)]
fn dump_task_dependency_link(
    output: &mut dyn std::io::Write,
    x1: f32, y1: f32,
    x2: f32, y2: f32,
) {
    use svg_fmt::*;

    // If the link is a straight horizontal line and spans over multiple passes, it
    // is likely to go straight through unrelated nodes in a way that makes it look like
    // they are connected, so we bend the line upward a bit to avoid that.
    let simple_path = (y1 - y2).abs() > 1.0 || (x2 - x1) < 45.0;

    let mid_x = (x1 + x2) / 2.0;
    if simple_path {
        // Single S-shaped cubic bezier directly between the endpoints.
        write!(output, " {}",
            path().move_to(x1, y1)
                .cubic_bezier_to(mid_x, y1, mid_x, y2, x2, y2)
                .fill(Fill::None)
                .stroke(Stroke::Color(rgb(100, 100, 100), 3.0))
        ).unwrap();
    } else {
        // Two beziers meeting 25 units above the start point at mid_x,
        // arching the link over any intervening nodes.
        let ctrl1_x = (mid_x + x1) / 2.0;
        let ctrl2_x = (mid_x + x2) / 2.0;
        let ctrl_y = y1 - 25.0;
        write!(output, " {}",
            path().move_to(x1, y1)
                .cubic_bezier_to(ctrl1_x, y1, ctrl1_x, ctrl_y, mid_x, ctrl_y)
                .cubic_bezier_to(ctrl2_x, ctrl_y, ctrl2_x, y2, x2, y2)
                .fill(Fill::None)
                .stroke(Stroke::Color(rgb(100, 100, 100), 3.0))
        ).unwrap();
    }
}

/// Construct a picture cache render task location for testing
#[cfg(test)]
fn pc_target(
    surface_id: u64,
    tile_x: i32,
    tile_y: i32,
) -> RenderTaskLocation {
    use crate::{
        composite::{NativeSurfaceId, NativeTileId},
        picture::ResolvedSurfaceTexture,
    };

    // All test picture cache tiles use a fixed 512x512 size.
    let width = 512;
    let height = 512;

    RenderTaskLocation::Static {
        surface: StaticRenderTaskSurface::PictureCache {
            surface: ResolvedSurfaceTexture::Native {
                id: NativeTileId {
                    surface_id: NativeSurfaceId(surface_id),
                    x: tile_x,
                    y: tile_y,
                },
                size: DeviceIntSize::new(width, height),
            },
        },
        rect: DeviceIntSize::new(width, height).into(),
    }
}

#[cfg(test)]
impl RenderTaskGraphBuilder {
    /// Build the graph and assert the expected pass count plus the total and
    /// unique surface allocation counts against the test resource cache.
    fn test_expect(
        mut self,
        pass_count: usize,
        total_surface_count: usize,
        unique_surfaces: &[(i32, i32, ImageFormat)],
    ) {
        use crate::{internal_types::FrameStamp, renderer::{GpuBufferBuilderF, GpuBufferBuilderI}};
        use api::{DocumentId, IdNamespace};

        let mut rc = ResourceCache::new_for_testing();

        // Advance past the initial stamp so the test document has a valid frame.
        let mut frame_stamp = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
        frame_stamp.advance();
1118 1119 let frame_memory = FrameMemory::fallback(); 1120 let mut gpu_buffers = GpuBufferBuilder { 1121 f32: GpuBufferBuilderF::new(&frame_memory, 0, FrameId::first()), 1122 i32: GpuBufferBuilderI::new(&frame_memory, 0, FrameId::first()), 1123 }; 1124 let g = self.end_frame(&mut rc, &mut gpu_buffers, &mut frame_memory.new_vec(), 2048, &frame_memory); 1125 g.print(); 1126 1127 assert_eq!(g.passes.len(), pass_count); 1128 assert_eq!(g.surface_counts(), (total_surface_count, unique_surfaces.len())); 1129 1130 rc.validate_surfaces(unique_surfaces); 1131 } 1132 } 1133 1134 /// Construct a testing render task with given location 1135 #[cfg(test)] 1136 fn task_location(location: RenderTaskLocation) -> RenderTask { 1137 RenderTask::new_test( 1138 location, 1139 RenderTargetKind::Color, 1140 ) 1141 } 1142 1143 /// Construct a dynamic render task location for testing 1144 #[cfg(test)] 1145 fn task_dynamic(size: i32) -> RenderTask { 1146 RenderTask::new_test( 1147 RenderTaskLocation::Unallocated { size: DeviceIntSize::new(size, size) }, 1148 RenderTargetKind::Color, 1149 ) 1150 } 1151 1152 #[test] 1153 fn fg_test_1() { 1154 // Test that a root target can be used as an input for readbacks 1155 // This functionality isn't currently used, but will be in future. 
1156 1157 let mut gb = RenderTaskGraphBuilder::new(); 1158 1159 let root_target = pc_target(0, 0, 0); 1160 1161 let root = gb.add().init(task_location(root_target.clone())); 1162 1163 let readback = gb.add().init(task_dynamic(100)); 1164 gb.add_dependency(readback, root); 1165 1166 let mix_blend_content = gb.add().init(task_dynamic(50)); 1167 1168 let content = gb.add().init(task_location(root_target)); 1169 gb.add_dependency(content, readback); 1170 gb.add_dependency(content, mix_blend_content); 1171 1172 gb.test_expect(3, 1, &[ 1173 (2048, 2048, ImageFormat::RGBA8), 1174 ]); 1175 } 1176 1177 #[test] 1178 fn fg_test_3() { 1179 // Test that small targets are allocated in a shared surface, and that large 1180 // tasks are allocated in a rounded up texture size. 1181 1182 let mut gb = RenderTaskGraphBuilder::new(); 1183 1184 let pc_root = gb.add().init(task_location(pc_target(0, 0, 0))); 1185 1186 let child_pic_0 = gb.add().init(task_dynamic(128)); 1187 let child_pic_1 = gb.add().init(task_dynamic(3000)); 1188 1189 gb.add_dependency(pc_root, child_pic_0); 1190 gb.add_dependency(pc_root, child_pic_1); 1191 1192 gb.test_expect(2, 2, &[ 1193 (2048, 2048, ImageFormat::RGBA8), 1194 (3072, 3072, ImageFormat::RGBA8), 1195 ]); 1196 } 1197 1198 #[test] 1199 fn fg_test_4() { 1200 // Test that for a simple dependency chain of tasks, that render 1201 // target surfaces are aliased and reused between passes where possible. 
1202 1203 let mut gb = RenderTaskGraphBuilder::new(); 1204 1205 let pc_root = gb.add().init(task_location(pc_target(0, 0, 0))); 1206 1207 let child_pic_0 = gb.add().init(task_dynamic(128)); 1208 let child_pic_1 = gb.add().init(task_dynamic(128)); 1209 let child_pic_2 = gb.add().init(task_dynamic(128)); 1210 1211 gb.add_dependency(pc_root, child_pic_0); 1212 gb.add_dependency(child_pic_0, child_pic_1); 1213 gb.add_dependency(child_pic_1, child_pic_2); 1214 1215 gb.test_expect(4, 3, &[ 1216 (2048, 2048, ImageFormat::RGBA8), 1217 (2048, 2048, ImageFormat::RGBA8), 1218 ]); 1219 } 1220 1221 #[test] 1222 fn fg_test_5() { 1223 // Test that a task that is used as an input by direct parent and also 1224 // distance ancestor are scheduled correctly, and allocates the correct 1225 // number of passes, taking advantage of surface reuse / aliasing where feasible. 1226 1227 let mut gb = RenderTaskGraphBuilder::new(); 1228 1229 let pc_root = gb.add().init(task_location(pc_target(0, 0, 0))); 1230 1231 let child_pic_0 = gb.add().init(task_dynamic(128)); 1232 let child_pic_1 = gb.add().init(task_dynamic(64)); 1233 let child_pic_2 = gb.add().init(task_dynamic(32)); 1234 let child_pic_3 = gb.add().init(task_dynamic(16)); 1235 1236 gb.add_dependency(pc_root, child_pic_0); 1237 gb.add_dependency(child_pic_0, child_pic_1); 1238 gb.add_dependency(child_pic_1, child_pic_2); 1239 gb.add_dependency(child_pic_2, child_pic_3); 1240 gb.add_dependency(pc_root, child_pic_3); 1241 1242 gb.test_expect(5, 4, &[ 1243 (2048, 2048, ImageFormat::RGBA8), 1244 (2048, 2048, ImageFormat::RGBA8), 1245 (2048, 2048, ImageFormat::RGBA8), 1246 ]); 1247 } 1248 1249 #[test] 1250 fn fg_test_6() { 1251 // Test that a task that is used as an input dependency by two parent 1252 // tasks is correctly allocated and freed. 
1253 1254 let mut gb = RenderTaskGraphBuilder::new(); 1255 1256 let pc_root_1 = gb.add().init(task_location(pc_target(0, 0, 0))); 1257 let pc_root_2 = gb.add().init(task_location(pc_target(0, 1, 0))); 1258 1259 let child_pic = gb.add().init(task_dynamic(128)); 1260 1261 gb.add_dependency(pc_root_1, child_pic); 1262 gb.add_dependency(pc_root_2, child_pic); 1263 1264 gb.test_expect(2, 1, &[ 1265 (2048, 2048, ImageFormat::RGBA8), 1266 ]); 1267 } 1268 1269 #[test] 1270 fn fg_test_7() { 1271 // Test that a standalone surface is not incorrectly used to 1272 // allocate subsequent shared task rects. 1273 1274 let mut gb = RenderTaskGraphBuilder::new(); 1275 1276 let pc_root = gb.add().init(task_location(pc_target(0, 0, 0))); 1277 1278 let child0 = gb.add().init(task_dynamic(16)); 1279 let child1 = gb.add().init(task_dynamic(16)); 1280 1281 let child2 = gb.add().init(task_dynamic(16)); 1282 let child3 = gb.add().init(task_dynamic(16)); 1283 1284 gb.add_dependency(pc_root, child0); 1285 gb.add_dependency(child0, child1); 1286 gb.add_dependency(pc_root, child1); 1287 1288 gb.add_dependency(pc_root, child2); 1289 gb.add_dependency(child2, child3); 1290 1291 gb.test_expect(3, 3, &[ 1292 (2048, 2048, ImageFormat::RGBA8), 1293 (2048, 2048, ImageFormat::RGBA8), 1294 (2048, 2048, ImageFormat::RGBA8), 1295 ]); 1296 }