gpu_buffer.rs (17860B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 TODO:
     Efficiently allow writing to buffer (better push interface)
 */

// NOTE(review): `std::i32` is the deprecated integer-constants module and the
// import looks unused in this file — confirm before removing.
use std::i32;

use crate::gpu_types::UvRectKind;
use crate::internal_types::{FrameId, FrameMemory, FrameVec};
use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
use crate::util::ScaleOffset;
use api::units::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceRect, LayoutRect, PictureRect};
use api::{PremultipliedColorF, ImageFormat};
use crate::device::Texel;
use crate::render_task_graph::{RenderTaskGraph, RenderTaskId};

/// The pair of per-frame GPU buffer builders: one for integer blocks and one
/// for float blocks.
pub struct GpuBufferBuilder {
    pub i32: GpuBufferBuilderI,
    pub f32: GpuBufferBuilderF,
}

// Aliases for the f32 (RGBAF32) flavor of the buffer types.
pub type GpuBufferF = GpuBuffer<GpuBufferBlockF>;
pub type GpuBufferBuilderF = GpuBufferBuilderImpl<GpuBufferBlockF>;

// Aliases for the i32 (RGBAI32) flavor of the buffer types.
pub type GpuBufferI = GpuBuffer<GpuBufferBlockI>;
pub type GpuBufferBuilderI = GpuBufferBuilderImpl<GpuBufferBlockI>;

pub type GpuBufferWriterF<'l> = GpuBufferWriter<'l, GpuBufferBlockF>;
pub type GpuBufferWriterI<'l> = GpuBufferWriter<'l, GpuBufferBlockI>;

// Map each block type to the texture format used when uploading the buffer.
unsafe impl Texel for GpuBufferBlockF {
    fn image_format() -> ImageFormat { ImageFormat::RGBAF32 }
}

unsafe impl Texel for GpuBufferBlockI {
    fn image_format() -> ImageFormat { ImageFormat::RGBAI32 }
}

// Default blocks are all-zero; used for row padding and deferred slots.
impl Default for GpuBufferBlockF {
    fn default() -> Self {
        GpuBufferBlockF::EMPTY
    }
}

impl Default for GpuBufferBlockI {
    fn default() -> Self {
        GpuBufferBlockI::EMPTY
    }
}

/// A single texel in RGBAF32 texture - 16 bytes.
56 #[derive(Copy, Clone, Debug, MallocSizeOf)] 57 #[cfg_attr(feature = "capture", derive(Serialize))] 58 #[cfg_attr(feature = "replay", derive(Deserialize))] 59 pub struct GpuBufferBlockF { 60 data: [f32; 4], 61 } 62 63 /// A single texel in RGBAI32 texture - 16 bytes. 64 #[derive(Copy, Clone, Debug, MallocSizeOf)] 65 #[cfg_attr(feature = "capture", derive(Serialize))] 66 #[cfg_attr(feature = "replay", derive(Deserialize))] 67 pub struct GpuBufferBlockI { 68 data: [i32; 4], 69 } 70 71 /// GpuBuffer handle is similar to GpuBufferAddress with additional checks 72 /// to avoid accidentally using the same handle in multiple frames. 73 /// 74 /// Do not send GpuBufferHandle to the GPU directly. Instead use a GpuBuffer 75 /// or GpuBufferBuilder to resolve the handle into a GpuBufferAddress that 76 /// can be placed into GPU data. 77 /// 78 /// The extra checks consists into storing an 8 bit epoch in the upper 8 bits 79 /// of the handle. The epoch will be reused every 255 frames so this is not 80 /// a mechanism that one can rely on to store and reuse handles over multiple 81 /// frames. It is only a mechanism to catch mistakes where a handle is 82 /// accidentally used in the wrong frame and panic. 
83 #[repr(transparent)] 84 #[derive(Copy, Clone, MallocSizeOf, Eq, PartialEq)] 85 #[cfg_attr(feature = "capture", derive(Serialize))] 86 #[cfg_attr(feature = "replay", derive(Deserialize))] 87 pub struct GpuBufferHandle(u32); 88 89 impl GpuBufferHandle { 90 pub const INVALID: GpuBufferHandle = GpuBufferHandle(u32::MAX - 1); 91 const EPOCH_MASK: u32 = 0xFF000000; 92 93 fn new(addr: u32, epoch: u32) -> Self { 94 Self(addr | epoch) 95 } 96 97 pub fn address_unchecked(&self) -> GpuBufferAddress { 98 GpuBufferAddress(self.0 & !Self::EPOCH_MASK) 99 } 100 } 101 102 impl std::fmt::Debug for GpuBufferHandle { 103 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 104 let addr = self.0 & !Self::EPOCH_MASK; 105 let epoch = (self.0 & Self::EPOCH_MASK) >> 24; 106 write!(f, "#{addr}@{epoch}") 107 } 108 } 109 110 // TODO(gw): Temporarily encode GPU Cache addresses as a single int. 111 // In the future, we can change the PrimitiveInstanceData struct 112 // to use 2x u16 for the vertex attribute instead of an i32. 
113 #[repr(transparent)] 114 #[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)] 115 #[cfg_attr(feature = "capture", derive(Serialize))] 116 #[cfg_attr(feature = "replay", derive(Deserialize))] 117 pub struct GpuBufferAddress(u32); 118 119 impl GpuBufferAddress { 120 pub fn new(u: u16, v: u16) -> Self { 121 GpuBufferAddress( 122 v as u32 * MAX_VERTEX_TEXTURE_WIDTH as u32 + u as u32 123 ) 124 } 125 126 pub fn is_valid(&self) -> bool { 127 *self != Self::INVALID 128 } 129 130 pub fn as_u32(self) -> u32 { 131 self.0 132 } 133 134 pub fn from_u32(val: u32) -> Self { 135 GpuBufferAddress(val) 136 } 137 138 #[allow(dead_code)] 139 pub fn as_int(self) -> i32 { 140 self.0 as i32 141 } 142 143 #[allow(dead_code)] 144 pub fn uv(self) -> (u16, u16) { 145 ( 146 (self.0 as usize % MAX_VERTEX_TEXTURE_WIDTH) as u16, 147 (self.0 as usize / MAX_VERTEX_TEXTURE_WIDTH) as u16, 148 ) 149 } 150 151 pub const INVALID: GpuBufferAddress = GpuBufferAddress(u32::MAX - 1); 152 } 153 154 impl GpuBufferBlockF { 155 pub const EMPTY: Self = GpuBufferBlockF { data: [0.0; 4] }; 156 } 157 158 impl GpuBufferBlockI { 159 pub const EMPTY: Self = GpuBufferBlockI { data: [0; 4] }; 160 } 161 162 impl Into<GpuBufferBlockF> for LayoutRect { 163 fn into(self) -> GpuBufferBlockF { 164 GpuBufferBlockF { 165 data: [ 166 self.min.x, 167 self.min.y, 168 self.max.x, 169 self.max.y, 170 ], 171 } 172 } 173 } 174 175 impl Into<GpuBufferBlockF> for crate::quad::LayoutOrDeviceRect { 176 fn into(self) -> GpuBufferBlockF { 177 GpuBufferBlockF { 178 data: [ 179 self.min.x, 180 self.min.y, 181 self.max.x, 182 self.max.y, 183 ], 184 } 185 } 186 } 187 188 impl Into<GpuBufferBlockF> for ScaleOffset { 189 fn into(self) -> GpuBufferBlockF { 190 GpuBufferBlockF { 191 data: [ 192 self.scale.x, 193 self.scale.y, 194 self.offset.x, 195 self.offset.y, 196 ], 197 } 198 } 199 } 200 201 impl Into<GpuBufferBlockF> for PictureRect { 202 fn into(self) -> GpuBufferBlockF { 203 GpuBufferBlockF { 204 data: [ 205 self.min.x, 206 
self.min.y, 207 self.max.x, 208 self.max.y, 209 ], 210 } 211 } 212 } 213 214 impl Into<GpuBufferBlockF> for DeviceRect { 215 fn into(self) -> GpuBufferBlockF { 216 GpuBufferBlockF { 217 data: [ 218 self.min.x, 219 self.min.y, 220 self.max.x, 221 self.max.y, 222 ], 223 } 224 } 225 } 226 227 impl Into<GpuBufferBlockF> for PremultipliedColorF { 228 fn into(self) -> GpuBufferBlockF { 229 GpuBufferBlockF { 230 data: [ 231 self.r, 232 self.g, 233 self.b, 234 self.a, 235 ], 236 } 237 } 238 } 239 240 impl From<DeviceIntRect> for GpuBufferBlockF { 241 fn from(rect: DeviceIntRect) -> Self { 242 GpuBufferBlockF { 243 data: [ 244 rect.min.x as f32, 245 rect.min.y as f32, 246 rect.max.x as f32, 247 rect.max.y as f32, 248 ], 249 } 250 } 251 } 252 253 impl From<DeviceIntRect> for GpuBufferBlockI { 254 fn from(rect: DeviceIntRect) -> Self { 255 GpuBufferBlockI { 256 data: [ 257 rect.min.x, 258 rect.min.y, 259 rect.max.x, 260 rect.max.y, 261 ], 262 } 263 } 264 } 265 266 impl Into<GpuBufferBlockF> for [f32; 4] { 267 fn into(self) -> GpuBufferBlockF { 268 GpuBufferBlockF { 269 data: self, 270 } 271 } 272 } 273 274 impl Into<GpuBufferBlockI> for [i32; 4] { 275 fn into(self) -> GpuBufferBlockI { 276 GpuBufferBlockI { 277 data: self, 278 } 279 } 280 } 281 282 pub trait GpuBufferDataF { 283 const NUM_BLOCKS: usize; 284 fn write(&self, writer: &mut GpuBufferWriterF); 285 } 286 287 pub trait GpuBufferDataI { 288 const NUM_BLOCKS: usize; 289 fn write(&self, writer: &mut GpuBufferWriterI); 290 } 291 292 impl GpuBufferDataF for [f32; 4] { 293 const NUM_BLOCKS: usize = 1; 294 fn write(&self, writer: &mut GpuBufferWriterF) { 295 writer.push_one(*self); 296 } 297 } 298 299 impl GpuBufferDataI for [i32; 4] { 300 const NUM_BLOCKS: usize = 1; 301 fn write(&self, writer: &mut GpuBufferWriterI) { 302 writer.push_one(*self); 303 } 304 } 305 306 /// Record a patch to the GPU buffer for a render task 307 struct DeferredBlock { 308 task_id: RenderTaskId, 309 index: usize, 310 } 311 312 /// Interface to 
/// Interface to allow writing multiple GPU blocks, possibly of different types
pub struct GpuBufferWriter<'a, T> {
    /// Destination buffer; blocks are appended starting at `index`.
    buffer: &'a mut FrameVec<T>,
    /// Blocks that must be patched with render-task UV rects in `finalize`.
    deferred: &'a mut Vec<DeferredBlock>,
    /// Offset of the first block written through this writer.
    index: usize,
    /// Upper bound on the number of blocks this writer may push.
    max_block_count: usize,
    /// Pre-shifted frame epoch, stored into handles this writer produces.
    epoch: u32,
}

impl<'a, T> GpuBufferWriter<'a, T> where T: Texel {
    fn new(
        buffer: &'a mut FrameVec<T>,
        deferred: &'a mut Vec<DeferredBlock>,
        index: usize,
        max_block_count: usize,
        epoch: u32,
    ) -> Self {
        GpuBufferWriter {
            buffer,
            deferred,
            index,
            max_block_count,
            epoch,
        }
    }

    /// Push one (16 byte) block of data in to the writer
    pub fn push_one<B>(&mut self, block: B) where B: Into<T> {
        self.buffer.push(block.into());
    }

    /// Push a reference to a render task in to the writer. Once the render
    /// task graph is resolved, this will be patched with the UV rect of the task
    pub fn push_render_task(&mut self, task_id: RenderTaskId) {
        if task_id != RenderTaskId::INVALID {
            self.deferred.push(DeferredBlock {
                task_id,
                index: self.buffer.len(),
            });
        }

        // Reserve the slot now; it is overwritten during `finalize` with the
        // resolved task rect (or left as the default block for INVALID tasks).
        self.buffer.push(T::default());
    }

    /// Close this writer, returning the GPU address of this set of block(s).
    pub fn finish(self) -> GpuBufferAddress {
        assert!(self.buffer.len() <= self.index + self.max_block_count);

        GpuBufferAddress(self.index as u32)
    }

    /// Close this writer, returning an epoch-tagged handle to this set of
    /// block(s). Resolve it with `resolve_handle` during the same frame.
    pub fn finish_with_handle(self) -> GpuBufferHandle {
        assert!(self.buffer.len() <= self.index + self.max_block_count);

        GpuBufferHandle::new(self.index as u32, self.epoch)
    }
}

impl<'a> GpuBufferWriterF<'a> {
    /// Push a typed value; checks it emits exactly `Data::NUM_BLOCKS` blocks.
    pub fn push<Data: GpuBufferDataF>(&mut self, data: &Data) {
        let _start_index = self.buffer.len();
        data.write(self);
        debug_assert_eq!(self.buffer.len() - _start_index, Data::NUM_BLOCKS);
    }
}

impl<'a> GpuBufferWriterI<'a> {
    /// Push a typed value; checks it emits exactly `Data::NUM_BLOCKS` blocks.
    pub fn push<Data: GpuBufferDataI>(&mut self, data: &Data) {
        // Mirror the sanity check done by the F writer above: `write` must
        // push exactly `Data::NUM_BLOCKS` blocks.
        let _start_index = self.buffer.len();
        data.write(self);
        debug_assert_eq!(self.buffer.len() - _start_index, Data::NUM_BLOCKS);
    }
}

impl<'a, T> Drop for GpuBufferWriter<'a, T> {
    // Catch writers that pushed more blocks than they reserved, even when
    // they are dropped without calling finish().
    fn drop(&mut self) {
        assert!(self.buffer.len() <= self.index + self.max_block_count, "Attempt to write too many GpuBuffer blocks");
    }
}

pub struct GpuBufferBuilderImpl<T> {
    // `data` will become the backing store of the GpuBuffer sent along
    // with the frame so it uses the frame allocator.
    data: FrameVec<T>,
    // `deferred` is only used during frame building and not sent with the
    // built frame, so it does not use the same allocator.
    deferred: Vec<DeferredBlock>,

    // Pre-shifted frame epoch stamped into every handle this builder issues.
    epoch: u32,
}

impl<T> GpuBufferBuilderImpl<T> where T: Texel + std::convert::From<DeviceIntRect> {
    pub fn new(memory: &FrameMemory, capacity: usize, frame_id: FrameId) -> Self {
        // Pick the first 8 bits of the frame id and store them in the upper bits
        // of the handles.
406 let epoch = ((frame_id.as_u64() % 254) as u32 + 1) << 24; 407 GpuBufferBuilderImpl { 408 data: memory.new_vec_with_capacity(capacity), 409 deferred: Vec::new(), 410 epoch, 411 } 412 } 413 414 #[allow(dead_code)] 415 pub fn push( 416 &mut self, 417 blocks: &[T], 418 ) -> GpuBufferAddress { 419 assert!(blocks.len() <= MAX_VERTEX_TEXTURE_WIDTH); 420 421 ensure_row_capacity(&mut self.data, blocks.len()); 422 423 let index = self.data.len(); 424 425 self.data.extend_from_slice(blocks); 426 427 GpuBufferAddress(index as u32 | self.epoch) 428 } 429 430 /// Begin writing a specific number of blocks 431 pub fn write_blocks( 432 &mut self, 433 max_block_count: usize, 434 ) -> GpuBufferWriter<T> { 435 assert!(max_block_count <= MAX_VERTEX_TEXTURE_WIDTH); 436 437 ensure_row_capacity(&mut self.data, max_block_count); 438 439 let index = self.data.len(); 440 441 GpuBufferWriter::new( 442 &mut self.data, 443 &mut self.deferred, 444 index, 445 max_block_count, 446 self.epoch, 447 ) 448 } 449 450 // Reserve space in the gpu buffer for data that will be written by the 451 // renderer. 452 pub fn reserve_renderer_deferred_blocks(&mut self, block_count: usize) -> GpuBufferHandle { 453 ensure_row_capacity(&mut self.data, block_count); 454 455 let index = self.data.len(); 456 457 self.data.reserve(block_count); 458 for _ in 0 ..block_count { 459 self.data.push(Default::default()); 460 } 461 462 GpuBufferHandle::new(index as u32, self.epoch) 463 } 464 465 pub fn finalize( 466 mut self, 467 render_tasks: &RenderTaskGraph, 468 ) -> GpuBuffer<T> { 469 finish_row(&mut self.data); 470 471 let len = self.data.len(); 472 assert!(len % MAX_VERTEX_TEXTURE_WIDTH == 0); 473 474 // At this point, we know that the render task graph has been built, and we can 475 // query the location of any dynamic (render target) or static (texture cache) 476 // task. This allows us to patch the UV rects in to the GPU buffer before upload 477 // to the GPU. 478 for block in self.deferred.drain(..) 
{ 479 let render_task = &render_tasks[block.task_id]; 480 let target_rect = render_task.get_target_rect(); 481 482 let uv_rect = match render_task.uv_rect_kind() { 483 UvRectKind::Rect => { 484 target_rect 485 } 486 UvRectKind::Quad { top_left, bottom_right, .. } => { 487 let size = target_rect.size(); 488 489 DeviceIntRect::new( 490 DeviceIntPoint::new( 491 target_rect.min.x + (top_left.x * size.width as f32).round() as i32, 492 target_rect.min.y + (top_left.y * size.height as f32).round() as i32, 493 ), 494 DeviceIntPoint::new( 495 target_rect.min.x + (bottom_right.x * size.width as f32).round() as i32, 496 target_rect.min.y + (bottom_right.y * size.height as f32).round() as i32, 497 ), 498 ) 499 } 500 }; 501 502 self.data[block.index] = uv_rect.into(); 503 } 504 505 GpuBuffer { 506 data: self.data, 507 size: DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, (len / MAX_VERTEX_TEXTURE_WIDTH) as i32), 508 format: T::image_format(), 509 epoch: self.epoch, 510 } 511 } 512 513 pub fn resolve_handle(&self, handle: GpuBufferHandle) -> GpuBufferAddress { 514 if handle == GpuBufferHandle::INVALID { 515 return GpuBufferAddress::INVALID; 516 } 517 518 let epoch = handle.0 & GpuBufferHandle::EPOCH_MASK; 519 assert!(self.epoch == epoch); 520 521 GpuBufferAddress(handle.0 & !GpuBufferHandle::EPOCH_MASK) 522 } 523 524 /// Panics if the handle cannot be used this frame. 525 #[allow(unused)] 526 pub fn check_handle(&self, handle: GpuBufferHandle) { 527 if handle == GpuBufferHandle::INVALID { 528 return; 529 } 530 let epoch = handle.0 & GpuBufferHandle::EPOCH_MASK; 531 assert!(self.epoch == epoch); 532 } 533 } 534 535 fn ensure_row_capacity<T: Default>(data: &mut FrameVec<T>, cap: usize) { 536 if (data.len() % MAX_VERTEX_TEXTURE_WIDTH) + cap > MAX_VERTEX_TEXTURE_WIDTH { 537 finish_row(data); 538 } 539 } 540 541 fn finish_row<T: Default>(data: &mut FrameVec<T>) { 542 let required_len = (data.len() + MAX_VERTEX_TEXTURE_WIDTH-1) & !(MAX_VERTEX_TEXTURE_WIDTH-1); 543 for _ in 0 .. 
 required_len - data.len() {
        data.push(T::default());
    }
}

/// The built GPU buffer: a texture-shaped array of blocks ready for upload.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuBuffer<T> {
    pub data: FrameVec<T>,
    // Texture dimensions: MAX_VERTEX_TEXTURE_WIDTH x number of rows.
    pub size: DeviceIntSize,
    pub format: ImageFormat,
    // Frame epoch used to validate handles resolved against this buffer.
    epoch: u32,
}

impl<T> GpuBuffer<T> {
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Turn a handle into a raw address, panicking if the handle was created
    /// in a different frame (its epoch bits must match this buffer's epoch).
    pub fn resolve_handle(&self, handle: GpuBufferHandle) -> GpuBufferAddress {
        if handle == GpuBufferHandle::INVALID {
            return GpuBufferAddress::INVALID;
        }

        let epoch = handle.0 & GpuBufferHandle::EPOCH_MASK;
        assert!(self.epoch == epoch);

        GpuBufferAddress(handle.0 & !GpuBufferHandle::EPOCH_MASK)
    }
}

// One full row plus two single blocks must pad out to exactly two rows.
#[test]
fn test_gpu_buffer_sizing_push() {
    let frame_memory = FrameMemory::fallback();
    let render_task_graph = RenderTaskGraph::new_for_testing();
    let mut builder = GpuBufferBuilderF::new(&frame_memory, 0, FrameId::first());

    let row = vec![GpuBufferBlockF::EMPTY; MAX_VERTEX_TEXTURE_WIDTH];
    builder.push(&row);

    builder.push(&[GpuBufferBlockF::EMPTY]);
    builder.push(&[GpuBufferBlockF::EMPTY]);

    let buffer = builder.finalize(&render_task_graph);
    assert_eq!(buffer.data.len(), MAX_VERTEX_TEXTURE_WIDTH * 2);
}

// Same sizing expectation as above, but going through the writer interface.
#[test]
fn test_gpu_buffer_sizing_writer() {
    let frame_memory = FrameMemory::fallback();
    let render_task_graph = RenderTaskGraph::new_for_testing();
    let mut builder = GpuBufferBuilderF::new(&frame_memory, 0, FrameId::first());

    let mut writer = builder.write_blocks(MAX_VERTEX_TEXTURE_WIDTH);
    for _ in 0 ..
 MAX_VERTEX_TEXTURE_WIDTH {
        writer.push_one(GpuBufferBlockF::EMPTY);
    }
    writer.finish();

    // The two single-block writes land on the second row; finalize pads it out.
    let mut writer = builder.write_blocks(1);
    writer.push_one(GpuBufferBlockF::EMPTY);
    writer.finish();

    let mut writer = builder.write_blocks(1);
    writer.push_one(GpuBufferBlockF::EMPTY);
    writer.finish();

    let buffer = builder.finalize(&render_task_graph);
    assert_eq!(buffer.data.len(), MAX_VERTEX_TEXTURE_WIDTH * 2);
}