upload.rs (32915B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 //! This module contains the convoluted logic that goes into uploading content into 6 //! the texture cache's textures. 7 //! 8 //! We need to support various combinations of code paths depending on the quirks of 9 //! each hardware/driver configuration: 10 //! - direct upload, 11 //! - staged upload via a pixel buffer object, 12 //! - staged upload via a direct upload to a staging texture where PBO's aren't supported, 13 //! - copy from the staging to destination textures, either via blits or batched draw calls. 14 //! 15 //! Conceptually a lot of this logic should probably be in the device module, but some code 16 //! here relies on submitting draw calls via the renderer. 17 18 19 use std::mem; 20 use std::collections::VecDeque; 21 use std::sync::Arc; 22 use std::time::Duration; 23 use euclid::{Transform3D, point2}; 24 use malloc_size_of::MallocSizeOfOps; 25 use api::units::*; 26 use api::{ExternalImageSource, ImageBufferKind, ImageFormat}; 27 use crate::renderer::{ 28 Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR 29 }; 30 use crate::internal_types::{ 31 FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate, 32 CacheTextureId, RenderTargetInfo, 33 }; 34 use crate::device::{ 35 Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader, 36 TextureFilter, 37 }; 38 use crate::gpu_types::CopyInstance; 39 use crate::batch::BatchTextures; 40 use crate::texture_pack::{GuillotineAllocator, FreeRectSlice}; 41 use crate::profiler; 42 use crate::render_api::MemoryReport; 43 44 pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512); 45 const BATCH_UPLOAD_FORMAT_COUNT: usize = 4; 46 47 /// Upload a number of items to texture cache textures. 48 /// 49 /// This is the main entry point of the texture cache upload code. 50 /// See also the module documentation for more information. 51 pub fn upload_to_texture_cache( 52 renderer: &mut Renderer, 53 update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>, 54 ) { 55 56 let mut stats = UploadStats { 57 num_draw_calls: 0, 58 upload_time: 0, 59 cpu_buffer_alloc_time: 0, 60 texture_alloc_time: 0, 61 cpu_copy_time: 0, 62 gpu_copy_commands_time: 0, 63 bytes_uploaded: 0, 64 items_uploaded: 0, 65 }; 66 67 let upload_total_start = zeitstempel::now(); 68 69 let mut batch_upload_textures = Vec::new(); 70 71 // A list of copies that must be performed from the temporary textures to the texture cache. 72 let mut batch_upload_copies = Vec::new(); 73 74 // For each texture format, this stores a list of staging buffers 75 // and a texture allocator for packing the buffers. 76 let mut batch_upload_buffers = FastHashMap::default(); 77 78 // For best performance we use a single TextureUploader for all uploads. 79 // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs. 80 let mut uploader = renderer.device.upload_texture( 81 &mut renderer.texture_upload_pbo_pool, 82 ); 83 84 let num_updates = update_list.len(); 85 86 for (texture_id, updates) in update_list { 87 let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture; 88 for update in updates { 89 let TextureCacheUpdate { rect, stride, offset, format_override, source } = update; 90 let mut arc_data = None; 91 let dummy_data; 92 let data = match source { 93 TextureUpdateSource::Bytes { ref data } => { 94 arc_data = Some(data.clone()); 95 &data[offset as usize ..] 96 } 97 TextureUpdateSource::External { id, channel_index } => { 98 let handler = renderer.external_image_handler 99 .as_mut() 100 .expect("Found external image, but no handler set!"); 101 // The filter is only relevant for NativeTexture external images. 102 match handler.lock(id, channel_index, false).source { 103 ExternalImageSource::RawData(data) => { 104 &data[offset as usize ..] 105 } 106 ExternalImageSource::Invalid => { 107 // Create a local buffer to fill the pbo. 108 let bpp = texture.get_format().bytes_per_pixel(); 109 let width = stride.unwrap_or(rect.width() * bpp); 110 let total_size = width * rect.height(); 111 // WR haven't support RGBAF32 format in texture_cache, so 112 // we use u8 type here. 113 dummy_data = vec![0xFFu8; total_size as usize]; 114 &dummy_data 115 } 116 ExternalImageSource::NativeTexture(eid) => { 117 panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id); 118 } 119 } 120 } 121 TextureUpdateSource::DebugClear => { 122 let draw_target = DrawTarget::from_texture( 123 texture, 124 false, 125 ); 126 renderer.device.bind_draw_target(draw_target); 127 renderer.device.clear_target( 128 Some(TEXTURE_CACHE_DBG_CLEAR_COLOR), 129 None, 130 Some(draw_target.to_framebuffer_rect(update.rect.to_i32())) 131 ); 132 133 continue; 134 } 135 }; 136 137 stats.items_uploaded += 1; 138 139 let use_batch_upload = renderer.device.use_batched_texture_uploads() && 140 texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) && 141 rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width && 142 rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height && 143 rect.area() < renderer.device.batched_upload_threshold(); 144 145 if use_batch_upload 146 && arc_data.is_some() 147 && matches!(renderer.device.upload_method(), &UploadMethod::Immediate) 148 && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 { 149 skip_staging_buffer( 150 &mut renderer.device, 151 &mut renderer.staging_texture_pool, 152 rect, 153 stride, 154 arc_data.unwrap(), 155 texture_id, 156 texture, 157 &mut batch_upload_buffers, 158 &mut batch_upload_textures, 159 &mut batch_upload_copies, 160 &mut stats, 161 ); 162 } else if use_batch_upload { 163 copy_into_staging_buffer( 164 &mut renderer.device, 165 &mut uploader, 166 &mut renderer.staging_texture_pool, 167 rect, 168 stride, 169 data, 170 texture_id, 171 texture, 172 &mut batch_upload_buffers, 173 &mut batch_upload_textures, 174 &mut batch_upload_copies, 175 &mut stats, 176 ); 177 } else { 178 let upload_start_time = zeitstempel::now(); 179 180 stats.bytes_uploaded += uploader.upload( 181 &mut renderer.device, 182 texture, 183 rect, 184 stride, 185 format_override, 186 data.as_ptr(), 187 data.len() 188 ); 189 190 stats.upload_time += zeitstempel::now() - upload_start_time; 191 } 192 193 if let TextureUpdateSource::External { id, channel_index } = source { 194 let handler = renderer.external_image_handler 195 .as_mut() 196 .expect("Found external image, but no handler set!"); 197 handler.unlock(id, channel_index); 198 } 199 } 200 } 201 202 let upload_start_time = zeitstempel::now(); 203 // Upload batched texture updates to their temporary textures. 204 for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() { 205 let texture = &batch_upload_textures[batch_buffer.texture_index]; 206 match batch_buffer.staging_buffer { 207 StagingBufferKind::Pbo(pbo) => { 208 stats.bytes_uploaded += uploader.upload_staged( 209 &mut renderer.device, 210 texture, 211 DeviceIntRect::from_size(texture.get_dimensions()), 212 None, 213 pbo, 214 ); 215 } 216 StagingBufferKind::CpuBuffer { bytes, .. } => { 217 let bpp = texture.get_format().bytes_per_pixel(); 218 stats.bytes_uploaded += uploader.upload( 219 &mut renderer.device, 220 texture, 221 batch_buffer.upload_rect, 222 Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp), 223 None, 224 bytes.as_ptr(), 225 bytes.len() 226 ); 227 renderer.staging_texture_pool.return_temporary_buffer(bytes); 228 } 229 StagingBufferKind::Image { bytes, stride } => { 230 stats.bytes_uploaded += uploader.upload( 231 &mut renderer.device, 232 texture, 233 batch_buffer.upload_rect, 234 stride, 235 None, 236 bytes.as_ptr(), 237 bytes.len() 238 ); 239 } 240 } 241 } 242 stats.upload_time += zeitstempel::now() - upload_start_time; 243 244 245 // Flush all uploads, batched or otherwise. 246 let flush_start_time = zeitstempel::now(); 247 uploader.flush(&mut renderer.device); 248 stats.upload_time += zeitstempel::now() - flush_start_time; 249 250 if !batch_upload_copies.is_empty() { 251 // Copy updates that were batch uploaded to their correct destination in the texture cache. 252 // Sort them by destination and source to minimize framebuffer binding changes. 253 batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index)); 254 255 let gpu_copy_start = zeitstempel::now(); 256 257 if renderer.device.use_draw_calls_for_texture_copy() { 258 // Some drivers have a very high CPU overhead when submitting hundreds of small blit 259 // commands (low end intel drivers on Windows for example can take take 100+ ms submitting a 260 // few hundred blits). In this case we do the copy with batched draw calls. 261 copy_from_staging_to_cache_using_draw_calls( 262 renderer, 263 &mut stats, 264 &batch_upload_textures, 265 batch_upload_copies, 266 ); 267 } else { 268 copy_from_staging_to_cache( 269 renderer, 270 &batch_upload_textures, 271 batch_upload_copies, 272 ); 273 } 274 275 stats.gpu_copy_commands_time += zeitstempel::now() - gpu_copy_start; 276 } 277 278 for texture in batch_upload_textures.drain(..) { 279 renderer.staging_texture_pool.return_texture(texture); 280 } 281 282 // Update the profile counters. We use add instead of set because 283 // this function can be called several times per frame. 284 // We don't update the counters when their value is zero, so that 285 // the profiler can treat them as events and we can get notified 286 // when they happen. 287 288 let upload_total = zeitstempel::now() - upload_total_start; 289 renderer.profile.add( 290 profiler::TOTAL_UPLOAD_TIME, 291 profiler::ns_to_ms(upload_total) 292 ); 293 294 if num_updates > 0 { 295 renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates); 296 } 297 298 if stats.bytes_uploaded > 0 { 299 renderer.profile.add( 300 profiler::TEXTURE_UPLOADS_MEM, 301 profiler::bytes_to_mb(stats.bytes_uploaded) 302 ); 303 } 304 305 if stats.cpu_copy_time > 0 { 306 renderer.profile.add( 307 profiler::UPLOAD_CPU_COPY_TIME, 308 profiler::ns_to_ms(stats.cpu_copy_time) 309 ); 310 } 311 if stats.upload_time > 0 { 312 renderer.profile.add( 313 profiler::UPLOAD_TIME, 314 profiler::ns_to_ms(stats.upload_time) 315 ); 316 } 317 if stats.texture_alloc_time > 0 { 318 renderer.profile.add( 319 profiler::STAGING_TEXTURE_ALLOCATION_TIME, 320 profiler::ns_to_ms(stats.texture_alloc_time) 321 ); 322 } 323 if stats.cpu_buffer_alloc_time > 0 { 324 renderer.profile.add( 325 profiler::CPU_TEXTURE_ALLOCATION_TIME, 326 profiler::ns_to_ms(stats.cpu_buffer_alloc_time) 327 ); 328 } 329 if stats.num_draw_calls > 0{ 330 renderer.profile.add( 331 profiler::UPLOAD_NUM_COPY_BATCHES, 332 stats.num_draw_calls 333 ); 334 } 335 336 if stats.gpu_copy_commands_time > 0 { 337 renderer.profile.add( 338 profiler::UPLOAD_GPU_COPY_TIME, 339 profiler::ns_to_ms(stats.gpu_copy_commands_time) 340 ); 341 } 342 343 let add_markers = profiler::thread_is_being_profiled(); 344 if add_markers && stats.bytes_uploaded > 0 { 345 let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded); 346 profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total)); 347 } 348 } 349 350 /// Copy an item into a batched upload staging buffer. 351 fn copy_into_staging_buffer<'a>( 352 device: &mut Device, 353 uploader: &mut TextureUploader< 'a>, 354 staging_texture_pool: &mut UploadTexturePool, 355 update_rect: DeviceIntRect, 356 update_stride: Option<i32>, 357 data: &[u8], 358 dest_texture_id: CacheTextureId, 359 texture: &Texture, 360 batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>, 361 batch_upload_textures: &mut Vec<Texture>, 362 batch_upload_copies: &mut Vec<BatchUploadCopy>, 363 stats: &mut UploadStats 364 ) { 365 let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format()) 366 .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new())); 367 368 // Allocate a region within the staging buffer for this update. If there is 369 // no room in an existing buffer then allocate another texture and buffer. 370 let (slice, origin) = match allocator.allocate(&update_rect.size()) { 371 Some((slice, origin)) => (slice, origin), 372 None => { 373 let new_slice = FreeRectSlice(buffers.len() as u32); 374 allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size()); 375 376 let texture_alloc_time_start = zeitstempel::now(); 377 let staging_texture = staging_texture_pool.get_texture(device, texture.get_format()); 378 stats.texture_alloc_time = zeitstempel::now() - texture_alloc_time_start; 379 380 let texture_index = batch_upload_textures.len(); 381 batch_upload_textures.push(staging_texture); 382 383 let cpu_buffer_alloc_start_time = zeitstempel::now(); 384 let staging_buffer = match device.upload_method() { 385 UploadMethod::Immediate => StagingBufferKind::CpuBuffer { 386 bytes: staging_texture_pool.get_temporary_buffer(), 387 }, 388 UploadMethod::PixelBuffer(_) => { 389 let pbo = uploader.stage( 390 device, 391 texture.get_format(), 392 BATCH_UPLOAD_TEXTURE_SIZE, 393 ).unwrap(); 394 395 StagingBufferKind::Pbo(pbo) 396 } 397 }; 398 stats.cpu_buffer_alloc_time += zeitstempel::now() - cpu_buffer_alloc_start_time; 399 400 buffers.push(BatchUploadBuffer { 401 staging_buffer, 402 texture_index, 403 upload_rect: DeviceIntRect::zero() 404 }); 405 406 (new_slice, DeviceIntPoint::zero()) 407 } 408 }; 409 let buffer = &mut buffers[slice.0 as usize]; 410 let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size()); 411 buffer.upload_rect = buffer.upload_rect.union(&allocated_rect); 412 413 batch_upload_copies.push(BatchUploadCopy { 414 src_texture_index: buffer.texture_index, 415 src_offset: allocated_rect.min, 416 dest_texture_id, 417 dest_offset: update_rect.min, 418 size: update_rect.size(), 419 }); 420 421 unsafe { 422 let memcpy_start_time = zeitstempel::now(); 423 let bpp = texture.get_format().bytes_per_pixel() as usize; 424 let width_bytes = update_rect.width() as usize * bpp; 425 let src_stride = update_stride.map_or(width_bytes, |stride| { 426 assert!(stride >= 0); 427 stride as usize 428 }); 429 let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes; 430 assert!(src_size <= data.len()); 431 432 let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size); 433 let (dst_stride, dst) = match &mut buffer.staging_buffer { 434 StagingBufferKind::Pbo(buffer) => ( 435 buffer.get_stride(), 436 buffer.get_mapping(), 437 ), 438 StagingBufferKind::CpuBuffer { bytes } => ( 439 BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp, 440 &mut bytes[..], 441 ), 442 StagingBufferKind::Image { .. } => unreachable!(), 443 }; 444 445 // copy the data line-by-line in to the buffer so that we do not overwrite 446 // any other region of the buffer. 447 for y in 0..allocated_rect.height() as usize { 448 let src_start = y * src_stride; 449 let src_end = src_start + width_bytes; 450 let dst_start = (allocated_rect.min.y as usize + y as usize) * dst_stride + 451 allocated_rect.min.x as usize * bpp; 452 let dst_end = dst_start + width_bytes; 453 454 dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end]) 455 } 456 457 stats.cpu_copy_time += zeitstempel::now() - memcpy_start_time; 458 } 459 } 460 461 /// Take this code path instead of copying into a staging CPU buffer when the image 462 /// we would copy is large enough that it's unlikely anything else would fit in the 463 /// buffer, therefore we might as well copy directly from the source image's pixels. 464 fn skip_staging_buffer<'a>( 465 device: &mut Device, 466 staging_texture_pool: &mut UploadTexturePool, 467 update_rect: DeviceIntRect, 468 stride: Option<i32>, 469 data: Arc<Vec<u8>>, 470 dest_texture_id: CacheTextureId, 471 texture: &Texture, 472 batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>, 473 batch_upload_textures: &mut Vec<Texture>, 474 batch_upload_copies: &mut Vec<BatchUploadCopy>, 475 stats: &mut UploadStats 476 ) { 477 let (_, buffers) = batch_upload_buffers.entry(texture.get_format()) 478 .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new())); 479 480 let texture_alloc_time_start = zeitstempel::now(); 481 let staging_texture = staging_texture_pool.get_texture(device, texture.get_format()); 482 stats.texture_alloc_time = zeitstempel::now() - texture_alloc_time_start; 483 484 let texture_index = batch_upload_textures.len(); 485 batch_upload_textures.push(staging_texture); 486 487 buffers.push(BatchUploadBuffer { 488 staging_buffer: StagingBufferKind::Image { bytes: data, stride }, 489 texture_index, 490 upload_rect: DeviceIntRect::from_size(update_rect.size()) 491 }); 492 493 batch_upload_copies.push(BatchUploadCopy { 494 src_texture_index: texture_index, 495 src_offset: point2(0, 0), 496 dest_texture_id, 497 dest_offset: update_rect.min, 498 size: update_rect.size(), 499 }); 500 } 501 502 503 /// Copy from the staging PBOs or textures to texture cache textures using blit commands. 504 /// 505 /// Using blits instead of draw calls is supposedly more efficient but some drivers have 506 /// a very high per-command overhead so in some configurations we end up using 507 /// copy_from_staging_to_cache_using_draw_calls instead. 508 fn copy_from_staging_to_cache( 509 renderer: &mut Renderer, 510 batch_upload_textures: &[Texture], 511 batch_upload_copies: Vec<BatchUploadCopy>, 512 ) { 513 for copy in batch_upload_copies { 514 let dest_texture = &renderer.texture_resolver.texture_cache_map[©.dest_texture_id].texture; 515 516 renderer.device.copy_texture_sub_region( 517 &batch_upload_textures[copy.src_texture_index], 518 copy.src_offset.x as _, 519 copy.src_offset.y as _, 520 dest_texture, 521 copy.dest_offset.x as _, 522 copy.dest_offset.y as _, 523 copy.size.width as _, 524 copy.size.height as _, 525 ); 526 } 527 } 528 529 /// Generate and submit composite shader batches to copy from 530 /// the staging textures to the destination cache textures. 531 /// 532 /// If this shows up in GPU time ptofiles we could replace it with 533 /// a simpler shader (composite.glsl is already quite simple). 534 fn copy_from_staging_to_cache_using_draw_calls( 535 renderer: &mut Renderer, 536 stats: &mut UploadStats, 537 batch_upload_textures: &[Texture], 538 batch_upload_copies: Vec<BatchUploadCopy>, 539 ) { 540 let mut copy_instances = Vec::new(); 541 let mut prev_src = None; 542 let mut prev_dst = None; 543 let mut dst_texture_size = DeviceSize::new(0.0, 0.0); 544 545 for copy in batch_upload_copies { 546 547 let src_changed = prev_src != Some(copy.src_texture_index); 548 let dst_changed = prev_dst != Some(copy.dest_texture_id); 549 550 if (src_changed || dst_changed) && !copy_instances.is_empty() { 551 renderer.draw_instanced_batch( 552 ©_instances, 553 VertexArrayKind::Copy, 554 // We bind the staging texture manually because it isn't known 555 // to the texture resolver. 556 &BatchTextures::empty(), 557 &mut RendererStats::default(), 558 ); 559 560 stats.num_draw_calls += 1; 561 copy_instances.clear(); 562 } 563 564 if dst_changed { 565 let dest_texture = &renderer.texture_resolver.texture_cache_map[©.dest_texture_id].texture; 566 dst_texture_size = dest_texture.get_dimensions().to_f32(); 567 568 let draw_target = DrawTarget::from_texture(dest_texture, false); 569 renderer.device.bind_draw_target(draw_target); 570 571 renderer.shaders 572 .borrow_mut() 573 .ps_copy() 574 .bind( 575 &mut renderer.device, 576 &Transform3D::identity(), 577 None, 578 &mut renderer.renderer_errors, 579 &mut renderer.profile, 580 &mut renderer.command_log, 581 ); 582 583 prev_dst = Some(copy.dest_texture_id); 584 } 585 586 if src_changed { 587 renderer.device.bind_texture( 588 TextureSampler::Color0, 589 &batch_upload_textures[copy.src_texture_index], 590 Swizzle::default(), 591 ); 592 593 prev_src = Some(copy.src_texture_index) 594 } 595 596 let src_rect = DeviceRect::from_origin_and_size( 597 copy.src_offset.to_f32(), 598 copy.size.to_f32(), 599 ); 600 601 let dst_rect = DeviceRect::from_origin_and_size( 602 copy.dest_offset.to_f32(), 603 copy.size.to_f32(), 604 ); 605 606 copy_instances.push(CopyInstance { 607 src_rect, 608 dst_rect, 609 dst_texture_size, 610 }); 611 } 612 613 if !copy_instances.is_empty() { 614 renderer.draw_instanced_batch( 615 ©_instances, 616 VertexArrayKind::Copy, 617 &BatchTextures::empty(), 618 &mut RendererStats::default(), 619 ); 620 621 stats.num_draw_calls += 1; 622 } 623 } 624 625 /// A very basic pool to avoid reallocating staging textures as well as staging 626 /// CPU side buffers. 627 pub struct UploadTexturePool { 628 /// The textures in the pool associated with a last used frame index. 629 /// 630 /// The outer array corresponds to each of teh three supported texture formats. 631 textures: [VecDeque<(Texture, u64)>; BATCH_UPLOAD_FORMAT_COUNT], 632 // Frame at which to deallocate some textures if there are too many in the pool, 633 // for each format. 634 delay_texture_deallocation: [u64; BATCH_UPLOAD_FORMAT_COUNT], 635 current_frame: u64, 636 637 /// Temporary buffers that are used when using staging uploads + glTexImage2D. 638 /// 639 /// Temporary buffers aren't used asynchronously so they can be reused every frame. 640 /// To keep things simple we always allocate enough memory for formats with four bytes 641 /// per pixel (more than we need for alpha-only textures but it works just as well). 642 temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>, 643 min_temporary_buffers: usize, 644 delay_buffer_deallocation: u64, 645 } 646 647 impl UploadTexturePool { 648 pub fn new() -> Self { 649 UploadTexturePool { 650 textures: [VecDeque::new(), VecDeque::new(), VecDeque::new(), VecDeque::new()], 651 delay_texture_deallocation: [0; BATCH_UPLOAD_FORMAT_COUNT], 652 current_frame: 0, 653 temporary_buffers: Vec::new(), 654 min_temporary_buffers: 0, 655 delay_buffer_deallocation: 0, 656 } 657 } 658 659 fn format_index(&self, format: ImageFormat) -> usize { 660 match format { 661 ImageFormat::RGBA8 => 0, 662 ImageFormat::BGRA8 => 1, 663 ImageFormat::R8 => 2, 664 ImageFormat::R16 => 3, 665 _ => { panic!("unexpected format {:?}", format); } 666 } 667 } 668 669 pub fn begin_frame(&mut self) { 670 self.current_frame += 1; 671 self.min_temporary_buffers = self.temporary_buffers.len(); 672 } 673 674 /// Create or reuse a staging texture. 675 /// 676 /// See also return_texture. 677 pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture { 678 679 // First try to reuse a texture from the pool. 680 // "available" here means hasn't been used for 2 frames to avoid stalls. 681 // No need to scan the vector. Newer textures are always pushed at the back 682 // of the vector so we know the first element is the least recently used. 683 let format_idx = self.format_index(format); 684 let can_reuse = self.textures[format_idx].get(0) 685 .map(|tex| self.current_frame - tex.1 > 2) 686 .unwrap_or(false); 687 688 if can_reuse { 689 return self.textures[format_idx].pop_front().unwrap().0; 690 } 691 692 // If we couldn't find an available texture, create a new one. 693 694 device.create_texture( 695 ImageBufferKind::Texture2D, 696 format, 697 BATCH_UPLOAD_TEXTURE_SIZE.width, 698 BATCH_UPLOAD_TEXTURE_SIZE.height, 699 TextureFilter::Nearest, 700 // Currently we need render target support as we always use glBlitFramebuffer 701 // to copy the texture data. Instead, we should use glCopyImageSubData on some 702 // platforms, and avoid creating the FBOs in that case. 703 Some(RenderTargetInfo { has_depth: false }), 704 ) 705 } 706 707 /// Hand the staging texture back to the pool after being done with uploads. 708 /// 709 /// The texture must have been obtained from this pool via get_texture. 710 pub fn return_texture(&mut self, texture: Texture) { 711 let format_idx = self.format_index(texture.get_format()); 712 self.textures[format_idx].push_back((texture, self.current_frame)); 713 } 714 715 /// Create or reuse a temporary CPU buffer. 716 /// 717 /// These buffers are used in the batched upload path when PBOs are not supported. 718 /// Content is first written to the temporary buffer and uploaded via a single 719 /// glTexSubImage2D call. 720 pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> { 721 let buffer = self.temporary_buffers.pop().unwrap_or_else(|| { 722 vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4] 723 }); 724 self.min_temporary_buffers = self.min_temporary_buffers.min(self.temporary_buffers.len()); 725 buffer 726 } 727 728 /// Return memory that was obtained from this pool via get_temporary_buffer. 729 pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) { 730 assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4); 731 self.temporary_buffers.push(buffer); 732 } 733 734 /// Deallocate this pool's CPU and GPU memory. 735 pub fn delete_textures(&mut self, device: &mut Device) { 736 for format in &mut self.textures { 737 while let Some(texture) = format.pop_back() { 738 device.delete_texture(texture.0) 739 } 740 } 741 self.temporary_buffers.clear(); 742 } 743 744 /// Deallocate some textures if there are too many for a long time. 745 pub fn end_frame(&mut self, device: &mut Device) { 746 for format_idx in 0..self.textures.len() { 747 // Count the number of reusable staging textures. 748 // if it stays high for a large number of frames, truncate it back to 8-ish 749 // over multiple frames. 750 751 let mut num_reusable_textures = 0; 752 for texture in &self.textures[format_idx] { 753 if self.current_frame - texture.1 > 2 { 754 num_reusable_textures += 1; 755 } 756 } 757 758 if num_reusable_textures < 8 { 759 // Don't deallocate textures for another 120 frames. 760 self.delay_texture_deallocation[format_idx] = self.current_frame + 120; 761 } 762 763 // Deallocate up to 4 staging textures every frame. 764 let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] { 765 num_reusable_textures.min(4) 766 } else { 767 0 768 }; 769 770 for _ in 0..to_remove { 771 let texture = self.textures[format_idx].pop_front().unwrap().0; 772 device.delete_texture(texture); 773 } 774 } 775 776 // Similar logic for temporary CPU buffers. Our calls to get and return 777 // temporary buffers should have been balanced for this frame, but the call 778 // get_temporary_buffer will allocate a buffer if the vec is empty. Since we 779 // carry these buffers from frame to frame, we keep track of the smallest 780 // length of the temporary_buffers vec that we encountered this frame. Those 781 // buffers were not touched and we deallocate some if there are a lot of them. 782 let unused_buffers = self.min_temporary_buffers; 783 if unused_buffers < 8 { 784 self.delay_buffer_deallocation = self.current_frame + 120; 785 } 786 let to_remove = if self.current_frame > self.delay_buffer_deallocation { 787 unused_buffers.min(4) 788 } else { 789 0 790 }; 791 for _ in 0..to_remove { 792 // Unlike textures it doesn't matter whether we pop from the front or back 793 // of the vector. 794 self.temporary_buffers.pop(); 795 } 796 } 797 798 pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) { 799 for buf in &self.temporary_buffers { 800 report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) }; 801 } 802 803 for format in &self.textures { 804 for texture in format { 805 report.upload_staging_textures += texture.0.size_in_bytes(); 806 } 807 } 808 } 809 } 810 811 struct UploadStats { 812 num_draw_calls: u32, 813 upload_time: u64, 814 cpu_buffer_alloc_time: u64, 815 texture_alloc_time: u64, 816 cpu_copy_time: u64, 817 gpu_copy_commands_time: u64, 818 bytes_uploaded: usize, 819 items_uploaded: usize, 820 } 821 822 #[derive(Debug)] 823 enum StagingBufferKind<'a> { 824 Pbo(UploadStagingBuffer<'a>), 825 CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> }, 826 Image { bytes: Arc<Vec<u8>>, stride: Option<i32> }, 827 } 828 #[derive(Debug)] 829 struct BatchUploadBuffer<'a> { 830 staging_buffer: StagingBufferKind<'a>, 831 texture_index: usize, 832 // A rectangle containing all items going into this staging texture, so 833 // that we can avoid uploading the entire area if we are using glTexSubImage2d. 834 upload_rect: DeviceIntRect, 835 } 836 837 // On some devices performing many small texture uploads is slow, so instead we batch 838 // updates in to a small number of uploads to temporary textures, then copy from those 839 // textures to the correct place in the texture cache. 840 // A list of temporary textures that batches of updates are uploaded to. 841 #[derive(Debug)] 842 struct BatchUploadCopy { 843 // Index within batch_upload_textures 844 src_texture_index: usize, 845 src_offset: DeviceIntPoint, 846 dest_texture_id: CacheTextureId, 847 dest_offset: DeviceIntPoint, 848 size: DeviceIntSize, 849 }