tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

upload.rs (32915B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 //! This module contains the convoluted logic that goes into uploading content into
      6 //! the texture cache's textures.
      7 //!
      8 //! We need to support various combinations of code paths depending on the quirks of
      9 //! each hardware/driver configuration:
     10 //! - direct upload,
     11 //! - staged upload via a pixel buffer object,
     12 //! - staged upload via a direct upload to a staging texture where PBO's aren't supported,
     13 //! - copy from the staging to destination textures, either via blits or batched draw calls.
     14 //!
     15 //! Conceptually a lot of this logic should probably be in the device module, but some code
     16 //! here relies on submitting draw calls via the renderer.
     17 
     18 
     19 use std::mem;
     20 use std::collections::VecDeque;
     21 use std::sync::Arc;
     22 use std::time::Duration;
     23 use euclid::{Transform3D, point2};
     24 use malloc_size_of::MallocSizeOfOps;
     25 use api::units::*;
     26 use api::{ExternalImageSource, ImageBufferKind, ImageFormat};
     27 use crate::renderer::{
     28    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
     29 };
     30 use crate::internal_types::{
     31    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
     32    CacheTextureId, RenderTargetInfo,
     33 };
     34 use crate::device::{
     35    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
     36    TextureFilter,
     37 };
     38 use crate::gpu_types::CopyInstance;
     39 use crate::batch::BatchTextures;
     40 use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
     41 use crate::profiler;
     42 use crate::render_api::MemoryReport;
     43 
/// Dimensions of the square staging textures used by the batched upload path.
pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);
/// Number of texture formats the staging texture pool handles
/// (RGBA8, BGRA8, R8 and R16 — see `UploadTexturePool::format_index`).
const BATCH_UPLOAD_FORMAT_COUNT: usize = 4;
     46 
/// Upload a number of items to texture cache textures.
///
/// This is the main entry point of the texture cache upload code.
/// See also the module documentation for more information.
pub fn upload_to_texture_cache(
    renderer: &mut Renderer,
    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
) {

    let mut stats = UploadStats {
        num_draw_calls: 0,
        upload_time: 0,
        cpu_buffer_alloc_time: 0,
        texture_alloc_time: 0,
        cpu_copy_time: 0,
        gpu_copy_commands_time: 0,
        bytes_uploaded: 0,
        items_uploaded: 0,
    };

    let upload_total_start = zeitstempel::now();

    // Staging textures used by the batched upload path, referenced by index
    // from `BatchUploadBuffer::texture_index` and `BatchUploadCopy`.
    let mut batch_upload_textures = Vec::new();

    // A list of copies that must be performed from the temporary textures to the texture cache.
    let mut batch_upload_copies = Vec::new();

    // For each texture format, this stores a list of staging buffers
    // and a texture allocator for packing the buffers.
    let mut batch_upload_buffers = FastHashMap::default();

    // For best performance we use a single TextureUploader for all uploads.
    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
    let mut uploader = renderer.device.upload_texture(
        &mut renderer.texture_upload_pbo_pool,
    );

    let num_updates = update_list.len();

    for (texture_id, updates) in update_list {
        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
        for update in updates {
            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;
            // Set only for `Bytes` sources; keeps a cheap Arc clone of the
            // payload alive for the `skip_staging_buffer` path below.
            let mut arc_data = None;
            // Deferred-initialized fallback buffer, only filled for
            // `ExternalImageSource::Invalid`.
            let dummy_data;
            let data = match source {
                TextureUpdateSource::Bytes { ref data } => {
                    arc_data = Some(data.clone());
                    &data[offset as usize ..]
                }
                TextureUpdateSource::External { id, channel_index } => {
                    let handler = renderer.external_image_handler
                        .as_mut()
                        .expect("Found external image, but no handler set!");
                    // The filter is only relevant for NativeTexture external images.
                    match handler.lock(id, channel_index, false).source {
                        ExternalImageSource::RawData(data) => {
                            &data[offset as usize ..]
                        }
                        ExternalImageSource::Invalid => {
                            // Create a local buffer to fill the pbo.
                            let bpp = texture.get_format().bytes_per_pixel();
                            let width = stride.unwrap_or(rect.width() * bpp);
                            let total_size = width * rect.height();
                            // WR doesn't support the RGBAF32 format in the
                            // texture cache, so we use the u8 type here.
                            dummy_data = vec![0xFFu8; total_size as usize];
                            &dummy_data
                        }
                        ExternalImageSource::NativeTexture(eid) => {
                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
                        }
                    }
                }
                TextureUpdateSource::DebugClear => {
                    // Nothing to upload: clear the destination rect to the
                    // debug color and skip the upload bookkeeping entirely.
                    let draw_target = DrawTarget::from_texture(
                        texture,
                        false,
                    );
                    renderer.device.bind_draw_target(draw_target);
                    renderer.device.clear_target(
                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
                        None,
                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
                    );

                    continue;
                }
            };

            stats.items_uploaded += 1;

            // Batch small updates destined for the shared texture cache into
            // staging textures; large or non-shared updates go directly.
            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height &&
                rect.area() < renderer.device.batched_upload_threshold();

            if use_batch_upload
                && arc_data.is_some()
                && matches!(renderer.device.upload_method(), &UploadMethod::Immediate)
                && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 {
                // The update fills most of a staging texture on its own, so
                // upload straight from the source bytes instead of copying
                // them into a staging CPU buffer first.
                skip_staging_buffer(
                    &mut renderer.device,
                    &mut renderer.staging_texture_pool,
                    rect,
                    stride,
                    arc_data.unwrap(),
                    texture_id,
                    texture,
                    &mut batch_upload_buffers,
                    &mut batch_upload_textures,
                    &mut batch_upload_copies,
                    &mut stats,
                );
            } else if use_batch_upload {
                copy_into_staging_buffer(
                    &mut renderer.device,
                    &mut uploader,
                    &mut renderer.staging_texture_pool,
                    rect,
                    stride,
                    data,
                    texture_id,
                    texture,
                    &mut batch_upload_buffers,
                    &mut batch_upload_textures,
                    &mut batch_upload_copies,
                    &mut stats,
                );
            } else {
                // Direct (non-batched) upload into the cache texture.
                let upload_start_time = zeitstempel::now();

                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    rect,
                    stride,
                    format_override,
                    data.as_ptr(),
                    data.len()
                );

                stats.upload_time += zeitstempel::now() - upload_start_time;
            }

            // Unlock must happen after we are done reading `data`, which may
            // borrow the locked external image.
            if let TextureUpdateSource::External { id, channel_index } = source {
                let handler = renderer.external_image_handler
                    .as_mut()
                    .expect("Found external image, but no handler set!");
                handler.unlock(id, channel_index);
            }
        }
    }

    let upload_start_time = zeitstempel::now();
    // Upload batched texture updates to their temporary textures.
    for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {
        let texture = &batch_upload_textures[batch_buffer.texture_index];
        match batch_buffer.staging_buffer {
            StagingBufferKind::Pbo(pbo) => {
                // PBO contents cover the whole staging texture.
                stats.bytes_uploaded += uploader.upload_staged(
                    &mut renderer.device,
                    texture,
                    DeviceIntRect::from_size(texture.get_dimensions()),
                    None,
                    pbo,
                );
            }
            StagingBufferKind::CpuBuffer { bytes, .. } => {
                let bpp = texture.get_format().bytes_per_pixel();
                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    batch_buffer.upload_rect,
                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
                    None,
                    bytes.as_ptr(),
                    bytes.len()
                );
                // Hand the CPU buffer back to the pool for reuse next frame.
                renderer.staging_texture_pool.return_temporary_buffer(bytes);
            }
            StagingBufferKind::Image { bytes, stride } => {
                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    batch_buffer.upload_rect,
                    stride,
                    None,
                    bytes.as_ptr(),
                    bytes.len()
                );
            }
        }
    }
    stats.upload_time += zeitstempel::now() - upload_start_time;


    // Flush all uploads, batched or otherwise.
    let flush_start_time = zeitstempel::now();
    uploader.flush(&mut renderer.device);
    stats.upload_time += zeitstempel::now() - flush_start_time;

    if !batch_upload_copies.is_empty() {
        // Copy updates that were batch uploaded to their correct destination in the texture cache.
        // Sort them by destination and source to minimize framebuffer binding changes.
        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));

        let gpu_copy_start = zeitstempel::now();

        if renderer.device.use_draw_calls_for_texture_copy() {
            // Some drivers have a very high CPU overhead when submitting hundreds of small blit
            // commands (low end intel drivers on Windows for example can take 100+ ms submitting a
            // few hundred blits). In this case we do the copy with batched draw calls.
            copy_from_staging_to_cache_using_draw_calls(
                renderer,
                &mut stats,
                &batch_upload_textures,
                batch_upload_copies,
            );
        } else {
            copy_from_staging_to_cache(
                renderer,
                &batch_upload_textures,
                batch_upload_copies,
            );
        }

        stats.gpu_copy_commands_time += zeitstempel::now() - gpu_copy_start;
    }

    for texture in batch_upload_textures.drain(..) {
        renderer.staging_texture_pool.return_texture(texture);
    }

    // Update the profile counters. We use add instead of set because
    // this function can be called several times per frame.
    // We don't update the counters when their value is zero, so that
    // the profiler can treat them as events and we can get notified
    // when they happen.

    let upload_total = zeitstempel::now() - upload_total_start;
    renderer.profile.add(
        profiler::TOTAL_UPLOAD_TIME,
        profiler::ns_to_ms(upload_total)
    );

    if num_updates > 0 {
        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
    }

    if stats.bytes_uploaded > 0 {
        renderer.profile.add(
            profiler::TEXTURE_UPLOADS_MEM,
            profiler::bytes_to_mb(stats.bytes_uploaded)
        );
    }

    if stats.cpu_copy_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_CPU_COPY_TIME,
            profiler::ns_to_ms(stats.cpu_copy_time)
        );
    }
    if stats.upload_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_TIME,
            profiler::ns_to_ms(stats.upload_time)
        );
    }
    if stats.texture_alloc_time > 0 {
        renderer.profile.add(
            profiler::STAGING_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.texture_alloc_time)
        );
    }
    if stats.cpu_buffer_alloc_time > 0 {
        renderer.profile.add(
            profiler::CPU_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
        );
    }
    if stats.num_draw_calls > 0 {
        renderer.profile.add(
            profiler::UPLOAD_NUM_COPY_BATCHES,
            stats.num_draw_calls
        );
    }

    if stats.gpu_copy_commands_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_GPU_COPY_TIME,
            profiler::ns_to_ms(stats.gpu_copy_commands_time)
        );
    }

    let add_markers = profiler::thread_is_being_profiled();
    if add_markers && stats.bytes_uploaded > 0 {
        let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded);
        profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total));
    }
}
    349 
    350 /// Copy an item into a batched upload staging buffer.
    351 fn copy_into_staging_buffer<'a>(
    352    device: &mut Device,
    353    uploader: &mut TextureUploader< 'a>,
    354    staging_texture_pool: &mut UploadTexturePool,
    355    update_rect: DeviceIntRect,
    356    update_stride: Option<i32>,
    357    data: &[u8],
    358    dest_texture_id: CacheTextureId,
    359    texture: &Texture,
    360    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
    361    batch_upload_textures: &mut Vec<Texture>,
    362    batch_upload_copies: &mut Vec<BatchUploadCopy>,
    363    stats: &mut UploadStats
    364 ) {
    365    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
    366        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
    367 
    368    // Allocate a region within the staging buffer for this update. If there is
    369    // no room in an existing buffer then allocate another texture and buffer.
    370    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
    371        Some((slice, origin)) => (slice, origin),
    372        None => {
    373            let new_slice = FreeRectSlice(buffers.len() as u32);
    374            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());
    375 
    376            let texture_alloc_time_start = zeitstempel::now();
    377            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
    378            stats.texture_alloc_time = zeitstempel::now() - texture_alloc_time_start;
    379 
    380            let texture_index = batch_upload_textures.len();
    381            batch_upload_textures.push(staging_texture);
    382 
    383            let cpu_buffer_alloc_start_time = zeitstempel::now();
    384            let staging_buffer = match device.upload_method() {
    385                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
    386                    bytes: staging_texture_pool.get_temporary_buffer(),
    387                },
    388                UploadMethod::PixelBuffer(_) => {
    389                    let pbo = uploader.stage(
    390                        device,
    391                        texture.get_format(),
    392                        BATCH_UPLOAD_TEXTURE_SIZE,
    393                    ).unwrap();
    394 
    395                    StagingBufferKind::Pbo(pbo)
    396                }
    397            };
    398            stats.cpu_buffer_alloc_time += zeitstempel::now() - cpu_buffer_alloc_start_time;
    399 
    400            buffers.push(BatchUploadBuffer {
    401                staging_buffer,
    402                texture_index,
    403                upload_rect: DeviceIntRect::zero()
    404            });
    405 
    406            (new_slice, DeviceIntPoint::zero())
    407        }
    408    };
    409    let buffer = &mut buffers[slice.0 as usize];
    410    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
    411    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);
    412 
    413    batch_upload_copies.push(BatchUploadCopy {
    414        src_texture_index: buffer.texture_index,
    415        src_offset: allocated_rect.min,
    416        dest_texture_id,
    417        dest_offset: update_rect.min,
    418        size: update_rect.size(),
    419    });
    420 
    421    unsafe {
    422        let memcpy_start_time = zeitstempel::now();
    423        let bpp = texture.get_format().bytes_per_pixel() as usize;
    424        let width_bytes = update_rect.width() as usize * bpp;
    425        let src_stride = update_stride.map_or(width_bytes, |stride| {
    426            assert!(stride >= 0);
    427            stride as usize
    428        });
    429        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
    430        assert!(src_size <= data.len());
    431 
    432        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
    433        let (dst_stride, dst) = match &mut buffer.staging_buffer {
    434            StagingBufferKind::Pbo(buffer) => (
    435                buffer.get_stride(),
    436                buffer.get_mapping(),
    437            ),
    438            StagingBufferKind::CpuBuffer { bytes } => (
    439                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
    440                &mut bytes[..],
    441            ),
    442            StagingBufferKind::Image { .. } => unreachable!(),
    443        };
    444 
    445        // copy the data line-by-line in to the buffer so that we do not overwrite
    446        // any other region of the buffer.
    447        for y in 0..allocated_rect.height() as usize {
    448            let src_start = y * src_stride;
    449            let src_end = src_start + width_bytes;
    450            let dst_start = (allocated_rect.min.y as usize + y as usize) * dst_stride +
    451                allocated_rect.min.x as usize * bpp;
    452            let dst_end = dst_start + width_bytes;
    453 
    454            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
    455        }
    456 
    457        stats.cpu_copy_time += zeitstempel::now() - memcpy_start_time;
    458    }
    459 }
    460 
    461 /// Take this code path instead of copying into a staging CPU buffer when the image
    462 /// we would copy is large enough that it's unlikely anything else would fit in the
    463 /// buffer, therefore we might as well copy directly from the source image's pixels.
    464 fn skip_staging_buffer<'a>(
    465    device: &mut Device,
    466    staging_texture_pool: &mut UploadTexturePool,
    467    update_rect: DeviceIntRect,
    468    stride: Option<i32>,
    469    data: Arc<Vec<u8>>,
    470    dest_texture_id: CacheTextureId,
    471    texture: &Texture,
    472    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
    473    batch_upload_textures: &mut Vec<Texture>,
    474    batch_upload_copies: &mut Vec<BatchUploadCopy>,
    475    stats: &mut UploadStats
    476 ) {
    477    let (_, buffers) = batch_upload_buffers.entry(texture.get_format())
    478        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
    479 
    480    let texture_alloc_time_start = zeitstempel::now();
    481    let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
    482    stats.texture_alloc_time = zeitstempel::now() - texture_alloc_time_start;
    483 
    484    let texture_index = batch_upload_textures.len();
    485    batch_upload_textures.push(staging_texture);
    486 
    487    buffers.push(BatchUploadBuffer {
    488        staging_buffer: StagingBufferKind::Image { bytes: data, stride },
    489        texture_index,
    490        upload_rect: DeviceIntRect::from_size(update_rect.size())
    491    });
    492 
    493    batch_upload_copies.push(BatchUploadCopy {
    494        src_texture_index: texture_index,
    495        src_offset: point2(0, 0),
    496        dest_texture_id,
    497        dest_offset: update_rect.min,
    498        size: update_rect.size(),
    499    });
    500 }
    501 
    502 
    503 /// Copy from the staging PBOs or textures to texture cache textures using blit commands.
    504 ///
    505 /// Using blits instead of draw calls is supposedly more efficient but some drivers have
    506 /// a very high per-command overhead so in some configurations we end up using
    507 /// copy_from_staging_to_cache_using_draw_calls instead.
    508 fn copy_from_staging_to_cache(
    509    renderer: &mut Renderer,
    510    batch_upload_textures: &[Texture],
    511    batch_upload_copies: Vec<BatchUploadCopy>,
    512 ) {
    513    for copy in batch_upload_copies {
    514        let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
    515 
    516        renderer.device.copy_texture_sub_region(
    517            &batch_upload_textures[copy.src_texture_index],
    518            copy.src_offset.x as _,
    519            copy.src_offset.y as _,
    520            dest_texture,
    521            copy.dest_offset.x as _,
    522            copy.dest_offset.y as _,
    523            copy.size.width as _,
    524            copy.size.height as _,
    525        );
    526    }
    527 }
    528 
/// Generate and submit composite shader batches to copy from
/// the staging textures to the destination cache textures.
///
/// If this shows up in GPU time profiles we could replace it with
/// a simpler shader (composite.glsl is already quite simple).
fn copy_from_staging_to_cache_using_draw_calls(
    renderer: &mut Renderer,
    stats: &mut UploadStats,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    // Instances accumulated for the current (source, destination) pair.
    let mut copy_instances = Vec::new();
    let mut prev_src = None;
    let mut prev_dst = None;
    let mut dst_texture_size = DeviceSize::new(0.0, 0.0);

    // The copies arrive sorted by (destination, source), so runs sharing the
    // same bindings can be batched into a single instanced draw call.
    for copy in batch_upload_copies {

        let src_changed = prev_src != Some(copy.src_texture_index);
        let dst_changed = prev_dst != Some(copy.dest_texture_id);

        // Flush the pending batch before changing any binding it depends on.
        if (src_changed || dst_changed) && !copy_instances.is_empty() {
            renderer.draw_instanced_batch(
                &copy_instances,
                VertexArrayKind::Copy,
                // We bind the staging texture manually because it isn't known
                // to the texture resolver.
                &BatchTextures::empty(),
                &mut RendererStats::default(),
            );

            stats.num_draw_calls += 1;
            copy_instances.clear();
        }

        if dst_changed {
            // Rebind the draw target and copy shader for the new destination.
            let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
            dst_texture_size = dest_texture.get_dimensions().to_f32();

            let draw_target = DrawTarget::from_texture(dest_texture, false);
            renderer.device.bind_draw_target(draw_target);

            renderer.shaders
                .borrow_mut()
                .ps_copy()
                .bind(
                    &mut renderer.device,
                    &Transform3D::identity(),
                    None,
                    &mut renderer.renderer_errors,
                    &mut renderer.profile,
                    &mut renderer.command_log,
                );

            prev_dst = Some(copy.dest_texture_id);
        }

        if src_changed {
            // Bind the new source staging texture.
            renderer.device.bind_texture(
                TextureSampler::Color0,
                &batch_upload_textures[copy.src_texture_index],
                Swizzle::default(),
            );

            prev_src = Some(copy.src_texture_index)
        }

        let src_rect = DeviceRect::from_origin_and_size(
            copy.src_offset.to_f32(),
            copy.size.to_f32(),
        );

        let dst_rect = DeviceRect::from_origin_and_size(
            copy.dest_offset.to_f32(),
            copy.size.to_f32(),
        );

        copy_instances.push(CopyInstance {
            src_rect,
            dst_rect,
            dst_texture_size,
        });
    }

    // Flush whatever remains after the last copy.
    if !copy_instances.is_empty() {
        renderer.draw_instanced_batch(
            &copy_instances,
            VertexArrayKind::Copy,
            &BatchTextures::empty(),
            &mut RendererStats::default(),
        );

        stats.num_draw_calls += 1;
    }
}
    624 
/// A very basic pool to avoid reallocating staging textures as well as staging
/// CPU side buffers.
pub struct UploadTexturePool {
    /// The textures in the pool associated with a last used frame index.
    ///
    /// The outer array corresponds to each of the four supported texture formats
    /// (see `format_index`).
    textures: [VecDeque<(Texture, u64)>; BATCH_UPLOAD_FORMAT_COUNT],
    // Frame at which to deallocate some textures if there are too many in the pool,
    // for each format.
    delay_texture_deallocation: [u64; BATCH_UPLOAD_FORMAT_COUNT],
    // Incremented by `begin_frame`; used to decide when a pooled texture is
    // old enough to reuse without stalling.
    current_frame: u64,

    /// Temporary buffers that are used when using staging uploads + glTexImage2D.
    ///
    /// Temporary buffers aren't used asynchronously so they can be reused every frame.
    /// To keep things simple we always allocate enough memory for formats with four bytes
    /// per pixel (more than we need for alpha-only textures but it works just as well).
    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
    // Low-water mark of `temporary_buffers.len()` since the last `begin_frame`.
    min_temporary_buffers: usize,
    delay_buffer_deallocation: u64,
}
    646 
    647 impl UploadTexturePool {
    648    pub fn new() -> Self {
    649        UploadTexturePool {
    650            textures: [VecDeque::new(), VecDeque::new(), VecDeque::new(), VecDeque::new()],
    651            delay_texture_deallocation: [0; BATCH_UPLOAD_FORMAT_COUNT],
    652            current_frame: 0,
    653            temporary_buffers: Vec::new(),
    654            min_temporary_buffers: 0,
    655            delay_buffer_deallocation: 0,
    656        }
    657    }
    658 
    659    fn format_index(&self, format: ImageFormat) -> usize {
    660        match format {
    661            ImageFormat::RGBA8 => 0,
    662            ImageFormat::BGRA8 => 1,
    663            ImageFormat::R8 => 2,
    664            ImageFormat::R16 => 3,
    665            _ => { panic!("unexpected format {:?}", format); }
    666        }
    667    }
    668 
    /// Advance the pool's frame counter and reset the per-frame low-water
    /// mark of available temporary buffers (updated in `get_temporary_buffer`).
    pub fn begin_frame(&mut self) {
        self.current_frame += 1;
        self.min_temporary_buffers = self.temporary_buffers.len();
    }
    673 
    674    /// Create or reuse a staging texture.
    675    ///
    676    /// See also return_texture.
    677    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {
    678 
    679        // First try to reuse a texture from the pool.
    680        // "available" here means hasn't been used for 2 frames to avoid stalls.
    681        // No need to scan the vector. Newer textures are always pushed at the back
    682        // of the vector so we know the first element is the least recently used.
    683        let format_idx = self.format_index(format);
    684        let can_reuse = self.textures[format_idx].get(0)
    685            .map(|tex| self.current_frame - tex.1 > 2)
    686            .unwrap_or(false);
    687 
    688        if can_reuse {
    689            return self.textures[format_idx].pop_front().unwrap().0;
    690        }
    691 
    692        // If we couldn't find an available texture, create a new one.
    693 
    694        device.create_texture(
    695            ImageBufferKind::Texture2D,
    696            format,
    697            BATCH_UPLOAD_TEXTURE_SIZE.width,
    698            BATCH_UPLOAD_TEXTURE_SIZE.height,
    699            TextureFilter::Nearest,
    700            // Currently we need render target support as we always use glBlitFramebuffer
    701            // to copy the texture data. Instead, we should use glCopyImageSubData on some
    702            // platforms, and avoid creating the FBOs in that case.
    703            Some(RenderTargetInfo { has_depth: false }),
    704        )
    705    }
    706 
    707    /// Hand the staging texture back to the pool after being done with uploads.
    708    ///
    709    /// The texture must have been obtained from this pool via get_texture.
    710    pub fn return_texture(&mut self, texture: Texture) {
    711        let format_idx = self.format_index(texture.get_format());
    712        self.textures[format_idx].push_back((texture, self.current_frame));
    713    }
    714 
    715    /// Create or reuse a temporary CPU buffer.
    716    ///
    717    /// These buffers are used in the batched upload path when PBOs are not supported.
    718    /// Content is first written to the temporary buffer and uploaded via a single
    719    /// glTexSubImage2D call.
    720    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
    721        let buffer = self.temporary_buffers.pop().unwrap_or_else(|| {
    722            vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]
    723        });
    724        self.min_temporary_buffers = self.min_temporary_buffers.min(self.temporary_buffers.len());
    725        buffer
    726    }
    727 
    728    /// Return memory that was obtained from this pool via get_temporary_buffer.
    729    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
    730        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);
    731        self.temporary_buffers.push(buffer);
    732    }
    733 
    734    /// Deallocate this pool's CPU and GPU memory.
    735    pub fn delete_textures(&mut self, device: &mut Device) {
    736        for format in &mut self.textures {
    737            while let Some(texture) = format.pop_back() {
    738                device.delete_texture(texture.0)
    739            }
    740        }
    741        self.temporary_buffers.clear();
    742    }
    743 
    744    /// Deallocate some textures if there are too many for a long time.
    745    pub fn end_frame(&mut self, device: &mut Device) {
    746        for format_idx in 0..self.textures.len() {
    747            // Count the number of reusable staging textures.
    748            // if it stays high for a large number of frames, truncate it back to 8-ish
    749            // over multiple frames.
    750 
    751            let mut num_reusable_textures = 0;
    752            for texture in &self.textures[format_idx] {
    753                if self.current_frame - texture.1 > 2 {
    754                    num_reusable_textures += 1;
    755                }
    756            }
    757 
    758            if num_reusable_textures < 8 {
    759                // Don't deallocate textures for another 120 frames.
    760                self.delay_texture_deallocation[format_idx] = self.current_frame + 120;
    761            }
    762 
    763            // Deallocate up to 4 staging textures every frame.
    764            let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {
    765                num_reusable_textures.min(4)
    766            } else {
    767                0
    768            };
    769 
    770            for _ in 0..to_remove {
    771                let texture = self.textures[format_idx].pop_front().unwrap().0;
    772                device.delete_texture(texture);
    773            }
    774        }
    775 
    776        // Similar logic for temporary CPU buffers. Our calls to get and return
    777        // temporary buffers should have been balanced for this frame, but the call
    778        // get_temporary_buffer will allocate a buffer if the vec is empty. Since we
    779        // carry these buffers from frame to frame, we keep track of the smallest
    780        // length of the temporary_buffers vec that we encountered this frame. Those
    781        // buffers were not touched and we deallocate some if there are a lot of them.
    782        let unused_buffers = self.min_temporary_buffers;
    783        if unused_buffers < 8 {
    784            self.delay_buffer_deallocation = self.current_frame + 120;
    785        }
    786        let to_remove = if self.current_frame > self.delay_buffer_deallocation  {
    787            unused_buffers.min(4)
    788        } else {
    789            0
    790        };
    791        for _ in 0..to_remove {
    792            // Unlike textures it doesn't matter whether we pop from the front or back
    793            // of the vector.
    794            self.temporary_buffers.pop();
    795        }
    796    }
    797 
    798    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
    799        for buf in &self.temporary_buffers {
    800            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };
    801        }
    802 
    803        for format in &self.textures {
    804            for texture in format {
    805                report.upload_staging_textures += texture.0.size_in_bytes();
    806            }
    807        }
    808    }
    809 }
    810 
/// Counters describing the texture-upload work performed.
///
/// NOTE(review): the `*_time` fields are u64 tick counts; the unit is
/// determined by the code that records them, outside this chunk — confirm
/// before interpreting the values.
struct UploadStats {
   /// Number of draw calls issued (batched copy path).
   num_draw_calls: u32,
   /// Time spent performing uploads.
   upload_time: u64,
   /// Time spent allocating CPU-side staging buffers.
   cpu_buffer_alloc_time: u64,
   /// Time spent allocating staging textures.
   texture_alloc_time: u64,
   /// Time spent copying data on the CPU.
   cpu_copy_time: u64,
   /// Time spent submitting GPU copy commands.
   gpu_copy_commands_time: u64,
   /// Total bytes of texture data uploaded.
   bytes_uploaded: usize,
   /// Number of individual items uploaded.
   items_uploaded: usize,
}
    821 
/// The kind of memory backing a staging buffer for a batched upload.
#[derive(Debug)]
enum StagingBufferKind<'a> {
   /// A staging buffer backed by a pixel buffer object from the device.
   Pbo(UploadStagingBuffer<'a>),
   /// A plain CPU-side buffer, used on the path where PBOs are not supported.
   CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> },
   /// Shared image data with an optional stride (presumably bytes per row —
   /// confirm against the code that reads it).
   Image { bytes: Arc<Vec<u8>>, stride: Option<i32> },
}
/// A staging buffer paired with the staging texture it will be uploaded to.
#[derive(Debug)]
struct BatchUploadBuffer<'a> {
   /// The memory holding the content to upload.
   staging_buffer: StagingBufferKind<'a>,
   /// Index of the destination staging texture (presumably within the same
   /// batch_upload_textures list as BatchUploadCopy::src_texture_index — confirm).
   texture_index: usize,
   // A rectangle containing all items going into this staging texture, so
   // that we can avoid uploading the entire area if we are using glTexSubImage2d.
   upload_rect: DeviceIntRect,
}
    836 
// On some devices performing many small texture uploads is slow, so instead we batch
// updates in to a small number of uploads to temporary textures, then copy from those
// textures to the correct place in the texture cache.

/// A single copy from a temporary staging texture to its final location in
/// the texture cache.
#[derive(Debug)]
struct BatchUploadCopy {
   // Index within batch_upload_textures
   src_texture_index: usize,
   /// Top-left of the source rectangle in the staging texture.
   src_offset: DeviceIntPoint,
   /// The texture cache texture to copy into.
   dest_texture_id: CacheTextureId,
   /// Top-left of the destination rectangle.
   dest_offset: DeviceIntPoint,
   /// Size of the region to copy.
   size: DeviceIntSize,
}