gl.rs (174884B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 use super::super::shader_source::{OPTIMIZED_SHADERS, UNOPTIMIZED_SHADERS}; 6 use api::{ImageDescriptor, ImageFormat, Parameter, BoolParameter, IntParameter, ImageRendering}; 7 use api::{MixBlendMode, ImageBufferKind, VoidPtrToSizeFn}; 8 use api::{CrashAnnotator, CrashAnnotation, CrashAnnotatorGuard}; 9 use api::units::*; 10 use euclid::default::Transform3D; 11 use gleam::gl; 12 use crate::render_api::MemoryReport; 13 use crate::internal_types::{FastHashMap, RenderTargetInfo, Swizzle, SwizzleSettings}; 14 use crate::util::round_up_to_multiple; 15 use crate::profiler; 16 use log::Level; 17 use smallvec::SmallVec; 18 use std::{ 19 borrow::Cow, 20 cell::{Cell, RefCell}, 21 cmp, 22 collections::hash_map::Entry, 23 marker::PhantomData, 24 mem, 25 num::NonZeroUsize, 26 os::raw::c_void, 27 ops::Add, 28 path::PathBuf, 29 ptr, 30 rc::Rc, 31 slice, 32 sync::Arc, 33 thread, 34 time::Duration, 35 }; 36 use webrender_build::shader::{ 37 ProgramSourceDigest, ShaderKind, ShaderVersion, build_shader_main_string, 38 build_shader_prefix_string, do_build_shader_string, shader_source_from_file, 39 }; 40 use malloc_size_of::MallocSizeOfOps; 41 42 /// Sequence number for frames, as tracked by the device layer. 43 #[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)] 44 #[cfg_attr(feature = "capture", derive(Serialize))] 45 #[cfg_attr(feature = "replay", derive(Deserialize))] 46 pub struct GpuFrameId(usize); 47 48 impl GpuFrameId { 49 pub fn new(value: usize) -> Self { 50 GpuFrameId(value) 51 } 52 } 53 54 impl Add<usize> for GpuFrameId { 55 type Output = GpuFrameId; 56 57 fn add(self, other: usize) -> GpuFrameId { 58 GpuFrameId(self.0 + other) 59 } 60 } 61 62 pub struct TextureSlot(pub usize); 63 64 // In some places we need to temporarily bind a texture to any slot. 
65 const DEFAULT_TEXTURE: TextureSlot = TextureSlot(0); 66 67 #[repr(u32)] 68 pub enum DepthFunction { 69 Always = gl::ALWAYS, 70 Less = gl::LESS, 71 LessEqual = gl::LEQUAL, 72 } 73 74 #[repr(u32)] 75 #[derive(Copy, Clone, Debug, Eq, PartialEq)] 76 #[cfg_attr(feature = "capture", derive(Serialize))] 77 #[cfg_attr(feature = "replay", derive(Deserialize))] 78 pub enum TextureFilter { 79 Nearest, 80 Linear, 81 Trilinear, 82 } 83 84 /// A structure defining a particular workflow of texture transfers. 85 #[derive(Clone, Debug)] 86 #[cfg_attr(feature = "capture", derive(Serialize))] 87 #[cfg_attr(feature = "replay", derive(Deserialize))] 88 pub struct TextureFormatPair<T> { 89 /// Format the GPU natively stores texels in. 90 pub internal: T, 91 /// Format we expect the users to provide the texels in. 92 pub external: T, 93 } 94 95 impl<T: Copy> From<T> for TextureFormatPair<T> { 96 fn from(value: T) -> Self { 97 TextureFormatPair { 98 internal: value, 99 external: value, 100 } 101 } 102 } 103 104 #[derive(Debug)] 105 pub enum VertexAttributeKind { 106 F32, 107 U8Norm, 108 U16Norm, 109 I32, 110 U16, 111 } 112 113 #[derive(Debug)] 114 pub struct VertexAttribute { 115 pub name: &'static str, 116 pub count: u32, 117 pub kind: VertexAttributeKind, 118 } 119 120 impl VertexAttribute { 121 pub const fn quad_instance_vertex() -> Self { 122 VertexAttribute { 123 name: "aPosition", 124 count: 2, 125 kind: VertexAttributeKind::U8Norm, 126 } 127 } 128 129 pub const fn gpu_buffer_address(name: &'static str) -> Self { 130 VertexAttribute { 131 name, 132 count: 1, 133 kind: VertexAttributeKind::I32, 134 } 135 } 136 137 pub const fn f32x4(name: &'static str) -> Self { 138 VertexAttribute { 139 name, 140 count: 4, 141 kind: VertexAttributeKind::F32, 142 } 143 } 144 145 pub const fn f32x3(name: &'static str) -> Self { 146 VertexAttribute { 147 name, 148 count: 3, 149 kind: VertexAttributeKind::F32, 150 } 151 } 152 153 pub const fn f32x2(name: &'static str) -> Self { 154 VertexAttribute { 
155 name, 156 count: 2, 157 kind: VertexAttributeKind::F32, 158 } 159 } 160 161 pub const fn f32(name: &'static str) -> Self { 162 VertexAttribute { 163 name, 164 count: 1, 165 kind: VertexAttributeKind::F32, 166 } 167 } 168 169 pub const fn i32x4(name: &'static str) -> Self { 170 VertexAttribute { 171 name, 172 count: 4, 173 kind: VertexAttributeKind::I32, 174 } 175 } 176 177 pub const fn i32x2(name: &'static str) -> Self { 178 VertexAttribute { 179 name, 180 count: 2, 181 kind: VertexAttributeKind::I32, 182 } 183 } 184 185 pub const fn i32(name: &'static str) -> Self { 186 VertexAttribute { 187 name, 188 count: 1, 189 kind: VertexAttributeKind::I32, 190 } 191 } 192 193 pub const fn u16(name: &'static str) -> Self { 194 VertexAttribute { 195 name, 196 count: 1, 197 kind: VertexAttributeKind::U16, 198 } 199 } 200 201 pub const fn u16x2(name: &'static str) -> Self { 202 VertexAttribute { 203 name, 204 count: 2, 205 kind: VertexAttributeKind::U16, 206 } 207 } 208 } 209 210 #[derive(Debug)] 211 pub struct VertexDescriptor { 212 pub vertex_attributes: &'static [VertexAttribute], 213 pub instance_attributes: &'static [VertexAttribute], 214 } 215 216 enum FBOTarget { 217 Read, 218 Draw, 219 } 220 221 /// Method of uploading texel data from CPU to GPU. 222 #[derive(Debug, Clone)] 223 pub enum UploadMethod { 224 /// Just call `glTexSubImage` directly with the CPU data pointer 225 Immediate, 226 /// Accumulate the changes in PBO first before transferring to a texture. 227 PixelBuffer(VertexUsageHint), 228 } 229 230 /// Plain old data that can be used to initialize a texture. 231 pub unsafe trait Texel: Copy + Default { 232 fn image_format() -> ImageFormat; 233 } 234 235 unsafe impl Texel for u8 { 236 fn image_format() -> ImageFormat { ImageFormat::R8 } 237 } 238 239 /// Returns the size in bytes of a depth target with the given dimensions. 
240 fn depth_target_size_in_bytes(dimensions: &DeviceIntSize) -> usize { 241 // DEPTH24 textures generally reserve 3 bytes for depth and 1 byte 242 // for stencil, so we measure them as 32 bits. 243 let pixels = dimensions.width * dimensions.height; 244 (pixels as usize) * 4 245 } 246 247 pub fn get_gl_target(target: ImageBufferKind) -> gl::GLuint { 248 match target { 249 ImageBufferKind::Texture2D => gl::TEXTURE_2D, 250 ImageBufferKind::TextureRect => gl::TEXTURE_RECTANGLE, 251 ImageBufferKind::TextureExternal => gl::TEXTURE_EXTERNAL_OES, 252 ImageBufferKind::TextureExternalBT709 => gl::TEXTURE_EXTERNAL_OES, 253 } 254 } 255 256 pub fn from_gl_target(target: gl::GLuint) -> ImageBufferKind { 257 match target { 258 gl::TEXTURE_2D => ImageBufferKind::Texture2D, 259 gl::TEXTURE_RECTANGLE => ImageBufferKind::TextureRect, 260 gl::TEXTURE_EXTERNAL_OES => ImageBufferKind::TextureExternal, 261 _ => panic!("Unexpected target {:?}", target), 262 } 263 } 264 265 fn supports_extension(extensions: &[String], extension: &str) -> bool { 266 extensions.iter().any(|s| s == extension) 267 } 268 269 fn get_shader_version(gl: &dyn gl::Gl) -> ShaderVersion { 270 match gl.get_type() { 271 gl::GlType::Gl => ShaderVersion::Gl, 272 gl::GlType::Gles => ShaderVersion::Gles, 273 } 274 } 275 276 // Get an unoptimized shader string by name, from the built in resources or 277 // an override path, if supplied. 
278 pub fn get_unoptimized_shader_source(shader_name: &str, base_path: Option<&PathBuf>) -> Cow<'static, str> { 279 if let Some(ref base) = base_path { 280 let shader_path = base.join(&format!("{}.glsl", shader_name)); 281 Cow::Owned(shader_source_from_file(&shader_path)) 282 } else { 283 Cow::Borrowed( 284 UNOPTIMIZED_SHADERS 285 .get(shader_name) 286 .expect("Shader not found") 287 .source 288 ) 289 } 290 } 291 292 impl VertexAttributeKind { 293 fn size_in_bytes(&self) -> u32 { 294 match *self { 295 VertexAttributeKind::F32 => 4, 296 VertexAttributeKind::U8Norm => 1, 297 VertexAttributeKind::U16Norm => 2, 298 VertexAttributeKind::I32 => 4, 299 VertexAttributeKind::U16 => 2, 300 } 301 } 302 } 303 304 impl VertexAttribute { 305 fn size_in_bytes(&self) -> u32 { 306 self.count * self.kind.size_in_bytes() 307 } 308 309 fn bind_to_vao( 310 &self, 311 attr_index: gl::GLuint, 312 divisor: gl::GLuint, 313 stride: gl::GLint, 314 offset: gl::GLuint, 315 gl: &dyn gl::Gl, 316 ) { 317 gl.enable_vertex_attrib_array(attr_index); 318 gl.vertex_attrib_divisor(attr_index, divisor); 319 320 match self.kind { 321 VertexAttributeKind::F32 => { 322 gl.vertex_attrib_pointer( 323 attr_index, 324 self.count as gl::GLint, 325 gl::FLOAT, 326 false, 327 stride, 328 offset, 329 ); 330 } 331 VertexAttributeKind::U8Norm => { 332 gl.vertex_attrib_pointer( 333 attr_index, 334 self.count as gl::GLint, 335 gl::UNSIGNED_BYTE, 336 true, 337 stride, 338 offset, 339 ); 340 } 341 VertexAttributeKind::U16Norm => { 342 gl.vertex_attrib_pointer( 343 attr_index, 344 self.count as gl::GLint, 345 gl::UNSIGNED_SHORT, 346 true, 347 stride, 348 offset, 349 ); 350 } 351 VertexAttributeKind::I32 => { 352 gl.vertex_attrib_i_pointer( 353 attr_index, 354 self.count as gl::GLint, 355 gl::INT, 356 stride, 357 offset, 358 ); 359 } 360 VertexAttributeKind::U16 => { 361 gl.vertex_attrib_i_pointer( 362 attr_index, 363 self.count as gl::GLint, 364 gl::UNSIGNED_SHORT, 365 stride, 366 offset, 367 ); 368 } 369 } 370 } 371 } 

impl VertexDescriptor {
    /// Total size in bytes of one instance's worth of attributes.
    fn instance_stride(&self) -> u32 {
        self.instance_attributes
            .iter()
            .map(|attr| attr.size_in_bytes())
            .sum()
    }

    /// Binds a run of attributes, starting at `start_index`, to the given VBO.
    fn bind_attributes(
        attributes: &[VertexAttribute],
        start_index: usize,
        divisor: u32,
        gl: &dyn gl::Gl,
        vbo: VBOId,
    ) {
        vbo.bind(gl);

        // All attributes in the run share one interleaved stride.
        let stride: u32 = attributes
            .iter()
            .map(|attr| attr.size_in_bytes())
            .sum();

        let mut offset = 0;
        for (i, attr) in attributes.iter().enumerate() {
            let attr_index = (start_index + i) as gl::GLuint;
            attr.bind_to_vao(attr_index, divisor, stride as _, offset, gl);
            offset += attr.size_in_bytes();
        }
    }

    /// Binds vertex attributes from `main` and (if any) instance attributes
    /// from `instance`, with instance attributes numbered after the vertex ones.
    fn bind(&self, gl: &dyn gl::Gl, main: VBOId, instance: VBOId, instance_divisor: u32) {
        Self::bind_attributes(self.vertex_attributes, 0, 0, gl, main);

        if !self.instance_attributes.is_empty() {
            Self::bind_attributes(
                self.instance_attributes,
                self.vertex_attributes.len(),
                instance_divisor,
                gl,
                instance,
            );
        }
    }
}

impl VBOId {
    fn bind(&self, gl: &dyn gl::Gl) {
        gl.bind_buffer(gl::ARRAY_BUFFER, self.0);
    }
}

impl IBOId {
    fn bind(&self, gl: &dyn gl::Gl) {
        gl.bind_buffer(gl::ELEMENT_ARRAY_BUFFER, self.0);
    }
}

impl FBOId {
    fn bind(&self, gl: &dyn gl::Gl, target: FBOTarget) {
        let target = match target {
            FBOTarget::Read => gl::READ_FRAMEBUFFER,
            FBOTarget::Draw => gl::DRAW_FRAMEBUFFER,
        };
        gl.bind_framebuffer(target, self.0);
    }
}

/// A VBO paired with the attribute layout to interpret it with.
pub struct Stream<'a> {
    attributes: &'a [VertexAttribute],
    vbo: VBOId,
}

pub struct VBO<V> {
    id: gl::GLuint,
    target: gl::GLenum,
    allocated_count: usize,
    marker: PhantomData<V>,
}

impl<V> VBO<V> {
    pub fn allocated_count(&self) -> usize {
        self.allocated_count
    }

    /// Pairs this VBO with an attribute layout; asserts (in debug builds)
    /// that the layout's total size matches the element type `V`.
    pub fn stream_with<'a>(&self, attributes: &'a [VertexAttribute]) -> Stream<'a> {
        debug_assert_eq!(
            mem::size_of::<V>(),
            attributes.iter().map(|a| a.size_in_bytes() as usize).sum::<usize>()
        );
        Stream {
            attributes,
            vbo: VBOId(self.id),
        }
    }
}

impl<T> Drop for VBO<T> {
    fn drop(&mut self) {
        // Deletion must have gone through the Device; id is zeroed then.
        debug_assert!(thread::panicking() || self.id == 0);
    }
}

#[cfg_attr(feature = "replay", derive(Clone))]
#[derive(Debug)]
pub struct ExternalTexture {
    id: gl::GLuint,
    target: gl::GLuint,
    uv_rect: TexelRect,
    image_rendering: ImageRendering,
}

impl ExternalTexture {
    pub fn new(
        id: u32,
        target: ImageBufferKind,
        uv_rect: TexelRect,
        image_rendering: ImageRendering,
    ) -> Self {
        ExternalTexture {
            id,
            target: get_gl_target(target),
            uv_rect,
            image_rendering,
        }
    }

    #[cfg(feature = "replay")]
    pub fn internal_id(&self) -> gl::GLuint {
        self.id
    }

    pub fn get_uv_rect(&self) -> TexelRect {
        self.uv_rect
    }
}

bitflags! {
    #[derive(Default, Debug, Copy, PartialEq, Eq, Clone, PartialOrd, Ord, Hash)]
    pub struct TextureFlags: u32 {
        /// This texture corresponds to one of the shared texture caches.
        const IS_SHARED_TEXTURE_CACHE = 1 << 0;
    }
}

/// WebRender interface to an OpenGL texture.
///
/// Because freeing a texture requires various device handles that are not
/// reachable from this struct, manual destruction via `Device` is required.
/// Our `Drop` implementation asserts that this has happened.
#[derive(Debug)]
pub struct Texture {
    id: gl::GLuint,
    target: gl::GLuint,
    format: ImageFormat,
    size: DeviceIntSize,
    filter: TextureFilter,
    flags: TextureFlags,
    /// An internally mutable swizzling state that may change between batches.
    active_swizzle: Cell<Swizzle>,
    /// Framebuffer Object allowing this texture to be rendered to.
    ///
    /// Empty if this texture is not used as a render target or if a depth buffer is needed.
    fbo: Option<FBOId>,
    /// Same as the above, but with a depth buffer attached.
    ///
    /// FBOs are cheap to create but expensive to reconfigure (since doing so
    /// invalidates framebuffer completeness caching). Moreover, rendering with
    /// a depth buffer attached but the depth write+test disabled relies on the
    /// driver to optimize it out of the rendering pass, which most drivers
    /// probably do but, according to jgilbert, is best not to rely on.
    ///
    /// So we lazily generate a second list of FBOs with depth. This list is
    /// empty if this texture is not used as a render target _or_ if it is, but
    /// the depth buffer has never been requested.
    ///
    /// Note that we always fill fbo, and then lazily create fbo_with_depth
    /// when needed. We could make both lazy (i.e. render targets would have one
    /// or the other, but not both, unless they were actually used in both
    /// configurations). But that would complicate a lot of logic in this module,
    /// and FBOs are cheap enough to create.
553 fbo_with_depth: Option<FBOId>, 554 last_frame_used: GpuFrameId, 555 } 556 557 impl Texture { 558 pub fn get_dimensions(&self) -> DeviceIntSize { 559 self.size 560 } 561 562 pub fn get_format(&self) -> ImageFormat { 563 self.format 564 } 565 566 pub fn get_filter(&self) -> TextureFilter { 567 self.filter 568 } 569 570 pub fn get_target(&self) -> ImageBufferKind { 571 from_gl_target(self.target) 572 } 573 574 pub fn supports_depth(&self) -> bool { 575 self.fbo_with_depth.is_some() 576 } 577 578 pub fn last_frame_used(&self) -> GpuFrameId { 579 self.last_frame_used 580 } 581 582 pub fn used_in_frame(&self, frame_id: GpuFrameId) -> bool { 583 self.last_frame_used == frame_id 584 } 585 586 pub fn is_render_target(&self) -> bool { 587 self.fbo.is_some() 588 } 589 590 /// Returns true if this texture was used within `threshold` frames of 591 /// the current frame. 592 pub fn used_recently(&self, current_frame_id: GpuFrameId, threshold: usize) -> bool { 593 self.last_frame_used + threshold >= current_frame_id 594 } 595 596 /// Returns the flags for this texture. 597 pub fn flags(&self) -> &TextureFlags { 598 &self.flags 599 } 600 601 /// Returns a mutable borrow of the flags for this texture. 602 pub fn flags_mut(&mut self) -> &mut TextureFlags { 603 &mut self.flags 604 } 605 606 /// Returns the number of bytes (generally in GPU memory) that this texture 607 /// consumes. 
608 pub fn size_in_bytes(&self) -> usize { 609 let bpp = self.format.bytes_per_pixel() as usize; 610 let w = self.size.width as usize; 611 let h = self.size.height as usize; 612 bpp * w * h 613 } 614 615 #[cfg(feature = "replay")] 616 pub fn into_external(mut self) -> ExternalTexture { 617 let ext = ExternalTexture { 618 id: self.id, 619 target: self.target, 620 // TODO(gw): Support custom UV rect for external textures during captures 621 uv_rect: TexelRect::new( 622 0.0, 623 0.0, 624 self.size.width as f32, 625 self.size.height as f32, 626 ), 627 image_rendering: ImageRendering::Auto, 628 }; 629 self.id = 0; // don't complain, moved out 630 ext 631 } 632 } 633 634 impl Drop for Texture { 635 fn drop(&mut self) { 636 debug_assert!(thread::panicking() || self.id == 0); 637 } 638 } 639 640 pub struct Program { 641 id: gl::GLuint, 642 u_transform: gl::GLint, 643 u_texture_size: gl::GLint, 644 source_info: ProgramSourceInfo, 645 is_initialized: bool, 646 } 647 648 impl Program { 649 pub fn is_initialized(&self) -> bool { 650 self.is_initialized 651 } 652 } 653 654 impl Drop for Program { 655 fn drop(&mut self) { 656 debug_assert!( 657 thread::panicking() || self.id == 0, 658 "renderer::deinit not called" 659 ); 660 } 661 } 662 663 pub struct CustomVAO { 664 id: gl::GLuint, 665 } 666 667 impl Drop for CustomVAO { 668 fn drop(&mut self) { 669 debug_assert!( 670 thread::panicking() || self.id == 0, 671 "renderer::deinit not called" 672 ); 673 } 674 } 675 676 pub struct VAO { 677 id: gl::GLuint, 678 ibo_id: IBOId, 679 main_vbo_id: VBOId, 680 instance_vbo_id: VBOId, 681 instance_stride: usize, 682 instance_divisor: u32, 683 owns_vertices_and_indices: bool, 684 } 685 686 impl Drop for VAO { 687 fn drop(&mut self) { 688 debug_assert!( 689 thread::panicking() || self.id == 0, 690 "renderer::deinit not called" 691 ); 692 } 693 } 694 695 #[derive(Debug)] 696 pub struct PBO { 697 id: gl::GLuint, 698 reserved_size: usize, 699 } 700 701 impl PBO { 702 pub fn 
get_reserved_size(&self) -> usize {
        self.reserved_size
    }
}

impl Drop for PBO {
    fn drop(&mut self) {
        debug_assert!(
            thread::panicking() || self.id == 0,
            "renderer::deinit not called or PBO not returned to pool"
        );
    }
}

/// A PBO mapped for reading; unmaps and unbinds on drop.
pub struct BoundPBO<'a> {
    device: &'a mut Device,
    pub data: &'a [u8]
}

impl<'a> Drop for BoundPBO<'a> {
    fn drop(&mut self) {
        self.device.gl.unmap_buffer(gl::PIXEL_PACK_BUFFER);
        self.device.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, 0);
    }
}

#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
pub struct FBOId(gl::GLuint);

#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
pub struct RBOId(gl::GLuint);

#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
pub struct VBOId(gl::GLuint);

#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
struct IBOId(gl::GLuint);

#[derive(Clone, Debug)]
enum ProgramSourceType {
    Unoptimized,
    Optimized(ShaderVersion),
}

#[derive(Clone, Debug)]
pub struct ProgramSourceInfo {
    base_filename: &'static str,
    features: Vec<&'static str>,
    full_name_cstr: Rc<std::ffi::CString>,
    source_type: ProgramSourceType,
    digest: ProgramSourceDigest,
}

impl ProgramSourceInfo {
    fn new(
        device: &Device,
        name: &'static str,
        features: &[&'static str],
    ) -> Self {

        // Compute the digest. Assuming the device has a `ProgramCache`, this
        // will always be needed, whereas the source is rarely needed.

        use std::collections::hash_map::DefaultHasher;
        use std::hash::Hasher;

        // Setup.
        let mut hasher = DefaultHasher::new();
        let gl_version = get_shader_version(&*device.gl());

        // Hash the renderer name.
        hasher.write(device.capabilities.renderer_name.as_bytes());

        let full_name = Self::make_full_name(name, features);

        let optimized_source = if device.use_optimized_shaders {
            OPTIMIZED_SHADERS.get(&(gl_version, &full_name)).or_else(|| {
                warn!("Missing optimized shader source for {}", &full_name);
                None
            })
        } else {
            None
        };

        let source_type = match optimized_source {
            Some(source_and_digest) => {
                // Optimized shader sources are used as-is, without any run-time processing.
                // The vertex and fragment shaders are different, so must both be hashed.
                // We use the hashes that were computed at build time, and verify it in debug builds.
                if cfg!(debug_assertions) {
                    let mut h = DefaultHasher::new();
                    h.write(source_and_digest.vert_source.as_bytes());
                    h.write(source_and_digest.frag_source.as_bytes());
                    let d: ProgramSourceDigest = h.into();
                    let digest = d.to_string();
                    debug_assert_eq!(digest, source_and_digest.digest);
                    hasher.write(digest.as_bytes());
                } else {
                    hasher.write(source_and_digest.digest.as_bytes());
                }

                ProgramSourceType::Optimized(gl_version)
            }
            None => {
                // For non-optimized sources we compute the hash by walking the static strings
                // in the same order as we would when concatenating the source, to avoid
                // heap-allocating in the common case.
                //
                // Note that we cheat a bit to make the hashing more efficient. First, the only
                // difference between the vertex and fragment shader is a single deterministic
                // define, so we don't need to hash both. Second, we precompute the digest of the
                // expanded source file at build time, and then just hash that digest here.
                let override_path = device.resource_override_path.as_ref();
                let source_and_digest = UNOPTIMIZED_SHADERS.get(&name).expect("Shader not found");

                // Hash the prefix string.
                build_shader_prefix_string(
                    gl_version,
                    &features,
                    ShaderKind::Vertex,
                    &name,
                    &mut |s| hasher.write(s.as_bytes()),
                );

                // Hash the shader file contents. We use a precomputed digest, and
                // verify it in debug builds.
                if override_path.is_some() || cfg!(debug_assertions) {
                    let mut h = DefaultHasher::new();
                    build_shader_main_string(
                        &name,
                        &|f| get_unoptimized_shader_source(f, override_path),
                        &mut |s| h.write(s.as_bytes())
                    );
                    let d: ProgramSourceDigest = h.into();
                    let digest = format!("{}", d);
                    debug_assert!(override_path.is_some() || digest == source_and_digest.digest);
                    hasher.write(digest.as_bytes());
                } else {
                    hasher.write(source_and_digest.digest.as_bytes());
                }

                ProgramSourceType::Unoptimized
            }
        };

        // Finish.
        ProgramSourceInfo {
            base_filename: name,
            features: features.to_vec(),
            full_name_cstr: Rc::new(std::ffi::CString::new(full_name).unwrap()),
            source_type,
            digest: hasher.into(),
        }
    }

    /// Builds the actual shader source for the given kind, either by looking
    /// up the pre-optimized source or by expanding the unoptimized template.
    fn compute_source(&self, device: &Device, kind: ShaderKind) -> String {
        let full_name = self.full_name();
        match self.source_type {
            ProgramSourceType::Optimized(gl_version) => {
                let shader = OPTIMIZED_SHADERS
                    .get(&(gl_version, &full_name))
                    .unwrap_or_else(|| panic!("Missing optimized shader source for {}", full_name));

                match kind {
                    ShaderKind::Vertex => shader.vert_source.to_string(),
                    ShaderKind::Fragment => shader.frag_source.to_string(),
                }
            },
            ProgramSourceType::Unoptimized => {
                let mut src = String::new();
                device.build_shader_string(
                    &self.features,
                    kind,
                    self.base_filename,
                    |s| src.push_str(s),
                );
                src
            }
        }
    }

    /// "base" or "base_feat1_feat2_...", matching the build-time naming scheme.
    fn make_full_name(base_filename: &'static str, features: &[&'static str]) -> String {
        if features.is_empty() {
            base_filename.to_string()
        } else {
            format!("{}_{}", base_filename, features.join("_"))
        }
} 890 891 fn full_name(&self) -> String { 892 Self::make_full_name(self.base_filename, &self.features) 893 } 894 } 895 896 #[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))] 897 pub struct ProgramBinary { 898 bytes: Vec<u8>, 899 format: gl::GLenum, 900 source_digest: ProgramSourceDigest, 901 } 902 903 impl ProgramBinary { 904 fn new(bytes: Vec<u8>, 905 format: gl::GLenum, 906 source_digest: ProgramSourceDigest) -> Self { 907 ProgramBinary { 908 bytes, 909 format, 910 source_digest, 911 } 912 } 913 914 /// Returns a reference to the source digest hash. 915 pub fn source_digest(&self) -> &ProgramSourceDigest { 916 &self.source_digest 917 } 918 } 919 920 /// The interfaces that an application can implement to handle ProgramCache update 921 pub trait ProgramCacheObserver { 922 fn save_shaders_to_disk(&self, entries: Vec<Arc<ProgramBinary>>); 923 fn set_startup_shaders(&self, entries: Vec<Arc<ProgramBinary>>); 924 fn try_load_shader_from_disk(&self, digest: &ProgramSourceDigest, program_cache: &Rc<ProgramCache>); 925 fn notify_program_binary_failed(&self, program_binary: &Arc<ProgramBinary>); 926 } 927 928 struct ProgramCacheEntry { 929 /// The binary. 930 binary: Arc<ProgramBinary>, 931 /// True if the binary has been linked, i.e. used for rendering. 
932 linked: bool, 933 } 934 935 pub struct ProgramCache { 936 entries: RefCell<FastHashMap<ProgramSourceDigest, ProgramCacheEntry>>, 937 938 /// Optional trait object that allows the client 939 /// application to handle ProgramCache updating 940 program_cache_handler: Option<Box<dyn ProgramCacheObserver>>, 941 942 /// Programs that have not yet been cached to disk (by program_cache_handler) 943 pending_entries: RefCell<Vec<Arc<ProgramBinary>>>, 944 } 945 946 impl ProgramCache { 947 pub fn new(program_cache_observer: Option<Box<dyn ProgramCacheObserver>>) -> Rc<Self> { 948 Rc::new( 949 ProgramCache { 950 entries: RefCell::new(FastHashMap::default()), 951 program_cache_handler: program_cache_observer, 952 pending_entries: RefCell::new(Vec::default()), 953 } 954 ) 955 } 956 957 /// Save any new program binaries to the disk cache, and if startup has 958 /// just completed then write the list of shaders to load on next startup. 959 fn update_disk_cache(&self, startup_complete: bool) { 960 if let Some(ref handler) = self.program_cache_handler { 961 if !self.pending_entries.borrow().is_empty() { 962 let pending_entries = self.pending_entries.replace(Vec::default()); 963 handler.save_shaders_to_disk(pending_entries); 964 } 965 966 if startup_complete { 967 let startup_shaders = self.entries.borrow().values() 968 .filter(|e| e.linked).map(|e| e.binary.clone()) 969 .collect::<Vec<_>>(); 970 handler.set_startup_shaders(startup_shaders); 971 } 972 } 973 } 974 975 /// Add a new ProgramBinary to the cache. 976 /// This function is typically used after compiling and linking a new program. 977 /// The binary will be saved to disk the next time update_disk_cache() is called. 
978 fn add_new_program_binary(&self, program_binary: Arc<ProgramBinary>) { 979 self.pending_entries.borrow_mut().push(program_binary.clone()); 980 981 let digest = program_binary.source_digest.clone(); 982 let entry = ProgramCacheEntry { 983 binary: program_binary, 984 linked: true, 985 }; 986 self.entries.borrow_mut().insert(digest, entry); 987 } 988 989 /// Load ProgramBinary to ProgramCache. 990 /// The function is typically used to load ProgramBinary from disk. 991 #[cfg(feature = "serialize_program")] 992 pub fn load_program_binary(&self, program_binary: Arc<ProgramBinary>) { 993 let digest = program_binary.source_digest.clone(); 994 let entry = ProgramCacheEntry { 995 binary: program_binary, 996 linked: false, 997 }; 998 self.entries.borrow_mut().insert(digest, entry); 999 } 1000 1001 /// Returns the number of bytes allocated for shaders in the cache. 1002 pub fn report_memory(&self, op: VoidPtrToSizeFn) -> usize { 1003 self.entries.borrow().values() 1004 .map(|e| unsafe { op(e.binary.bytes.as_ptr() as *const c_void ) }) 1005 .sum() 1006 } 1007 } 1008 1009 #[derive(Debug, Copy, Clone)] 1010 pub enum VertexUsageHint { 1011 Static, 1012 Dynamic, 1013 Stream, 1014 } 1015 1016 impl VertexUsageHint { 1017 fn to_gl(&self) -> gl::GLuint { 1018 match *self { 1019 VertexUsageHint::Static => gl::STATIC_DRAW, 1020 VertexUsageHint::Dynamic => gl::DYNAMIC_DRAW, 1021 VertexUsageHint::Stream => gl::STREAM_DRAW, 1022 } 1023 } 1024 } 1025 1026 #[derive(Copy, Clone, Debug)] 1027 pub struct UniformLocation(#[allow(dead_code)] gl::GLint); 1028 1029 impl UniformLocation { 1030 pub const INVALID: Self = UniformLocation(-1); 1031 } 1032 1033 #[derive(Debug)] 1034 pub struct Capabilities { 1035 /// Whether multisampled render targets are supported. 1036 pub supports_multisampling: bool, 1037 /// Whether the function `glCopyImageSubData` is available. 1038 pub supports_copy_image_sub_data: bool, 1039 /// Whether the RGBAF32 textures can be bound to framebuffers. 
1040 pub supports_color_buffer_float: bool, 1041 /// Whether the device supports persistently mapped buffers, via glBufferStorage. 1042 pub supports_buffer_storage: bool, 1043 /// Whether advanced blend equations are supported. 1044 pub supports_advanced_blend_equation: bool, 1045 /// Whether dual-source blending is supported. 1046 pub supports_dual_source_blending: bool, 1047 /// Whether KHR_debug is supported for getting debug messages from 1048 /// the driver. 1049 pub supports_khr_debug: bool, 1050 /// Whether we can configure texture units to do swizzling on sampling. 1051 pub supports_texture_swizzle: bool, 1052 /// Whether the driver supports uploading to textures from a non-zero 1053 /// offset within a PBO. 1054 pub supports_nonzero_pbo_offsets: bool, 1055 /// Whether the driver supports specifying the texture usage up front. 1056 pub supports_texture_usage: bool, 1057 /// Whether offscreen render targets can be partially updated. 1058 pub supports_render_target_partial_update: bool, 1059 /// Whether we can use SSBOs. 1060 pub supports_shader_storage_object: bool, 1061 /// Whether to enforce that texture uploads be batched regardless of what 1062 /// the pref says. 1063 pub requires_batched_texture_uploads: Option<bool>, 1064 /// Whether we are able to ue glClear to clear regions of an alpha render target. 1065 /// If false, we must use a shader to clear instead. 1066 pub supports_alpha_target_clears: bool, 1067 /// Whether we must perform a full unscissored glClear on alpha targets 1068 /// prior to rendering. 1069 pub requires_alpha_target_full_clear: bool, 1070 /// Whether clearing a render target (immediately after binding it) is faster using a scissor 1071 /// rect to clear just the required area, or clearing the entire target without a scissor rect. 1072 pub prefers_clear_scissor: bool, 1073 /// Whether the driver can correctly invalidate render targets. This can be 1074 /// a worthwhile optimization, but is buggy on some devices. 
1075 pub supports_render_target_invalidate: bool, 1076 /// Whether the driver can reliably upload data to R8 format textures. 1077 pub supports_r8_texture_upload: bool, 1078 /// Whether the extension QCOM_tiled_rendering is supported. 1079 pub supports_qcom_tiled_rendering: bool, 1080 /// Whether clip-masking is supported natively by the GL implementation 1081 /// rather than emulated in shaders. 1082 pub uses_native_clip_mask: bool, 1083 /// Whether anti-aliasing is supported natively by the GL implementation 1084 /// rather than emulated in shaders. 1085 pub uses_native_antialiasing: bool, 1086 /// Whether the extension GL_OES_EGL_image_external_essl3 is supported. If true, external 1087 /// textures can be used as normal. If false, external textures can only be rendered with 1088 /// certain shaders, and must first be copied in to regular textures for others. 1089 pub supports_image_external_essl3: bool, 1090 /// Whether the VAO must be rebound after an attached VBO has been orphaned. 1091 pub requires_vao_rebind_after_orphaning: bool, 1092 /// The name of the renderer, as reported by GL 1093 pub renderer_name: String, 1094 } 1095 1096 #[derive(Clone, Debug)] 1097 pub enum ShaderError { 1098 Compilation(String, String), // name, error message 1099 Link(String, String), // name, error message 1100 } 1101 1102 /// A refcounted depth target, which may be shared by multiple textures across 1103 /// the device. 1104 struct SharedDepthTarget { 1105 /// The Render Buffer Object representing the depth target. 1106 rbo_id: RBOId, 1107 /// Reference count. When this drops to zero, the RBO is deleted. 1108 refcount: usize, 1109 } 1110 1111 #[cfg(debug_assertions)] 1112 impl Drop for SharedDepthTarget { 1113 fn drop(&mut self) { 1114 debug_assert!(thread::panicking() || self.refcount == 0); 1115 } 1116 } 1117 1118 /// Describes for which texture formats to use the glTexStorage* 1119 /// family of functions. 
#[derive(PartialEq, Debug)]
enum TexStorageUsage {
    Never,
    NonBGRA8,
    Always,
}

/// Describes a required alignment for a stride,
/// which can either be represented in bytes or pixels.
#[derive(Copy, Clone, Debug)]
pub enum StrideAlignment {
    Bytes(NonZeroUsize),
    Pixels(NonZeroUsize),
}

impl StrideAlignment {
    /// Returns this alignment in bytes for the given image format.
    /// A pixel-based alignment is converted using the format's bytes-per-pixel.
    pub fn num_bytes(&self, format: ImageFormat) -> NonZeroUsize {
        match *self {
            Self::Bytes(bytes) => bytes,
            Self::Pixels(pixels) => {
                assert!(format.bytes_per_pixel() > 0);
                NonZeroUsize::new(pixels.get() * format.bytes_per_pixel() as usize).unwrap()
            }
        }
    }
}

// We get 24 bits of Z value - reserve RESERVE_DEPTH_BITS (2) of them as
// headroom, using only the remaining 22 bits, to account for GPU issues.
// This seems to manifest on some GPUs under certain perspectives due to
// z interpolation precision problems.
const RESERVE_DEPTH_BITS: i32 = 2;

pub struct Device {
    gl: Rc<dyn gl::Gl>,

    /// If non-None, |gl| points to a profiling wrapper, and this points to the
    /// underlying Gl instance.
    base_gl: Option<Rc<dyn gl::Gl>>,

    // device state
    bound_textures: [gl::GLuint; 16],
    bound_program: gl::GLuint,
    bound_program_name: Rc<std::ffi::CString>,
    bound_vao: gl::GLuint,
    bound_read_fbo: (FBOId, DeviceIntPoint),
    bound_draw_fbo: FBOId,
    default_read_fbo: FBOId,
    default_draw_fbo: FBOId,

    /// Track depth state for assertions. Note that the default FBO has depth,
    /// so this defaults to true.
    depth_available: bool,

    upload_method: UploadMethod,
    use_batched_texture_uploads: bool,
    /// Whether to use draw calls instead of regular blitting commands.
    ///
    /// Note: this currently only applies to the batched texture uploads
    /// path.
    use_draw_calls_for_texture_copy: bool,
    /// Number of pixels below which we prefer batched uploads.
    batched_upload_threshold: i32,

    // HW or API capabilities
    capabilities: Capabilities,

    color_formats: TextureFormatPair<ImageFormat>,
    bgra_formats: TextureFormatPair<gl::GLuint>,
    bgra_pixel_type: gl::GLuint,
    swizzle_settings: SwizzleSettings,
    depth_format: gl::GLuint,

    /// Map from texture dimensions to shared depth buffers for render targets.
    ///
    /// Render targets often have the same width/height, so we can save memory
    /// by sharing these across targets.
    depth_targets: FastHashMap<DeviceIntSize, SharedDepthTarget>,

    // debug
    inside_frame: bool,
    crash_annotator: Option<Box<dyn CrashAnnotator>>,
    annotate_draw_call_crashes: bool,

    // resources
    resource_override_path: Option<PathBuf>,

    /// Whether to use shaders that have been optimized at build time.
    use_optimized_shaders: bool,

    max_texture_size: i32,
    cached_programs: Option<Rc<ProgramCache>>,

    // Frame counter. This is used to map between CPU
    // frames and GPU frames.
    frame_id: GpuFrameId,

    /// When to use glTexStorage*. We prefer this over glTexImage* because it
    /// guarantees that mipmaps won't be generated (which they otherwise are on
    /// some drivers, particularly ANGLE). However, it is not always supported
    /// at all, or for BGRA8 format. If it's not supported for the required
    /// format, we fall back to glTexImage*.
    texture_storage_usage: TexStorageUsage,

    /// Required stride alignment for pixel transfers. This may be required for
    /// correctness reasons due to driver bugs, or for performance reasons to
    /// ensure we remain on the fast-path for transfers.
    required_pbo_stride: StrideAlignment,

    /// Whether we must ensure the source strings passed to glShaderSource()
    /// are null-terminated, to work around driver bugs.
    requires_null_terminated_shader_source: bool,

    /// Whether we must unbind any texture from GL_TEXTURE_EXTERNAL_OES before
    /// binding to GL_TEXTURE_2D, to work around an android emulator bug.
    requires_texture_external_unbind: bool,

    /// Whether the renderer identifies itself as Software WebRender (SWGL).
    is_software_webrender: bool,

    // GL extensions
    extensions: Vec<String>,

    /// Dumps the source of the shader with the given name
    dump_shader_source: Option<String>,

    surface_origin_is_top_left: bool,

    /// A debug boolean for tracking if the shader program has been set after
    /// a blend mode change.
    ///
    /// This is needed for compatibility with next-gen
    /// GPU APIs that switch states using "pipeline object" that bundles
    /// together the blending state with the shader.
    ///
    /// Having the constraint of always binding the shader last would allow
    /// us to have the "pipeline object" bound at that time. Without this
    /// constraint, we'd either have to eagerly bind the "pipeline object"
    /// on changing either the shader or the blend mode, or lazily bind it
    /// at draw call time, neither of which is desirable.
    #[cfg(debug_assertions)]
    shader_is_ready: bool,

    // count created/deleted textures to report in the profiler.
    pub textures_created: u32,
    pub textures_deleted: u32,
}

/// Contains the parameters necessary to bind a draw target.
#[derive(Clone, Copy, Debug)]
pub enum DrawTarget {
    /// Use the device's default draw target, with the provided dimensions,
    /// which are used to set the viewport.
    Default {
        /// Target rectangle to draw.
        rect: FramebufferIntRect,
        /// Total size of the target.
        total_size: FramebufferIntSize,
        surface_origin_is_top_left: bool,
    },
    /// Use the provided texture.
    Texture {
        /// Size of the texture in pixels
        dimensions: DeviceIntSize,
        /// Whether to draw with the texture's associated depth target
        with_depth: bool,
        /// FBO that corresponds to the selected layer / depth mode
        fbo_id: FBOId,
        /// Native GL texture ID
        id: gl::GLuint,
        /// Native GL texture target
        target: gl::GLuint,
    },
    /// Use an FBO attached to an external texture.
    External {
        fbo: FBOId,
        size: FramebufferIntSize,
    },
    /// An OS compositor surface
    NativeSurface {
        offset: DeviceIntPoint,
        external_fbo_id: u32,
        dimensions: DeviceIntSize,
    },
}

impl DrawTarget {
    /// Builds a `Default` draw target covering the entire framebuffer of the
    /// given size.
    pub fn new_default(size: DeviceIntSize, surface_origin_is_top_left: bool) -> Self {
        let total_size = device_size_as_framebuffer_size(size);
        DrawTarget::Default {
            rect: total_size.into(),
            total_size,
            surface_origin_is_top_left,
        }
    }

    /// Returns true if this draw target corresponds to the default framebuffer.
    pub fn is_default(&self) -> bool {
        match *self {
            DrawTarget::Default {..} => true,
            _ => false,
        }
    }

    /// Builds a `Texture` draw target from an existing texture, choosing the
    /// FBO with or without an attached depth buffer.
    ///
    /// Panics if the texture does not have the requested FBO (the `unwrap`s
    /// below assume the corresponding FBO has already been created).
    pub fn from_texture(
        texture: &Texture,
        with_depth: bool,
    ) -> Self {
        let fbo_id = if with_depth {
            texture.fbo_with_depth.unwrap()
        } else {
            texture.fbo.unwrap()
        };

        DrawTarget::Texture {
            dimensions: texture.get_dimensions(),
            fbo_id,
            with_depth,
            id: texture.id,
            target: texture.target,
        }
    }

    /// Returns the dimensions of this draw-target.
    pub fn dimensions(&self) -> DeviceIntSize {
        match *self {
            DrawTarget::Default { total_size, .. } => total_size.cast_unit(),
            DrawTarget::Texture { dimensions, .. } => dimensions,
            DrawTarget::External { size, .. } => size.cast_unit(),
            DrawTarget::NativeSurface { dimensions, .. } => dimensions,
        }
    }

    /// Returns the origin offset of this draw target. Only native (OS
    /// compositor) surfaces carry a non-zero offset.
    pub fn offset(&self) -> DeviceIntPoint {
        match *self {
            DrawTarget::Default { .. } |
            DrawTarget::Texture { .. } |
            DrawTarget::External { .. } => {
                DeviceIntPoint::zero()
            }
            DrawTarget::NativeSurface { offset, .. } => offset,
        }
    }

    /// Converts a device-space rect into framebuffer space for this target,
    /// Y-flipping for default framebuffers whose origin is at the bottom-left.
    pub fn to_framebuffer_rect(&self, device_rect: DeviceIntRect) -> FramebufferIntRect {
        let mut fb_rect = device_rect_as_framebuffer_rect(&device_rect);
        match *self {
            DrawTarget::Default { ref rect, surface_origin_is_top_left, .. } => {
                // perform a Y-flip here
                if !surface_origin_is_top_left {
                    let w = fb_rect.width();
                    let h = fb_rect.height();
                    fb_rect.min.x = fb_rect.min.x + rect.min.x;
                    fb_rect.min.y = rect.max.y - fb_rect.max.y;
                    fb_rect.max.x = fb_rect.min.x + w;
                    fb_rect.max.y = fb_rect.min.y + h;
                }
            }
            DrawTarget::Texture { .. } | DrawTarget::External { .. } | DrawTarget::NativeSurface { .. } => (),
        }
        fb_rect
    }

    /// Returns whether the target's origin is at the top-left. Texture-backed
    /// and native targets always are; the default framebuffer depends on the
    /// surface it was created with.
    pub fn surface_origin_is_top_left(&self) -> bool {
        match *self {
            DrawTarget::Default { surface_origin_is_top_left, .. } => surface_origin_is_top_left,
            DrawTarget::Texture { .. } | DrawTarget::External { .. } | DrawTarget::NativeSurface { .. } => true,
        }
    }

    /// Given a scissor rect, convert it to the right coordinate space
    /// depending on the draw target kind. If no scissor rect was supplied,
    /// returns a scissor rect that encloses the entire render target.
    pub fn build_scissor_rect(
        &self,
        scissor_rect: Option<DeviceIntRect>,
    ) -> FramebufferIntRect {
        let dimensions = self.dimensions();

        match scissor_rect {
            Some(scissor_rect) => match *self {
                DrawTarget::Default { ref rect, .. } => {
                    // Clamp to the target rect, since the scissor may extend
                    // beyond the default framebuffer after conversion.
                    self.to_framebuffer_rect(scissor_rect)
                        .intersection(rect)
                        .unwrap_or_else(FramebufferIntRect::zero)
                }
                DrawTarget::NativeSurface { offset, .. } => {
                    device_rect_as_framebuffer_rect(&scissor_rect.translate(offset.to_vector()))
                }
                DrawTarget::Texture { .. } | DrawTarget::External { .. } => {
                    device_rect_as_framebuffer_rect(&scissor_rect)
                }
            }
            None => {
                FramebufferIntRect::from_size(
                    device_size_as_framebuffer_size(dimensions),
                )
            }
        }
    }
}

/// Contains the parameters necessary to bind a texture-backed read target.
#[derive(Clone, Copy, Debug)]
pub enum ReadTarget {
    /// Use the device's default draw target.
    Default,
    /// Use the provided texture.
    Texture {
        /// ID of the FBO to read from.
        fbo_id: FBOId,
    },
    /// Use an FBO attached to an external texture.
    External {
        fbo: FBOId,
    },
    /// An FBO bound to a native (OS compositor) surface
    NativeSurface {
        fbo_id: FBOId,
        offset: DeviceIntPoint,
    },
}

impl ReadTarget {
    /// Builds a `Texture` read target from an existing texture.
    ///
    /// Panics if the texture's FBO has not been created (see the `unwrap`).
    pub fn from_texture(
        texture: &Texture,
    ) -> Self {
        ReadTarget::Texture {
            fbo_id: texture.fbo.unwrap(),
        }
    }

    /// Returns the origin offset of this read target. Only native (OS
    /// compositor) surfaces carry a non-zero offset.
    fn offset(&self) -> DeviceIntPoint {
        match *self {
            ReadTarget::Default |
            ReadTarget::Texture { .. } |
            ReadTarget::External { .. } => {
                DeviceIntPoint::zero()
            }

            ReadTarget::NativeSurface { offset, .. } => {
                offset
            }
        }
    }
}

/// Converts a draw target into the corresponding read target, so that a
/// target that was just rendered to can also be read from.
impl From<DrawTarget> for ReadTarget {
    fn from(t: DrawTarget) -> Self {
        match t {
            DrawTarget::Default { .. } => {
                ReadTarget::Default
            }
            DrawTarget::NativeSurface { external_fbo_id, offset, .. } => {
                ReadTarget::NativeSurface {
                    fbo_id: FBOId(external_fbo_id),
                    offset,
                }
            }
            DrawTarget::Texture { fbo_id, .. } => {
                ReadTarget::Texture { fbo_id }
            }
            DrawTarget::External { fbo, .. } => {
                ReadTarget::External { fbo }
            }
        }
    }
}

/// Parses the major, release, and patch versions from a GL_VERSION string on
/// Mali devices. For example, for the version string
/// "OpenGL ES 3.2 v1.r36p0-01eac0.28ab3a577f105e026887e2b4c93552fb" this
/// returns Some((1, 36, 0)). Returns None if the version cannot be parsed.
fn parse_mali_version(version_string: &str) -> Option<(u32, u32, u32)> {
    let (_prefix, version_string) = version_string.split_once("v")?;
    let (v_str, version_string) = version_string.split_once(".r")?;
    let v = v_str.parse().ok()?;

    let (r_str, version_string) = version_string.split_once("p")?;
    let r = r_str.parse().ok()?;

    // Not all devices have the trailing string following the "p" number.
    let (p_str, _) = version_string.split_once("-").unwrap_or((version_string, ""));
    let p = p_str.parse().ok()?;

    Some((v, r, p))
}

/// Returns whether this GPU belongs to the Mali Midgard family
fn is_mali_midgard(renderer_name: &str) -> bool {
    renderer_name.starts_with("Mali-T")
}

/// Returns whether this GPU belongs to the Mali Bifrost family
fn is_mali_bifrost(renderer_name: &str) -> bool {
    renderer_name == "Mali-G31"
        || renderer_name == "Mali-G51"
        || renderer_name == "Mali-G71"
        || renderer_name == "Mali-G52"
        || renderer_name == "Mali-G72"
        || renderer_name == "Mali-G76"
}

/// Returns whether this GPU belongs to the Mali Valhall family
fn is_mali_valhall(renderer_name: &str) -> bool {
    // As new Valhall GPUs may be released in the future we match all Mali-G models, apart from
    // Bifrost models (of which we don't expect any new ones to be released)
    renderer_name.starts_with("Mali-G") && !is_mali_bifrost(renderer_name)
}

/// Maps a GL error code to its human-readable name for logging.
#[inline(never)]
fn gl_error_string(code: u32) -> &'static str {
    match code {
        gl::INVALID_ENUM =>
"GL_INVALID_ENUM", 1533 gl::INVALID_VALUE => "GL_INVALID_VALUE", 1534 gl::INVALID_OPERATION => "GL_INVALID_OPERATION", 1535 gl::STACK_OVERFLOW => "GL_STACK_OVERFLOW", 1536 gl::STACK_UNDERFLOW => "GL_STACK_UNDERFLOW", 1537 gl::OUT_OF_MEMORY => "GL_OUT_OF_MEMORY", 1538 gl::INVALID_FRAMEBUFFER_OPERATION => "GL_INVALID_FRAMEBUFFER_OPERATION", 1539 0x507 => "GL_CONTEXT_LOST", 1540 _ => "(unknown error code)", 1541 } 1542 } 1543 1544 impl Device { 1545 pub fn new( 1546 mut gl: Rc<dyn gl::Gl>, 1547 crash_annotator: Option<Box<dyn CrashAnnotator>>, 1548 resource_override_path: Option<PathBuf>, 1549 use_optimized_shaders: bool, 1550 upload_method: UploadMethod, 1551 batched_upload_threshold: i32, 1552 cached_programs: Option<Rc<ProgramCache>>, 1553 allow_texture_storage_support: bool, 1554 allow_texture_swizzling: bool, 1555 dump_shader_source: Option<String>, 1556 surface_origin_is_top_left: bool, 1557 panic_on_gl_error: bool, 1558 ) -> Device { 1559 let mut max_texture_size = [0]; 1560 unsafe { 1561 gl.get_integer_v(gl::MAX_TEXTURE_SIZE, &mut max_texture_size); 1562 } 1563 1564 // We cap the max texture size at 16384. Some hardware report higher 1565 // capabilities but get very unstable with very large textures. 1566 // Bug 1702494 tracks re-evaluating this cap. 1567 let max_texture_size = max_texture_size[0].min(16384); 1568 1569 let renderer_name = gl.get_string(gl::RENDERER); 1570 info!("Renderer: {}", renderer_name); 1571 let version_string = gl.get_string(gl::VERSION); 1572 info!("Version: {}", version_string); 1573 info!("Max texture size: {}", max_texture_size); 1574 1575 let mut extension_count = [0]; 1576 unsafe { 1577 gl.get_integer_v(gl::NUM_EXTENSIONS, &mut extension_count); 1578 } 1579 let extension_count = extension_count[0] as gl::GLuint; 1580 let mut extensions = Vec::new(); 1581 for i in 0 .. 
extension_count { 1582 extensions.push(gl.get_string_i(gl::EXTENSIONS, i)); 1583 } 1584 1585 // We block this on Mali Valhall GPUs as the extension's functions always return 1586 // GL_OUT_OF_MEMORY, causing us to panic in debug builds. 1587 let supports_khr_debug = supports_extension(&extensions, "GL_KHR_debug") 1588 && !is_mali_valhall(&renderer_name); 1589 1590 // On debug builds, assert that each GL call is error-free. We don't do 1591 // this on release builds because the synchronous call can stall the 1592 // pipeline. 1593 if panic_on_gl_error || cfg!(debug_assertions) { 1594 gl = gl::ErrorReactingGl::wrap(gl, move |gl, name, code| { 1595 if supports_khr_debug { 1596 Self::log_driver_messages(gl); 1597 } 1598 let err_name = gl_error_string(code); 1599 error!("Caught GL error 0x{:x} {} at {}", code, err_name, name); 1600 panic!("Caught GL error 0x{:x} {} at {}", code, err_name, name); 1601 }); 1602 } 1603 1604 if supports_extension(&extensions, "GL_ANGLE_provoking_vertex") { 1605 gl.provoking_vertex_angle(gl::FIRST_VERTEX_CONVENTION); 1606 } 1607 1608 let supports_texture_usage = supports_extension(&extensions, "GL_ANGLE_texture_usage"); 1609 1610 // Our common-case image data in Firefox is BGRA, so we make an effort 1611 // to use BGRA as the internal texture storage format to avoid the need 1612 // to swizzle during upload. Currently we only do this on GLES (and thus 1613 // for Windows, via ANGLE). 1614 // 1615 // On Mac, Apple docs [1] claim that BGRA is a more efficient internal 1616 // format, but they don't support it with glTextureStorage. As a workaround, 1617 // we pretend that it's RGBA8 for the purposes of texture transfers, 1618 // but swizzle R with B for the texture sampling. 1619 // 1620 // We also need our internal format types to be sized, since glTexStorage* 1621 // will reject non-sized internal format types. 
1622 // 1623 // Unfortunately, with GL_EXT_texture_format_BGRA8888, BGRA8 is not a 1624 // valid internal format (for glTexImage* or glTexStorage*) unless 1625 // GL_EXT_texture_storage is also available [2][3], which is usually 1626 // not the case on GLES 3 as the latter's functionality has been 1627 // included by default but the former has not been updated. 1628 // The extension is available on ANGLE, but on Android this usually 1629 // means we must fall back to using unsized BGRA and glTexImage*. 1630 // 1631 // Overall, we have the following factors in play when choosing the formats: 1632 // - with glTexStorage, the internal format needs to match the external format, 1633 // or the driver would have to do the conversion, which is slow 1634 // - on desktop GL, there is no BGRA internal format. However, initializing 1635 // the textures with glTexImage as RGBA appears to use BGRA internally, 1636 // preferring BGRA external data [4]. 1637 // - when glTexStorage + BGRA internal format is not supported, 1638 // and the external data is BGRA, we have the following options: 1639 // 1. use glTexImage with RGBA internal format, this costs us VRAM for mipmaps 1640 // 2. use glTexStorage with RGBA internal format, this costs us the conversion by the driver 1641 // 3. 
pretend we are uploading RGBA and set up the swizzling of the texture unit - this costs us batch breaks 1642 // 1643 // [1] https://developer.apple.com/library/archive/documentation/ 1644 // GraphicsImaging/Conceptual/OpenGL-MacProgGuide/opengl_texturedata/ 1645 // opengl_texturedata.html#//apple_ref/doc/uid/TP40001987-CH407-SW22 1646 // [2] https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_format_BGRA8888.txt 1647 // [3] https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_storage.txt 1648 // [4] http://http.download.nvidia.com/developer/Papers/2005/Fast_Texture_Transfers/Fast_Texture_Transfers.pdf 1649 1650 // On the android emulator glTexImage fails to create textures larger than 3379. 1651 // So we must use glTexStorage instead. See bug 1591436. 1652 let is_emulator = renderer_name.starts_with("Android Emulator"); 1653 let avoid_tex_image = is_emulator; 1654 let mut gl_version = [0; 2]; 1655 unsafe { 1656 gl.get_integer_v(gl::MAJOR_VERSION, &mut gl_version[0..1]); 1657 gl.get_integer_v(gl::MINOR_VERSION, &mut gl_version[1..2]); 1658 } 1659 info!("GL context {:?} {}.{}", gl.get_type(), gl_version[0], gl_version[1]); 1660 1661 // We block texture storage on mac because it doesn't support BGRA 1662 let supports_texture_storage = allow_texture_storage_support && !cfg!(target_os = "macos") && 1663 match gl.get_type() { 1664 gl::GlType::Gl => supports_extension(&extensions, "GL_ARB_texture_storage"), 1665 gl::GlType::Gles => true, 1666 }; 1667 1668 // The GL_EXT_texture_format_BGRA8888 extension allows us to use BGRA as an internal format 1669 // with glTexImage on GLES. However, we can only use BGRA8 as an internal format for 1670 // glTexStorage when GL_EXT_texture_storage is also explicitly supported. This is because 1671 // glTexStorage was added in GLES 3, but GL_EXT_texture_format_BGRA8888 was written against 1672 // GLES 2 and GL_EXT_texture_storage. 
1673 // To complicate things even further, some Intel devices claim to support both extensions 1674 // but in practice do not allow BGRA to be used with glTexStorage. 1675 let supports_gles_bgra = supports_extension(&extensions, "GL_EXT_texture_format_BGRA8888"); 1676 let supports_texture_storage_with_gles_bgra = supports_gles_bgra 1677 && supports_extension(&extensions, "GL_EXT_texture_storage") 1678 && !renderer_name.starts_with("Intel(R) HD Graphics for BayTrail") 1679 && !renderer_name.starts_with("Intel(R) HD Graphics for Atom(TM) x5/x7"); 1680 1681 let supports_texture_swizzle = allow_texture_swizzling && 1682 match gl.get_type() { 1683 // see https://www.g-truc.net/post-0734.html 1684 gl::GlType::Gl => gl_version >= [3, 3] || 1685 supports_extension(&extensions, "GL_ARB_texture_swizzle"), 1686 gl::GlType::Gles => true, 1687 }; 1688 1689 let (color_formats, bgra_formats, bgra_pixel_type, bgra8_sampling_swizzle, texture_storage_usage) = match gl.get_type() { 1690 // There is `glTexStorage`, use it and expect RGBA on the input. 1691 gl::GlType::Gl if supports_texture_storage && supports_texture_swizzle => ( 1692 TextureFormatPair::from(ImageFormat::RGBA8), 1693 TextureFormatPair { internal: gl::RGBA8, external: gl::RGBA }, 1694 gl::UNSIGNED_BYTE, 1695 Swizzle::Bgra, // pretend it's RGBA, rely on swizzling 1696 TexStorageUsage::Always 1697 ), 1698 // There is no `glTexStorage`, upload as `glTexImage` with BGRA input. 1699 gl::GlType::Gl => ( 1700 TextureFormatPair { internal: ImageFormat::BGRA8, external: ImageFormat::BGRA8 }, 1701 TextureFormatPair { internal: gl::RGBA, external: gl::BGRA }, 1702 gl::UNSIGNED_INT_8_8_8_8_REV, 1703 Swizzle::Rgba, // converted on uploads by the driver, no swizzling needed 1704 TexStorageUsage::Never 1705 ), 1706 // glTexStorage is always supported in GLES 3, but because the GL_EXT_texture_storage 1707 // extension is supported we can use glTexStorage with BGRA8 as the internal format. 1708 // Prefer BGRA textures over RGBA. 
1709 gl::GlType::Gles if supports_texture_storage_with_gles_bgra => ( 1710 TextureFormatPair::from(ImageFormat::BGRA8), 1711 TextureFormatPair { internal: gl::BGRA8_EXT, external: gl::BGRA_EXT }, 1712 gl::UNSIGNED_BYTE, 1713 Swizzle::Rgba, // no conversion needed 1714 TexStorageUsage::Always, 1715 ), 1716 // BGRA is not supported as an internal format with glTexStorage, therefore we will 1717 // use RGBA textures instead and pretend BGRA data is RGBA when uploading. 1718 // The swizzling will happen at the texture unit. 1719 gl::GlType::Gles if supports_texture_swizzle => ( 1720 TextureFormatPair::from(ImageFormat::RGBA8), 1721 TextureFormatPair { internal: gl::RGBA8, external: gl::RGBA }, 1722 gl::UNSIGNED_BYTE, 1723 Swizzle::Bgra, // pretend it's RGBA, rely on swizzling 1724 TexStorageUsage::Always, 1725 ), 1726 // BGRA is not supported as an internal format with glTexStorage, and we cannot use 1727 // swizzling either. Therefore prefer BGRA textures over RGBA, but use glTexImage 1728 // to initialize BGRA textures. glTexStorage can still be used for other formats. 1729 gl::GlType::Gles if supports_gles_bgra && !avoid_tex_image => ( 1730 TextureFormatPair::from(ImageFormat::BGRA8), 1731 TextureFormatPair::from(gl::BGRA_EXT), 1732 gl::UNSIGNED_BYTE, 1733 Swizzle::Rgba, // no conversion needed 1734 TexStorageUsage::NonBGRA8, 1735 ), 1736 // Neither BGRA or swizzling are supported. GLES does not allow format conversion 1737 // during upload so we must use RGBA textures and pretend BGRA data is RGBA when 1738 // uploading. Images may be rendered incorrectly as a result. 1739 gl::GlType::Gles => { 1740 warn!("Neither BGRA or texture swizzling are supported. 
Images may be rendered incorrectly."); 1741 ( 1742 TextureFormatPair::from(ImageFormat::RGBA8), 1743 TextureFormatPair { internal: gl::RGBA8, external: gl::RGBA }, 1744 gl::UNSIGNED_BYTE, 1745 Swizzle::Rgba, 1746 TexStorageUsage::Always, 1747 ) 1748 } 1749 }; 1750 1751 let is_software_webrender = renderer_name.starts_with("Software WebRender"); 1752 let upload_method = if is_software_webrender { 1753 // Uploads in SWGL generally reduce to simple memory copies. 1754 UploadMethod::Immediate 1755 } else { 1756 upload_method 1757 }; 1758 // Prefer 24-bit depth format. While 16-bit depth also works, it may exhaust depth ids easily. 1759 let depth_format = gl::DEPTH_COMPONENT24; 1760 1761 info!("GL texture cache {:?}, bgra {:?} swizzle {:?}, texture storage {:?}, depth {:?}", 1762 color_formats, bgra_formats, bgra8_sampling_swizzle, texture_storage_usage, depth_format); 1763 1764 // On Mali-T devices glCopyImageSubData appears to stall the pipeline until any pending 1765 // renders to the source texture have completed. On Mali-G, it has been observed to 1766 // indefinitely hang in some circumstances. Using an alternative such as glBlitFramebuffer 1767 // is preferable on such devices, so pretend we don't support glCopyImageSubData. 1768 // See bugs 1669494 and 1677757. 1769 let supports_copy_image_sub_data = if renderer_name.starts_with("Mali") { 1770 false 1771 } else { 1772 supports_extension(&extensions, "GL_EXT_copy_image") || 1773 supports_extension(&extensions, "GL_ARB_copy_image") 1774 }; 1775 1776 // We have seen crashes on x86 PowerVR Rogue G6430 devices during GPU cache 1777 // updates using the scatter shader. It seems likely that GL_EXT_color_buffer_float 1778 // is broken. See bug 1709408. 
1779 let is_x86_powervr_rogue_g6430 = renderer_name.starts_with("PowerVR Rogue G6430") 1780 && cfg!(target_arch = "x86"); 1781 let supports_color_buffer_float = match gl.get_type() { 1782 gl::GlType::Gl => true, 1783 gl::GlType::Gles if is_x86_powervr_rogue_g6430 => false, 1784 gl::GlType::Gles => supports_extension(&extensions, "GL_EXT_color_buffer_float"), 1785 }; 1786 1787 let is_adreno = renderer_name.starts_with("Adreno"); 1788 1789 // There appears to be a driver bug on older versions of the Adreno 1790 // driver which prevents usage of persistenly mapped buffers. 1791 // See bugs 1678585 and 1683936. 1792 // TODO: only disable feature for affected driver versions. 1793 let supports_buffer_storage = if is_adreno { 1794 false 1795 } else { 1796 supports_extension(&extensions, "GL_EXT_buffer_storage") || 1797 supports_extension(&extensions, "GL_ARB_buffer_storage") 1798 }; 1799 1800 // KHR_blend_equation_advanced renders incorrectly on Adreno 1801 // devices. This has only been confirmed up to Adreno 5xx, and has been 1802 // fixed for Android 9, so this condition could be made more specific. 1803 let supports_advanced_blend_equation = 1804 supports_extension(&extensions, "GL_KHR_blend_equation_advanced") && 1805 !is_adreno; 1806 1807 let supports_dual_source_blending = match gl.get_type() { 1808 gl::GlType::Gl => supports_extension(&extensions,"GL_ARB_blend_func_extended") && 1809 supports_extension(&extensions,"GL_ARB_explicit_attrib_location"), 1810 gl::GlType::Gles => supports_extension(&extensions,"GL_EXT_blend_func_extended"), 1811 }; 1812 1813 // Software webrender relies on the unoptimized shader source. 1814 let use_optimized_shaders = use_optimized_shaders && !is_software_webrender; 1815 1816 // On the android emulator, and possibly some Mali devices, glShaderSource 1817 // can crash if the source strings are not null-terminated. 1818 // See bug 1591945 and bug 1799722. 
1819 let requires_null_terminated_shader_source = is_emulator || renderer_name == "Mali-T628" 1820 || renderer_name == "Mali-T720" || renderer_name == "Mali-T760" 1821 || renderer_name == "Mali-G57"; 1822 1823 // The android emulator gets confused if you don't explicitly unbind any texture 1824 // from GL_TEXTURE_EXTERNAL_OES before binding another to GL_TEXTURE_2D. See bug 1636085. 1825 let requires_texture_external_unbind = is_emulator; 1826 1827 let is_macos = cfg!(target_os = "macos"); 1828 // && renderer_name.starts_with("AMD"); 1829 // (XXX: we apply this restriction to all GPUs to handle switching) 1830 1831 let is_windows_angle = cfg!(target_os = "windows") 1832 && renderer_name.starts_with("ANGLE"); 1833 let is_adreno_3xx = renderer_name.starts_with("Adreno (TM) 3"); 1834 1835 // Some GPUs require the stride of the data during texture uploads to be 1836 // aligned to certain requirements, either for correctness or performance 1837 // reasons. 1838 let required_pbo_stride = if is_adreno_3xx { 1839 // On Adreno 3xx, alignments of < 128 bytes can result in corrupted 1840 // glyphs. See bug 1696039. 1841 StrideAlignment::Bytes(NonZeroUsize::new(128).unwrap()) 1842 } else if is_adreno { 1843 // On later Adreno devices it must be a multiple of 64 *pixels* to 1844 // hit the fast path, meaning value in bytes varies with the texture 1845 // format. This is purely an optimization. 1846 StrideAlignment::Pixels(NonZeroUsize::new(64).unwrap()) 1847 } else if is_macos { 1848 // On AMD Mac, it must always be a multiple of 256 bytes. 1849 // We apply this restriction to all GPUs to handle switching 1850 StrideAlignment::Bytes(NonZeroUsize::new(256).unwrap()) 1851 } else if is_windows_angle { 1852 // On ANGLE-on-D3D, PBO texture uploads get incorrectly truncated 1853 // if the stride is greater than the width * bpp. 
1854 StrideAlignment::Bytes(NonZeroUsize::new(1).unwrap()) 1855 } else { 1856 // Other platforms may have similar requirements and should be added 1857 // here. The default value should be 4 bytes. 1858 StrideAlignment::Bytes(NonZeroUsize::new(4).unwrap()) 1859 }; 1860 1861 // On AMD Macs there is a driver bug which causes some texture uploads 1862 // from a non-zero offset within a PBO to fail. See bug 1603783. 1863 let supports_nonzero_pbo_offsets = !is_macos; 1864 1865 // We have encountered several issues when only partially updating render targets on a 1866 // variety of Mali GPUs. As a precaution avoid doing so on all Midgard and Bifrost GPUs. 1867 // Valhall (eg Mali-Gx7 onwards) appears to be unaffected. See bug 1691955, bug 1558374, 1868 // and bug 1663355. 1869 // We have Additionally encountered issues on PowerVR D-Series. See bug 2005312. 1870 let supports_render_target_partial_update = !is_mali_midgard(&renderer_name) 1871 && !is_mali_bifrost(&renderer_name) 1872 && !renderer_name.starts_with("PowerVR D-Series"); 1873 1874 let supports_shader_storage_object = match gl.get_type() { 1875 // see https://www.g-truc.net/post-0734.html 1876 gl::GlType::Gl => supports_extension(&extensions, "GL_ARB_shader_storage_buffer_object"), 1877 gl::GlType::Gles => gl_version >= [3, 1], 1878 }; 1879 1880 // SWGL uses swgl_clipMask() instead of implementing clip-masking in shaders. 1881 // This allows certain shaders to potentially bypass the more expensive alpha- 1882 // pass variants if they know the alpha-pass was only required to deal with 1883 // clip-masking. 1884 let uses_native_clip_mask = is_software_webrender; 1885 1886 // SWGL uses swgl_antiAlias() instead of implementing anti-aliasing in shaders. 1887 // As above, this allows bypassing certain alpha-pass variants. 1888 let uses_native_antialiasing = is_software_webrender; 1889 1890 // If running on android with a mesa driver (eg intel chromebooks), parse the mesa version. 
1891 let mut android_mesa_version = None; 1892 if cfg!(target_os = "android") && renderer_name.starts_with("Mesa") { 1893 if let Some((_, mesa_version)) = version_string.split_once("Mesa ") { 1894 if let Some((major_str, _)) = mesa_version.split_once(".") { 1895 if let Ok(major) = major_str.parse::<i32>() { 1896 android_mesa_version = Some(major); 1897 } 1898 } 1899 } 1900 } 1901 1902 // If the device supports OES_EGL_image_external_essl3 we can use it to render 1903 // external images. If not, we must use the ESSL 1.0 OES_EGL_image_external 1904 // extension instead. 1905 // Mesa versions prior to 20.0 do not implement textureSize(samplerExternalOES), 1906 // so we must use the fallback path. 1907 let supports_image_external_essl3 = match android_mesa_version { 1908 Some(major) if major < 20 => false, 1909 _ => supports_extension(&extensions, "GL_OES_EGL_image_external_essl3"), 1910 }; 1911 1912 let mut requires_batched_texture_uploads = None; 1913 if is_software_webrender { 1914 // No benefit to batching texture uploads with swgl. 1915 requires_batched_texture_uploads = Some(false); 1916 } else if renderer_name.starts_with("Mali-G") { 1917 // On Mali-Gxx the driver really struggles with many small texture uploads, 1918 // and handles fewer, larger uploads better. 1919 requires_batched_texture_uploads = Some(true); 1920 } 1921 1922 // On Mali-Txxx devices we have observed crashes during draw calls when rendering 1923 // to an alpha target immediately after using glClear to clear regions of it. 1924 // Using a shader to clear the regions avoids the crash. See bug 1638593. 1925 // On Adreno 510 devices we have seen garbage being used as masks when clearing 1926 // alpha targets with glClear. Using quads to clear avoids this. See bug 1941154. 
1927 let is_adreno_510 = renderer_name.starts_with("Adreno (TM) 510"); 1928 let supports_alpha_target_clears = !is_mali_midgard(&renderer_name) && !is_adreno_510; 1929 1930 // On Adreno 4xx devices with older drivers we have seen render tasks to alpha targets have 1931 // no effect unless the target is fully cleared prior to rendering. See bug 1714227. 1932 let is_adreno_4xx = renderer_name.starts_with("Adreno (TM) 4"); 1933 let requires_alpha_target_full_clear = is_adreno_4xx; 1934 1935 // Testing on Intel and nVidia GPUs, as well as software webrender, showed large performance 1936 // wins applying a scissor rect when clearing render targets. Assume this is the best 1937 // default. On mobile GPUs, however, it can be much more efficient to clear the entire 1938 // render target. For now, enable the scissor everywhere except Android hardware 1939 // webrender. We can tweak this further if needs be. 1940 let prefers_clear_scissor = !cfg!(target_os = "android") || is_software_webrender; 1941 1942 let mut supports_render_target_invalidate = true; 1943 1944 // On PowerVR Rogue devices we have seen that invalidating render targets after we are done 1945 // with them can incorrectly cause pending renders to be written to different targets 1946 // instead. See bug 1719345. 1947 let is_powervr_rogue = renderer_name.starts_with("PowerVR Rogue"); 1948 if is_powervr_rogue { 1949 supports_render_target_invalidate = false; 1950 } 1951 1952 // On Mali Valhall devices with a driver version v1.r36p0 we have seen that invalidating 1953 // render targets can result in image corruption, perhaps due to subsequent reuses of the 1954 // render target not correctly reinitializing them to a valid state. See bug 1787520. 
1955 if is_mali_valhall(&renderer_name) { 1956 match parse_mali_version(&version_string) { 1957 Some(version) if version >= (1, 36, 0) => supports_render_target_invalidate = false, 1958 _ => {} 1959 } 1960 } 1961 1962 // On Linux we we have seen uploads to R8 format textures result in 1963 // corruption on some AMD cards. 1964 // See https://bugzilla.mozilla.org/show_bug.cgi?id=1687554#c13 1965 let supports_r8_texture_upload = if cfg!(target_os = "linux") 1966 && renderer_name.starts_with("AMD Radeon RX") 1967 { 1968 false 1969 } else { 1970 true 1971 }; 1972 1973 let supports_qcom_tiled_rendering = if is_adreno && version_string.contains("V@0490") { 1974 // We have encountered rendering errors on a variety of Adreno GPUs specifically on 1975 // driver version V@0490, so block this extension on that driver version. See bug 1828248. 1976 false 1977 } else if renderer_name == "Adreno (TM) 308" { 1978 // And specifically on Areno 308 GPUs we have encountered rendering errors on driver 1979 // versions V@331, V@415, and V@0502. We presume this therefore affects all driver 1980 // versions. See bug 1843749 and bug 1847319. 1981 false 1982 } else { 1983 supports_extension(&extensions, "GL_QCOM_tiled_rendering") 1984 }; 1985 1986 // On some Adreno 3xx devices the vertex array object must be unbound and rebound after 1987 // an attached buffer has been orphaned. 
1988 let requires_vao_rebind_after_orphaning = is_adreno_3xx; 1989 1990 Device { 1991 gl, 1992 base_gl: None, 1993 crash_annotator, 1994 annotate_draw_call_crashes: false, 1995 resource_override_path, 1996 use_optimized_shaders, 1997 upload_method, 1998 use_batched_texture_uploads: requires_batched_texture_uploads.unwrap_or(false), 1999 use_draw_calls_for_texture_copy: false, 2000 batched_upload_threshold, 2001 2002 inside_frame: false, 2003 2004 capabilities: Capabilities { 2005 supports_multisampling: false, //TODO 2006 supports_copy_image_sub_data, 2007 supports_color_buffer_float, 2008 supports_buffer_storage, 2009 supports_advanced_blend_equation, 2010 supports_dual_source_blending, 2011 supports_khr_debug, 2012 supports_texture_swizzle, 2013 supports_nonzero_pbo_offsets, 2014 supports_texture_usage, 2015 supports_render_target_partial_update, 2016 supports_shader_storage_object, 2017 requires_batched_texture_uploads, 2018 supports_alpha_target_clears, 2019 requires_alpha_target_full_clear, 2020 prefers_clear_scissor, 2021 supports_render_target_invalidate, 2022 supports_r8_texture_upload, 2023 supports_qcom_tiled_rendering, 2024 uses_native_clip_mask, 2025 uses_native_antialiasing, 2026 supports_image_external_essl3, 2027 requires_vao_rebind_after_orphaning, 2028 renderer_name, 2029 }, 2030 2031 color_formats, 2032 bgra_formats, 2033 bgra_pixel_type, 2034 swizzle_settings: SwizzleSettings { 2035 bgra8_sampling_swizzle, 2036 }, 2037 depth_format, 2038 2039 depth_targets: FastHashMap::default(), 2040 2041 bound_textures: [0; 16], 2042 bound_program: 0, 2043 bound_program_name: Rc::new(std::ffi::CString::new("").unwrap()), 2044 bound_vao: 0, 2045 bound_read_fbo: (FBOId(0), DeviceIntPoint::zero()), 2046 bound_draw_fbo: FBOId(0), 2047 default_read_fbo: FBOId(0), 2048 default_draw_fbo: FBOId(0), 2049 2050 depth_available: true, 2051 2052 max_texture_size, 2053 cached_programs, 2054 frame_id: GpuFrameId(0), 2055 extensions, 2056 texture_storage_usage, 2057 
requires_null_terminated_shader_source, 2058 requires_texture_external_unbind, 2059 is_software_webrender, 2060 required_pbo_stride, 2061 dump_shader_source, 2062 surface_origin_is_top_left, 2063 2064 #[cfg(debug_assertions)] 2065 shader_is_ready: false, 2066 2067 textures_created: 0, 2068 textures_deleted: 0, 2069 } 2070 } 2071 2072 pub fn gl(&self) -> &dyn gl::Gl { 2073 &*self.gl 2074 } 2075 2076 pub fn rc_gl(&self) -> &Rc<dyn gl::Gl> { 2077 &self.gl 2078 } 2079 2080 pub fn set_parameter(&mut self, param: &Parameter) { 2081 match param { 2082 Parameter::Bool(BoolParameter::PboUploads, enabled) => { 2083 if !self.is_software_webrender { 2084 self.upload_method = if *enabled { 2085 UploadMethod::PixelBuffer(crate::ONE_TIME_USAGE_HINT) 2086 } else { 2087 UploadMethod::Immediate 2088 }; 2089 } 2090 } 2091 Parameter::Bool(BoolParameter::BatchedUploads, enabled) => { 2092 if self.capabilities.requires_batched_texture_uploads.is_none() { 2093 self.use_batched_texture_uploads = *enabled; 2094 } 2095 } 2096 Parameter::Bool(BoolParameter::DrawCallsForTextureCopy, enabled) => { 2097 self.use_draw_calls_for_texture_copy = *enabled; 2098 } 2099 Parameter::Int(IntParameter::BatchedUploadThreshold, threshold) => { 2100 self.batched_upload_threshold = *threshold; 2101 } 2102 _ => {} 2103 } 2104 } 2105 2106 /// Ensures that the maximum texture size is less than or equal to the 2107 /// provided value. If the provided value is less than the value supported 2108 /// by the driver, the latter is used. 2109 pub fn clamp_max_texture_size(&mut self, size: i32) { 2110 self.max_texture_size = self.max_texture_size.min(size); 2111 } 2112 2113 /// Returns the limit on texture dimensions (width or height). 
    pub fn max_texture_size(&self) -> i32 {
        self.max_texture_size
    }

    /// Returns whether the origin of the render surface is at the top left,
    /// as determined at device creation.
    pub fn surface_origin_is_top_left(&self) -> bool {
        self.surface_origin_is_top_left
    }

    /// Returns the set of GL capabilities detected for this device.
    pub fn get_capabilities(&self) -> &Capabilities {
        &self.capabilities
    }

    /// Returns the color texture formats preferred by this device.
    pub fn preferred_color_formats(&self) -> TextureFormatPair<ImageFormat> {
        self.color_formats.clone()
    }

    /// Returns the swizzle settings to use for BGRA sampling, or `None` if
    /// texture swizzling is not supported by this device.
    pub fn swizzle_settings(&self) -> Option<SwizzleSettings> {
        if self.capabilities.supports_texture_swizzle {
            Some(self.swizzle_settings)
        } else {
            None
        }
    }

    /// Returns the number of bits per pixel of the configured depth format.
    /// Panics on an unrecognized depth format.
    pub fn depth_bits(&self) -> i32 {
        match self.depth_format {
            gl::DEPTH_COMPONENT16 => 16,
            gl::DEPTH_COMPONENT24 => 24,
            _ => panic!("Unknown depth format {:?}", self.depth_format),
        }
    }

    // See gpu_types.rs where we declare the number of possible documents and
    // number of items per document. This should match up with that.
    pub fn max_depth_ids(&self) -> i32 {
        return 1 << (self.depth_bits() - RESERVE_DEPTH_BITS);
    }

    /// Near plane Z of the orthographic projection, derived from the depth id range.
    pub fn ortho_near_plane(&self) -> f32 {
        return -self.max_depth_ids() as f32;
    }

    /// Far plane Z of the orthographic projection, derived from the depth id range.
    pub fn ortho_far_plane(&self) -> f32 {
        return (self.max_depth_ids() - 1) as f32;
    }

    /// Returns the row stride alignment required for PBO texture uploads on this device.
    pub fn required_pbo_stride(&self) -> StrideAlignment {
        self.required_pbo_stride
    }

    /// Returns the currently selected texture upload method.
    pub fn upload_method(&self) -> &UploadMethod {
        &self.upload_method
    }

    /// Returns whether texture uploads should be batched on this device.
    pub fn use_batched_texture_uploads(&self) -> bool {
        self.use_batched_texture_uploads
    }

    /// Returns whether texture copies should be performed with draw calls
    /// rather than blits/copies.
    pub fn use_draw_calls_for_texture_copy(&self) -> bool {
        self.use_draw_calls_for_texture_copy
    }

    /// Returns the size threshold used to decide when uploads get batched.
    pub fn batched_upload_threshold(&self) -> i32 {
        self.batched_upload_threshold
    }

    /// Resets the cached GL binding state (texture units, VAO, read/draw FBOs)
    /// and applies the corresponding default bindings to the GL context, so
    /// that the cache and the context agree again.
    pub fn reset_state(&mut self) {
        for i in 0 .. self.bound_textures.len() {
            self.bound_textures[i] = 0;
            self.gl.active_texture(gl::TEXTURE0 + i as gl::GLuint);
            self.gl.bind_texture(gl::TEXTURE_2D, 0);
        }

        self.bound_vao = 0;
        self.gl.bind_vertex_array(0);

        self.bound_read_fbo = (self.default_read_fbo, DeviceIntPoint::zero());
        self.gl.bind_framebuffer(gl::READ_FRAMEBUFFER, self.default_read_fbo.0);

        self.bound_draw_fbo = self.default_draw_fbo;
        self.gl.bind_framebuffer(gl::DRAW_FRAMEBUFFER, self.bound_draw_fbo.0);
    }

    /// Best-effort debug helper: parses the line number at the start of a
    /// driver's shader info log (formats "0:NN" or "0(NN") and prints the
    /// offending source line with one line of context on either side.
    /// Silently returns if the log doesn't match either format.
    #[cfg(debug_assertions)]
    fn print_shader_errors(source: &str, log: &str) {
        // hacky way to extract the offending lines
        if !log.starts_with("0:") && !log.starts_with("0(") {
            return;
        }
        let end_pos = match log[2..].chars().position(|c| !c.is_digit(10)) {
            Some(pos) => 2 + pos,
            None => return,
        };
        let base_line_number = match log[2 .. end_pos].parse::<usize>() {
            Ok(number) if number >= 2 => number - 2,
            _ => return,
        };
        for (line, prefix) in source.lines().skip(base_line_number).zip(&["|",">","|"]) {
            error!("{}\t{}", prefix, line);
        }
    }

    /// Compiles a single vertex or fragment shader and returns its GL id.
    /// On failure, logs the driver's info log and returns
    /// `ShaderError::Compilation` carrying that log; on success with a
    /// non-empty log, the log is emitted as a warning.
    /// Panics if `shader_type` is neither `VERTEX_SHADER` nor `FRAGMENT_SHADER`.
    pub fn compile_shader(
        &self,
        name: &str,
        shader_type: gl::GLenum,
        source: &String,
    ) -> Result<gl::GLuint, ShaderError> {
        debug!("compile {}", name);
        let id = self.gl.create_shader(shader_type);

        let mut new_source = Cow::from(source.as_str());
        // Ensure the source strings we pass to glShaderSource are
        // null-terminated on buggy platforms.
        if self.requires_null_terminated_shader_source {
            new_source.to_mut().push('\0');
        }

        self.gl.shader_source(id, &[new_source.as_bytes()]);
        self.gl.compile_shader(id);
        let log = self.gl.get_shader_info_log(id);
        let mut status = [0];
        unsafe {
            self.gl.get_shader_iv(id, gl::COMPILE_STATUS, &mut status);
        }
        if status[0] == 0 {
            let type_str = match shader_type {
                gl::VERTEX_SHADER => "vertex",
                gl::FRAGMENT_SHADER => "fragment",
                _ => panic!("Unexpected shader type {:x}", shader_type),
            };
            error!("Failed to compile {} shader: {}\n{}", type_str, name, log);
            #[cfg(debug_assertions)]
            Self::print_shader_errors(source, &log);
            Err(ShaderError::Compilation(name.to_string(), log))
        } else {
            if !log.is_empty() {
                warn!("Warnings detected on shader: {}\n{}", name, log);
            }
            Ok(id)
        }
    }

    /// Begins a new frame: installs or removes the profiling GL wrapper as
    /// the profiler state requires, snapshots the externally-bound default
    /// read/draw FBOs, and resets all cached GL state. Returns the id of the
    /// frame being started.
    pub fn begin_frame(&mut self) -> GpuFrameId {
        debug_assert!(!self.inside_frame);
        self.inside_frame = true;
        #[cfg(debug_assertions)]
        {
            self.shader_is_ready = false;
        }

        self.textures_created = 0;
        self.textures_deleted = 0;

        // If our profiler state has changed, apply or remove the profiling
        // wrapper from our GL context.
        let being_profiled = profiler::thread_is_being_profiled();
        let using_wrapper = self.base_gl.is_some();

        // We can usually unwind driver stacks on OSes other than Android, so we don't need to
        // manually instrument gl calls there. Timestamps can be pretty expensive on Windows (2us
        // each and perhaps an opportunity to be descheduled?) which makes the profiles gathered
        // with this turned on less useful so only profile on ARM Android.
        if cfg!(any(target_arch = "arm", target_arch = "aarch64"))
            && cfg!(target_os = "android")
            && being_profiled
            && !using_wrapper
        {
            fn note(name: &str, duration: Duration) {
                profiler::add_text_marker("OpenGL Calls", name, duration);
            }
            let threshold = Duration::from_millis(1);
            let wrapped = gl::ProfilingGl::wrap(self.gl.clone(), threshold, note);
            let base = mem::replace(&mut self.gl, wrapped);
            self.base_gl = Some(base);
        } else if !being_profiled && using_wrapper {
            self.gl = self.base_gl.take().unwrap();
        }

        // Retrieve the currently set FBO.
        let mut default_read_fbo = [0];
        unsafe {
            self.gl.get_integer_v(gl::READ_FRAMEBUFFER_BINDING, &mut default_read_fbo);
        }
        self.default_read_fbo = FBOId(default_read_fbo[0] as gl::GLuint);
        let mut default_draw_fbo = [0];
        unsafe {
            self.gl.get_integer_v(gl::DRAW_FRAMEBUFFER_BINDING, &mut default_draw_fbo);
        }
        self.default_draw_fbo = FBOId(default_draw_fbo[0] as gl::GLuint);

        // Shader state
        self.bound_program = 0;
        self.gl.use_program(0);

        // Reset common state
        self.reset_state();

        // Pixel op state
        self.gl.pixel_store_i(gl::UNPACK_ALIGNMENT, 1);
        self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);

        // Default is sampler 0, always
        self.gl.active_texture(gl::TEXTURE0);

        self.frame_id
    }

    /// Binds texture `id` to the given slot and target, optionally updating
    /// the swizzle and min/mag filters. The GL calls are skipped entirely
    /// when the binding is already current and no parameter change was
    /// requested. Always leaves TEXTURE0 as the active texture unit.
    fn bind_texture_impl(
        &mut self,
        slot: TextureSlot,
        id: gl::GLuint,
        target: gl::GLenum,
        set_swizzle: Option<Swizzle>,
        image_rendering: Option<ImageRendering>,
    ) {
        debug_assert!(self.inside_frame);

        if self.bound_textures[slot.0] != id || set_swizzle.is_some() || image_rendering.is_some() {
            self.gl.active_texture(gl::TEXTURE0 + slot.0 as gl::GLuint);
            // The android emulator gets confused if you don't explicitly unbind any texture
            // from GL_TEXTURE_EXTERNAL_OES before binding to GL_TEXTURE_2D. See bug 1636085.
            if target == gl::TEXTURE_2D && self.requires_texture_external_unbind {
                self.gl.bind_texture(gl::TEXTURE_EXTERNAL_OES, 0);
            }
            self.gl.bind_texture(target, id);
            if let Some(swizzle) = set_swizzle {
                if self.capabilities.supports_texture_swizzle {
                    let components = match swizzle {
                        Swizzle::Rgba => [gl::RED, gl::GREEN, gl::BLUE, gl::ALPHA],
                        Swizzle::Bgra => [gl::BLUE, gl::GREEN, gl::RED, gl::ALPHA],
                    };
                    self.gl.tex_parameter_i(target, gl::TEXTURE_SWIZZLE_R, components[0] as i32);
                    self.gl.tex_parameter_i(target, gl::TEXTURE_SWIZZLE_G, components[1] as i32);
                    self.gl.tex_parameter_i(target, gl::TEXTURE_SWIZZLE_B, components[2] as i32);
                    self.gl.tex_parameter_i(target, gl::TEXTURE_SWIZZLE_A, components[3] as i32);
                } else {
                    debug_assert_eq!(swizzle, Swizzle::default());
                }
            }
            if let Some(image_rendering) = image_rendering {
                let filter = match image_rendering {
                    ImageRendering::Auto | ImageRendering::CrispEdges => gl::LINEAR,
                    ImageRendering::Pixelated => gl::NEAREST,
                };
                self.gl.tex_parameter_i(target, gl::TEXTURE_MIN_FILTER, filter as i32);
                self.gl.tex_parameter_i(target, gl::TEXTURE_MAG_FILTER, filter as i32);
            }
            self.gl.active_texture(gl::TEXTURE0);
            self.bound_textures[slot.0] = id;
        }
    }

    /// Binds a texture to the given slot, requesting a swizzle update only
    /// if the requested swizzle differs from the texture's currently active one.
    pub fn bind_texture<S>(&mut self, slot: S, texture: &Texture, swizzle: Swizzle)
    where
        S: Into<TextureSlot>,
    {
        let old_swizzle = texture.active_swizzle.replace(swizzle);
        let set_swizzle = if old_swizzle != swizzle {
            Some(swizzle)
        } else {
            None
        };
        self.bind_texture_impl(slot.into(), texture.id, texture.target, set_swizzle, None);
    }

    /// Binds an external texture to the given slot, applying its
    /// image-rendering filter rather than a swizzle.
    pub fn bind_external_texture<S>(&mut self, slot: S, external_texture: &ExternalTexture)
    where
        S: Into<TextureSlot>,
    {
        self.bind_texture_impl(
            slot.into(),
            external_texture.id,
            external_texture.target,
            None,
            Some(external_texture.image_rendering),
        );
    }

    /// Binds `fbo_id` as the read framebuffer (skipping the GL call if it is
    /// already bound with the same offset) and records the (fbo, offset) pair
    /// as the current read target.
    pub fn bind_read_target_impl(
        &mut self,
        fbo_id: FBOId,
        offset: DeviceIntPoint,
    ) {
        debug_assert!(self.inside_frame);

        if self.bound_read_fbo != (fbo_id, offset) {
            fbo_id.bind(self.gl(), FBOTarget::Read);
        }

        self.bound_read_fbo = (fbo_id, offset);
    }

    /// Resolves a `ReadTarget` to its FBO id and binds it as the read framebuffer.
    pub fn bind_read_target(&mut self, target: ReadTarget) {
        let fbo_id = match target {
            ReadTarget::Default => self.default_read_fbo,
            ReadTarget::Texture { fbo_id } => fbo_id,
            ReadTarget::External { fbo } => fbo,
            ReadTarget::NativeSurface { fbo_id, .. } => fbo_id,
        };

        self.bind_read_target_impl(fbo_id, target.offset())
    }

    /// Binds `fbo_id` as the draw framebuffer if it is not already bound.
    fn bind_draw_target_impl(&mut self, fbo_id: FBOId) {
        debug_assert!(self.inside_frame);

        if self.bound_draw_fbo != fbo_id {
            self.bound_draw_fbo = fbo_id;
            fbo_id.bind(self.gl(), FBOTarget::Draw);
        }
    }

    /// Rebinds the default read framebuffer with a zero offset.
    pub fn reset_read_target(&mut self) {
        let fbo = self.default_read_fbo;
        self.bind_read_target_impl(fbo, DeviceIntPoint::zero());
    }


    /// Rebinds the default draw framebuffer and marks depth as available.
    pub fn reset_draw_target(&mut self) {
        let fbo = self.default_draw_fbo;
        self.bind_draw_target_impl(fbo);
        self.depth_available = true;
    }

    /// Resolves a `DrawTarget` to (fbo, rect, depth availability), binds it
    /// as the draw framebuffer, and sets the viewport to the target's rect.
    pub fn bind_draw_target(
        &mut self,
        target: DrawTarget,
    ) {
        let (fbo_id, rect, depth_available) = match target {
            DrawTarget::Default { rect, .. } => {
                (self.default_draw_fbo, rect, false)
            }
            DrawTarget::Texture { dimensions, fbo_id, with_depth, .. } => {
                let rect = FramebufferIntRect::from_size(
                    device_size_as_framebuffer_size(dimensions),
                );
                (fbo_id, rect, with_depth)
            },
            DrawTarget::External { fbo, size } => {
                (fbo, size.into(), false)
            }
            DrawTarget::NativeSurface { external_fbo_id, offset, dimensions, .. } => {
                (
                    FBOId(external_fbo_id),
                    device_rect_as_framebuffer_rect(&DeviceIntRect::from_origin_and_size(offset, dimensions)),
                    true
                )
            }
        };

        self.depth_available = depth_available;
        self.bind_draw_target_impl(fbo_id);
        self.gl.viewport(
            rect.min.x,
            rect.min.y,
            rect.width(),
            rect.height(),
        );
    }

    /// Creates an unbound FBO object. Additional attachment API calls are
    /// required to make it complete.
    pub fn create_fbo(&mut self) -> FBOId {
        FBOId(self.gl.gen_framebuffers(1)[0])
    }

    /// Creates an FBO with the given texture bound as the color attachment.
    /// Restores the previously bound draw FBO before returning.
    pub fn create_fbo_for_external_texture(&mut self, texture_id: u32) -> FBOId {
        let fbo = self.create_fbo();
        fbo.bind(self.gl(), FBOTarget::Draw);
        self.gl.framebuffer_texture_2d(
            gl::DRAW_FRAMEBUFFER,
            gl::COLOR_ATTACHMENT0,
            gl::TEXTURE_2D,
            texture_id,
            0,
        );
        debug_assert_eq!(
            self.gl.check_frame_buffer_status(gl::DRAW_FRAMEBUFFER),
            gl::FRAMEBUFFER_COMPLETE,
            "Incomplete framebuffer",
        );
        self.bound_draw_fbo.bind(self.gl(), FBOTarget::Draw);
        fbo
    }

    /// Deletes the GL framebuffer object backing `fbo`.
    pub fn delete_fbo(&mut self, fbo: FBOId) {
        self.gl.delete_framebuffers(&[fbo.0]);
    }

    /// Binds an externally-created FBO as the draw framebuffer, skipping the
    /// GL call if it is already bound.
    pub fn bind_external_draw_target(&mut self, fbo_id: FBOId) {
        debug_assert!(self.inside_frame);

        if self.bound_draw_fbo != fbo_id {
            self.bound_draw_fbo = fbo_id;
            fbo_id.bind(self.gl(), FBOTarget::Draw);
        }
    }

    /// Link a program, attaching the supplied vertex format.
    ///
    /// If `create_program()` finds a binary shader on disk, it will kick
    /// off linking immediately, which some drivers (notably ANGLE) run
    /// in parallel on background threads. As such, this function should
    /// ideally be run sometime later, to give the driver time to do that
    /// before blocking due to an API call accessing the shader.
    ///
    /// This generally means that the first run of the application will have
    /// to do a bunch of blocking work to compile the shader from source, but
    /// subsequent runs should load quickly.
    pub fn link_program(
        &mut self,
        program: &mut Program,
        descriptor: &VertexDescriptor,
    ) -> Result<(), ShaderError> {
        profile_scope!("compile shader");

        // Annotate crash reports with the shader being compiled, in case the
        // driver crashes during compilation/linking.
        let _guard = CrashAnnotatorGuard::new(
            &self.crash_annotator,
            CrashAnnotation::CompileShader,
            &program.source_info.full_name_cstr
        );

        assert!(!program.is_initialized());
        let mut build_program = true;
        let info = &program.source_info;

        // See if we hit the binary shader cache
        if let Some(ref cached_programs) = self.cached_programs {
            // If the shader is not in the cache, attempt to load it from disk
            if cached_programs.entries.borrow().get(&program.source_info.digest).is_none() {
                if let Some(ref handler) = cached_programs.program_cache_handler {
                    handler.try_load_shader_from_disk(&program.source_info.digest, cached_programs);
                    if let Some(entry) = cached_programs.entries.borrow().get(&program.source_info.digest) {
                        self.gl.program_binary(program.id, entry.binary.format, &entry.binary.bytes);
                    }
                }
            }

            if let Some(entry) = cached_programs.entries.borrow_mut().get_mut(&info.digest) {
                // Check whether loading the program binary actually produced a
                // linked program; fall back to a source build if it didn't.
                let mut link_status = [0];
                unsafe {
                    self.gl.get_program_iv(program.id, gl::LINK_STATUS, &mut link_status);
                }
                if link_status[0] == 0 {
                    let error_log = self.gl.get_program_info_log(program.id);
                    error!(
                        "Failed to load a program object with a program binary: {} renderer {}\n{}",
                        &info.base_filename,
                        self.capabilities.renderer_name,
                        error_log
                    );
                    if let Some(ref program_cache_handler) = cached_programs.program_cache_handler {
                        program_cache_handler.notify_program_binary_failed(&entry.binary);
                    }
                } else {
                    entry.linked = true;
                    build_program = false;
                }
            }
        }

        // If not, we need to do a normal compile + link pass.
        if build_program {
            // Compile the vertex shader
            let vs_source = info.compute_source(self, ShaderKind::Vertex);
            let vs_id = match self.compile_shader(&info.full_name(), gl::VERTEX_SHADER, &vs_source) {
                Ok(vs_id) => vs_id,
                Err(err) => return Err(err),
            };

            // Compile the fragment shader
            let fs_source = info.compute_source(self, ShaderKind::Fragment);
            let fs_id =
                match self.compile_shader(&info.full_name(), gl::FRAGMENT_SHADER, &fs_source) {
                    Ok(fs_id) => fs_id,
                    Err(err) => {
                        // Don't leak the already-compiled vertex shader.
                        self.gl.delete_shader(vs_id);
                        return Err(err);
                    }
                };

            // Check if shader source should be dumped
            if Some(info.base_filename) == self.dump_shader_source.as_ref().map(String::as_ref) {
                let path = std::path::Path::new(info.base_filename);
                std::fs::write(path.with_extension("vert"), vs_source).unwrap();
                std::fs::write(path.with_extension("frag"), fs_source).unwrap();
            }

            // Attach shaders
            self.gl.attach_shader(program.id, vs_id);
            self.gl.attach_shader(program.id, fs_id);

            // Bind vertex attributes
            for (i, attr) in descriptor
                .vertex_attributes
                .iter()
                .chain(descriptor.instance_attributes.iter())
                .enumerate()
            {
                self.gl
                    .bind_attrib_location(program.id, i as gl::GLuint, attr.name);
            }

            if self.cached_programs.is_some() {
                self.gl.program_parameter_i(program.id, gl::PROGRAM_BINARY_RETRIEVABLE_HINT, gl::TRUE as gl::GLint);
            }

            // Link!
            self.gl.link_program(program.id);

            // GL recommends detaching and deleting shaders once the link
            // is complete (whether successful or not). This allows the driver
            // to free any memory associated with the parsing and compilation.
            self.gl.detach_shader(program.id, vs_id);
            self.gl.detach_shader(program.id, fs_id);
            self.gl.delete_shader(vs_id);
            self.gl.delete_shader(fs_id);

            let mut link_status = [0];
            unsafe {
                self.gl.get_program_iv(program.id, gl::LINK_STATUS, &mut link_status);
            }
            if link_status[0] == 0 {
                let error_log = self.gl.get_program_info_log(program.id);
                error!(
                    "Failed to link shader program: {}\n{}",
                    &info.base_filename,
                    error_log
                );
                self.gl.delete_program(program.id);
                return Err(ShaderError::Link(info.base_filename.to_owned(), error_log));
            }

            // Store the freshly linked binary in the cache for next time.
            if let Some(ref cached_programs) = self.cached_programs {
                if !cached_programs.entries.borrow().contains_key(&info.digest) {
                    let (buffer, format) = self.gl.get_program_binary(program.id);
                    if buffer.len() > 0 {
                        let binary = Arc::new(ProgramBinary::new(buffer, format, info.digest.clone()));
                        cached_programs.add_new_program_binary(binary);
                    }
                }
            }
        }

        // If we get here, the link succeeded, so get the uniforms.
        program.is_initialized = true;
        program.u_transform = self.gl.get_uniform_location(program.id, "uTransform");
        program.u_texture_size = self.gl.get_uniform_location(program.id, "uTextureSize");

        Ok(())
    }

    /// Makes the program current if it has been linked, skipping the GL call
    /// when it is already bound. Returns `false` (without binding anything)
    /// if the program is not initialized.
    pub fn bind_program(&mut self, program: &Program) -> bool {
        debug_assert!(self.inside_frame);
        debug_assert!(program.is_initialized());
        if !program.is_initialized() {
            return false;
        }
        #[cfg(debug_assertions)]
        {
            self.shader_is_ready = true;
        }

        if self.bound_program != program.id {
            self.gl.use_program(program.id);
            self.bound_program = program.id;
            self.bound_program_name = program.source_info.full_name_cstr.clone();
        }
        true
    }

    /// Creates a texture with the given target, format and size, clamping the
    /// dimensions to `max_texture_size`, allocating storage (via glTexStorage
    /// where enabled), and creating FBOs when `render_target` is provided.
    pub fn create_texture(
        &mut self,
        target: ImageBufferKind,
        format: ImageFormat,
        mut width: i32,
        mut height: i32,
        filter: TextureFilter,
        render_target: Option<RenderTargetInfo>,
    ) -> Texture {
        debug_assert!(self.inside_frame);

        if width > self.max_texture_size || height > self.max_texture_size {
            error!("Attempting to allocate a texture of size {}x{} above the limit, trimming", width, height);
            width = width.min(self.max_texture_size);
            height = height.min(self.max_texture_size);
        }

        // Set up the texture book-keeping.
        let mut texture = Texture {
            id: self.gl.gen_textures(1)[0],
            target: get_gl_target(target),
            size: DeviceIntSize::new(width, height),
            format,
            filter,
            active_swizzle: Cell::default(),
            fbo: None,
            fbo_with_depth: None,
            last_frame_used: self.frame_id,
            flags: TextureFlags::default(),
        };
        self.bind_texture(DEFAULT_TEXTURE, &texture, Swizzle::default());
        self.set_texture_parameters(texture.target, filter);

        if self.capabilities.supports_texture_usage && render_target.is_some() {
            self.gl.tex_parameter_i(texture.target, gl::TEXTURE_USAGE_ANGLE, gl::FRAMEBUFFER_ATTACHMENT_ANGLE as gl::GLint);
        }

        // Allocate storage.
        let desc = self.gl_describe_format(texture.format);

        // Firefox doesn't use mipmaps, but Servo uses them for standalone image
        // textures images larger than 512 pixels. This is the only case where
        // we set the filter to trilinear.
        let mipmap_levels = if texture.filter == TextureFilter::Trilinear {
            let max_dimension = cmp::max(width, height);
            ((max_dimension) as f64).log2() as gl::GLint + 1
        } else {
            1
        };

        // We never want to upload texture data at the same time as allocating the texture.
        self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);

        // Use glTexStorage where available, since it avoids allocating
        // unnecessary mipmap storage and generally improves performance with
        // stronger invariants.
        let use_texture_storage = match self.texture_storage_usage {
            TexStorageUsage::Always => true,
            TexStorageUsage::NonBGRA8 => texture.format != ImageFormat::BGRA8,
            TexStorageUsage::Never => false,
        };
        if use_texture_storage {
            self.gl.tex_storage_2d(
                texture.target,
                mipmap_levels,
                desc.internal,
                texture.size.width as gl::GLint,
                texture.size.height as gl::GLint,
            );
        } else {
            self.gl.tex_image_2d(
                texture.target,
                0,
                desc.internal as gl::GLint,
                texture.size.width as gl::GLint,
                texture.size.height as gl::GLint,
                0,
                desc.external,
                desc.pixel_type,
                None,
            );
        }

        // Set up FBOs, if required.
        if let Some(rt_info) = render_target {
            self.init_fbos(&mut texture, false);
            if rt_info.has_depth {
                self.init_fbos(&mut texture, true);
            }
        }

        self.textures_created += 1;

        texture
    }

    /// Applies min/mag filters derived from `filter` and clamp-to-edge
    /// wrapping to the currently bound texture on `target`.
    fn set_texture_parameters(&mut self, target: gl::GLuint, filter: TextureFilter) {
        let mag_filter = match filter {
            TextureFilter::Nearest => gl::NEAREST,
            TextureFilter::Linear | TextureFilter::Trilinear => gl::LINEAR,
        };

        let min_filter = match filter {
            TextureFilter::Nearest => gl::NEAREST,
            TextureFilter::Linear => gl::LINEAR,
            TextureFilter::Trilinear => gl::LINEAR_MIPMAP_LINEAR,
        };

        self.gl
            .tex_parameter_i(target, gl::TEXTURE_MAG_FILTER, mag_filter as gl::GLint);
        self.gl
            .tex_parameter_i(target, gl::TEXTURE_MIN_FILTER, min_filter as gl::GLint);

        self.gl
            .tex_parameter_i(target, gl::TEXTURE_WRAP_S, gl::CLAMP_TO_EDGE as gl::GLint);
        self.gl
            .tex_parameter_i(target, gl::TEXTURE_WRAP_T, gl::CLAMP_TO_EDGE as gl::GLint);
    }

    /// Copies the entire contents of one texture to another. The dest texture must be at least
    /// as large as the source texture in each dimension. No scaling is performed, so if the dest
    /// texture is larger than the source texture then some of its pixels will not be written to.
    pub fn copy_entire_texture(
        &mut self,
        dst: &mut Texture,
        src: &Texture,
    ) {
        debug_assert!(self.inside_frame);
        debug_assert!(dst.size.width >= src.size.width);
        debug_assert!(dst.size.height >= src.size.height);

        self.copy_texture_sub_region(
            src,
            0,
            0,
            dst,
            0,
            0,
            src.size.width as _,
            src.size.height as _,
        );
    }

    /// Copies the specified subregion from src_texture to dest_texture.
    /// Uses glCopyImageSubData when supported, otherwise falls back to a
    /// framebuffer blit of the same rect.
    pub fn copy_texture_sub_region(
        &mut self,
        src_texture: &Texture,
        src_x: usize,
        src_y: usize,
        dest_texture: &Texture,
        dest_x: usize,
        dest_y: usize,
        width: usize,
        height: usize,
    ) {
        if self.capabilities.supports_copy_image_sub_data {
            assert_ne!(
                src_texture.id, dest_texture.id,
                "glCopyImageSubData's behaviour is undefined if src and dst images are identical and the rectangles overlap."
            );
            unsafe {
                self.gl.copy_image_sub_data(
                    src_texture.id,
                    src_texture.target,
                    0,
                    src_x as _,
                    src_y as _,
                    0,
                    dest_texture.id,
                    dest_texture.target,
                    0,
                    dest_x as _,
                    dest_y as _,
                    0,
                    width as _,
                    height as _,
                    1,
                );
            }
        } else {
            let src_offset = FramebufferIntPoint::new(src_x as i32, src_y as i32);
            let dest_offset = FramebufferIntPoint::new(dest_x as i32, dest_y as i32);
            let size = FramebufferIntSize::new(width as i32, height as i32);

            self.blit_render_target(
                ReadTarget::from_texture(src_texture),
                FramebufferIntRect::from_origin_and_size(src_offset, size),
                DrawTarget::from_texture(dest_texture, false),
                FramebufferIntRect::from_origin_and_size(dest_offset, size),
                // In most cases the filter shouldn't matter, as there is no scaling involved
                // in the blit. We were previously using Linear, but this caused issues when
                // blitting RGBAF32 textures on Mali, so use Nearest to be safe.
                TextureFilter::Nearest,
            );
        }
    }

    /// Notifies the device that the contents of a render target are no longer
    /// needed.
    pub fn invalidate_render_target(&mut self, texture: &Texture) {
        if self.capabilities.supports_render_target_invalidate {
            // Invalidate the depth attachment too when the texture has one.
            let (fbo, attachments) = if texture.supports_depth() {
                (&texture.fbo_with_depth,
                 &[gl::COLOR_ATTACHMENT0, gl::DEPTH_ATTACHMENT] as &[gl::GLenum])
            } else {
                (&texture.fbo, &[gl::COLOR_ATTACHMENT0] as &[gl::GLenum])
            };

            if let Some(fbo_id) = fbo {
                let original_bound_fbo = self.bound_draw_fbo;
                // Note: The invalidate extension may not be supported, in which
                // case this is a no-op. That's ok though, because it's just a
                // hint.
                self.bind_external_draw_target(*fbo_id);
                self.gl.invalidate_framebuffer(gl::FRAMEBUFFER, attachments);
                self.bind_external_draw_target(original_bound_fbo);
            }
        }
    }

    /// Notifies the device that the contents of the current framebuffer's depth
    /// attachment is no longer needed. Unlike invalidate_render_target, this can
    /// be called even when the contents of the colour attachment is still required.
    /// This should be called before unbinding the framebuffer at the end of a pass,
    /// to allow tiled GPUs to avoid writing the contents back to memory.
    pub fn invalidate_depth_target(&mut self) {
        assert!(self.depth_available);
        // The default framebuffer uses gl::DEPTH rather than gl::DEPTH_ATTACHMENT.
        let attachments = if self.bound_draw_fbo == self.default_draw_fbo {
            &[gl::DEPTH] as &[gl::GLenum]
        } else {
            &[gl::DEPTH_ATTACHMENT] as &[gl::GLenum]
        };
        self.gl.invalidate_framebuffer(gl::DRAW_FRAMEBUFFER, attachments);
    }

    /// Notifies the device that a render target is about to be reused.
    ///
    /// This method adds a depth target to the texture if one is required
    /// and not already present.
    pub fn reuse_render_target<T: Texel>(
        &mut self,
        texture: &mut Texture,
        rt_info: RenderTargetInfo,
    ) {
        texture.last_frame_used = self.frame_id;

        // Add depth support if needed.
        if rt_info.has_depth && !texture.supports_depth() {
            self.init_fbos(texture, true);
        }
    }

    /// Creates an FBO for the texture (stored in `fbo` or `fbo_with_depth`),
    /// attaching the texture as color attachment 0 and, when `with_depth` is
    /// set, a shared depth renderbuffer of matching dimensions. Restores the
    /// previously bound draw FBO before returning.
    fn init_fbos(&mut self, texture: &mut Texture, with_depth: bool) {
        let (fbo, depth_rb) = if with_depth {
            let depth_target = self.acquire_depth_target(texture.get_dimensions());
            (&mut texture.fbo_with_depth, Some(depth_target))
        } else {
            (&mut texture.fbo, None)
        };

        // Generate the FBOs.
        assert!(fbo.is_none());
        let fbo_id = FBOId(*self.gl.gen_framebuffers(1).first().unwrap());
        *fbo = Some(fbo_id);

        // Bind the FBOs.
        let original_bound_fbo = self.bound_draw_fbo;

        self.bind_external_draw_target(fbo_id);

        self.gl.framebuffer_texture_2d(
            gl::DRAW_FRAMEBUFFER,
            gl::COLOR_ATTACHMENT0,
            texture.target,
            texture.id,
            0,
        );

        if let Some(depth_rb) = depth_rb {
            self.gl.framebuffer_renderbuffer(
                gl::DRAW_FRAMEBUFFER,
                gl::DEPTH_ATTACHMENT,
                gl::RENDERBUFFER,
                depth_rb.0,
            );
        }

        debug_assert_eq!(
            self.gl.check_frame_buffer_status(gl::DRAW_FRAMEBUFFER),
            gl::FRAMEBUFFER_COMPLETE,
            "Incomplete framebuffer",
        );

        self.bind_external_draw_target(original_bound_fbo);
    }

    /// Returns a shared depth renderbuffer for the given dimensions, creating
    /// and allocating it on first use, and increments its refcount. Paired
    /// with `release_depth_target`.
    fn acquire_depth_target(&mut self, dimensions: DeviceIntSize) -> RBOId {
        let gl = &self.gl;
        let depth_format = self.depth_format;
        let target = self.depth_targets.entry(dimensions).or_insert_with(|| {
            let renderbuffer_ids = gl.gen_renderbuffers(1);
            let depth_rb = renderbuffer_ids[0];
            gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
            gl.renderbuffer_storage(
                gl::RENDERBUFFER,
                depth_format,
                dimensions.width as _,
                dimensions.height as _,
            );
            SharedDepthTarget {
                rbo_id: RBOId(depth_rb),
                refcount: 0,
            }
        });
        target.refcount += 1;
        target.rbo_id
    }

    /// Decrements the refcount of the shared depth target of the given
    /// dimensions, deleting the renderbuffer when the count reaches zero.
    /// Panics if no depth target of these dimensions exists.
    fn release_depth_target(&mut self, dimensions: DeviceIntSize) {
        let mut entry = match self.depth_targets.entry(dimensions) {
            Entry::Occupied(x) => x,
            Entry::Vacant(..) => panic!("Releasing unknown depth target"),
        };
        debug_assert!(entry.get().refcount != 0);
        entry.get_mut().refcount -= 1;
        if entry.get().refcount == 0 {
            let (_, target) = entry.remove_entry();
            self.gl.delete_renderbuffers(&[target.rbo_id.0]);
        }
    }

    /// Perform a blit between self.bound_read_fbo and self.bound_draw_fbo.
3018 fn blit_render_target_impl( 3019 &mut self, 3020 src_rect: FramebufferIntRect, 3021 dest_rect: FramebufferIntRect, 3022 filter: TextureFilter, 3023 ) { 3024 debug_assert!(self.inside_frame); 3025 3026 let filter = match filter { 3027 TextureFilter::Nearest => gl::NEAREST, 3028 TextureFilter::Linear | TextureFilter::Trilinear => gl::LINEAR, 3029 }; 3030 3031 let src_x0 = src_rect.min.x + self.bound_read_fbo.1.x; 3032 let src_y0 = src_rect.min.y + self.bound_read_fbo.1.y; 3033 3034 self.gl.blit_framebuffer( 3035 src_x0, 3036 src_y0, 3037 src_x0 + src_rect.width(), 3038 src_y0 + src_rect.height(), 3039 dest_rect.min.x, 3040 dest_rect.min.y, 3041 dest_rect.max.x, 3042 dest_rect.max.y, 3043 gl::COLOR_BUFFER_BIT, 3044 filter, 3045 ); 3046 } 3047 3048 /// Perform a blit between src_target and dest_target. 3049 /// This will overwrite self.bound_read_fbo and self.bound_draw_fbo. 3050 pub fn blit_render_target( 3051 &mut self, 3052 src_target: ReadTarget, 3053 src_rect: FramebufferIntRect, 3054 dest_target: DrawTarget, 3055 dest_rect: FramebufferIntRect, 3056 filter: TextureFilter, 3057 ) { 3058 debug_assert!(self.inside_frame); 3059 3060 self.bind_read_target(src_target); 3061 3062 self.bind_draw_target(dest_target); 3063 3064 self.blit_render_target_impl(src_rect, dest_rect, filter); 3065 } 3066 3067 /// Performs a blit while flipping vertically. Useful for blitting textures 3068 /// (which use origin-bottom-left) to the main framebuffer (which uses 3069 /// origin-top-left). 
3070 pub fn blit_render_target_invert_y( 3071 &mut self, 3072 src_target: ReadTarget, 3073 src_rect: FramebufferIntRect, 3074 dest_target: DrawTarget, 3075 dest_rect: FramebufferIntRect, 3076 ) { 3077 debug_assert!(self.inside_frame); 3078 3079 let mut inverted_dest_rect = dest_rect; 3080 inverted_dest_rect.min.y = dest_rect.max.y; 3081 inverted_dest_rect.max.y = dest_rect.min.y; 3082 3083 self.blit_render_target( 3084 src_target, 3085 src_rect, 3086 dest_target, 3087 inverted_dest_rect, 3088 TextureFilter::Linear, 3089 ); 3090 } 3091 3092 pub fn delete_texture(&mut self, mut texture: Texture) { 3093 debug_assert!(self.inside_frame); 3094 let had_depth = texture.supports_depth(); 3095 if let Some(fbo) = texture.fbo { 3096 self.gl.delete_framebuffers(&[fbo.0]); 3097 texture.fbo = None; 3098 } 3099 if let Some(fbo) = texture.fbo_with_depth { 3100 self.gl.delete_framebuffers(&[fbo.0]); 3101 texture.fbo_with_depth = None; 3102 } 3103 3104 if had_depth { 3105 self.release_depth_target(texture.get_dimensions()); 3106 } 3107 3108 self.gl.delete_textures(&[texture.id]); 3109 3110 for bound_texture in &mut self.bound_textures { 3111 if *bound_texture == texture.id { 3112 *bound_texture = 0; 3113 } 3114 } 3115 3116 self.textures_deleted += 1; 3117 3118 // Disarm the assert in Texture::drop(). 3119 texture.id = 0; 3120 } 3121 3122 #[cfg(feature = "replay")] 3123 pub fn delete_external_texture(&mut self, mut external: ExternalTexture) { 3124 self.gl.delete_textures(&[external.id]); 3125 external.id = 0; 3126 } 3127 3128 pub fn delete_program(&mut self, mut program: Program) { 3129 self.gl.delete_program(program.id); 3130 program.id = 0; 3131 } 3132 3133 /// Create a shader program and link it immediately. 
3134 pub fn create_program_linked( 3135 &mut self, 3136 base_filename: &'static str, 3137 features: &[&'static str], 3138 descriptor: &VertexDescriptor, 3139 ) -> Result<Program, ShaderError> { 3140 let mut program = self.create_program(base_filename, features)?; 3141 self.link_program(&mut program, descriptor)?; 3142 Ok(program) 3143 } 3144 3145 /// Create a shader program. This does minimal amount of work to start 3146 /// loading a binary shader. If a binary shader is found, we invoke 3147 /// glProgramBinary, which, at least on ANGLE, will load and link the 3148 /// binary on a background thread. This can speed things up later when 3149 /// we invoke `link_program()`. 3150 pub fn create_program( 3151 &mut self, 3152 base_filename: &'static str, 3153 features: &[&'static str], 3154 ) -> Result<Program, ShaderError> { 3155 debug_assert!(self.inside_frame); 3156 3157 let source_info = ProgramSourceInfo::new(self, base_filename, features); 3158 3159 // Create program 3160 let pid = self.gl.create_program(); 3161 3162 // Attempt to load a cached binary if possible. 3163 if let Some(ref cached_programs) = self.cached_programs { 3164 if let Some(entry) = cached_programs.entries.borrow().get(&source_info.digest) { 3165 self.gl.program_binary(pid, entry.binary.format, &entry.binary.bytes); 3166 } 3167 } 3168 3169 // Use 0 for the uniforms as they are initialized by link_program. 
3170 let program = Program { 3171 id: pid, 3172 u_transform: 0, 3173 u_texture_size: 0, 3174 source_info, 3175 is_initialized: false, 3176 }; 3177 3178 Ok(program) 3179 } 3180 3181 fn build_shader_string<F: FnMut(&str)>( 3182 &self, 3183 features: &[&'static str], 3184 kind: ShaderKind, 3185 base_filename: &str, 3186 output: F, 3187 ) { 3188 do_build_shader_string( 3189 get_shader_version(&*self.gl), 3190 features, 3191 kind, 3192 base_filename, 3193 &|f| get_unoptimized_shader_source(f, self.resource_override_path.as_ref()), 3194 output, 3195 ) 3196 } 3197 3198 pub fn bind_shader_samplers<S>(&mut self, program: &Program, bindings: &[(&'static str, S)]) 3199 where 3200 S: Into<TextureSlot> + Copy, 3201 { 3202 // bind_program() must be called before calling bind_shader_samplers 3203 assert_eq!(self.bound_program, program.id); 3204 3205 for binding in bindings { 3206 let u_location = self.gl.get_uniform_location(program.id, binding.0); 3207 if u_location != -1 { 3208 self.bind_program(program); 3209 self.gl 3210 .uniform_1i(u_location, binding.1.into().0 as gl::GLint); 3211 } 3212 } 3213 } 3214 3215 pub fn get_uniform_location(&self, program: &Program, name: &str) -> UniformLocation { 3216 UniformLocation(self.gl.get_uniform_location(program.id, name)) 3217 } 3218 3219 pub fn set_uniforms( 3220 &self, 3221 program: &Program, 3222 transform: &Transform3D<f32>, 3223 ) { 3224 debug_assert!(self.inside_frame); 3225 #[cfg(debug_assertions)] 3226 debug_assert!(self.shader_is_ready); 3227 3228 self.gl 3229 .uniform_matrix_4fv(program.u_transform, false, &transform.to_array()); 3230 } 3231 3232 /// Sets the uTextureSize uniform. Most shaders do not require this to be called 3233 /// as they use the textureSize GLSL function instead. 
3234 pub fn set_shader_texture_size( 3235 &self, 3236 program: &Program, 3237 texture_size: DeviceSize, 3238 ) { 3239 debug_assert!(self.inside_frame); 3240 #[cfg(debug_assertions)] 3241 debug_assert!(self.shader_is_ready); 3242 3243 if program.u_texture_size != -1 { 3244 self.gl.uniform_2f(program.u_texture_size, texture_size.width, texture_size.height); 3245 } 3246 } 3247 3248 pub fn create_pbo(&mut self) -> PBO { 3249 let id = self.gl.gen_buffers(1)[0]; 3250 PBO { 3251 id, 3252 reserved_size: 0, 3253 } 3254 } 3255 3256 pub fn create_pbo_with_size(&mut self, size: usize) -> PBO { 3257 let mut pbo = self.create_pbo(); 3258 3259 self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, pbo.id); 3260 self.gl.pixel_store_i(gl::PACK_ALIGNMENT, 1); 3261 self.gl.buffer_data_untyped( 3262 gl::PIXEL_PACK_BUFFER, 3263 size as _, 3264 ptr::null(), 3265 gl::STREAM_READ, 3266 ); 3267 self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0); 3268 3269 pbo.reserved_size = size; 3270 pbo 3271 } 3272 3273 pub fn read_pixels_into_pbo( 3274 &mut self, 3275 read_target: ReadTarget, 3276 rect: DeviceIntRect, 3277 format: ImageFormat, 3278 pbo: &PBO, 3279 ) { 3280 let byte_size = rect.area() as usize * format.bytes_per_pixel() as usize; 3281 3282 assert!(byte_size <= pbo.reserved_size); 3283 3284 self.bind_read_target(read_target); 3285 3286 self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, pbo.id); 3287 self.gl.pixel_store_i(gl::PACK_ALIGNMENT, 1); 3288 3289 let gl_format = self.gl_describe_format(format); 3290 3291 unsafe { 3292 self.gl.read_pixels_into_pbo( 3293 rect.min.x as _, 3294 rect.min.y as _, 3295 rect.width() as _, 3296 rect.height() as _, 3297 gl_format.read, 3298 gl_format.pixel_type, 3299 ); 3300 } 3301 3302 self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, 0); 3303 } 3304 3305 pub fn map_pbo_for_readback<'a>(&'a mut self, pbo: &'a PBO) -> Option<BoundPBO<'a>> { 3306 self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, pbo.id); 3307 3308 let buf_ptr = match self.gl.get_type() { 3309 gl::GlType::Gl => { 3310 
self.gl.map_buffer(gl::PIXEL_PACK_BUFFER, gl::READ_ONLY) 3311 } 3312 3313 gl::GlType::Gles => { 3314 self.gl.map_buffer_range( 3315 gl::PIXEL_PACK_BUFFER, 3316 0, 3317 pbo.reserved_size as _, 3318 gl::MAP_READ_BIT) 3319 } 3320 }; 3321 3322 if buf_ptr.is_null() { 3323 return None; 3324 } 3325 3326 let buffer = unsafe { slice::from_raw_parts(buf_ptr as *const u8, pbo.reserved_size) }; 3327 3328 Some(BoundPBO { 3329 device: self, 3330 data: buffer, 3331 }) 3332 } 3333 3334 pub fn delete_pbo(&mut self, mut pbo: PBO) { 3335 self.gl.delete_buffers(&[pbo.id]); 3336 pbo.id = 0; 3337 pbo.reserved_size = 0 3338 } 3339 3340 /// Returns the size and stride in bytes required to upload an area of pixels 3341 /// of the specified size, to a texture of the specified format. 3342 pub fn required_upload_size_and_stride(&self, size: DeviceIntSize, format: ImageFormat) -> (usize, usize) { 3343 assert!(size.width >= 0); 3344 assert!(size.height >= 0); 3345 3346 let bytes_pp = format.bytes_per_pixel() as usize; 3347 let width_bytes = size.width as usize * bytes_pp; 3348 3349 let dst_stride = round_up_to_multiple(width_bytes, self.required_pbo_stride.num_bytes(format)); 3350 3351 // The size of the chunk should only need to be (height - 1) * dst_stride + width_bytes, 3352 // however, the android emulator will error unless it is height * dst_stride. 3353 // See bug 1587047 for details. 3354 // Using the full final row also ensures that the offset of the next chunk is 3355 // optimally aligned. 3356 let dst_size = dst_stride * size.height as usize; 3357 3358 (dst_size, dst_stride) 3359 } 3360 3361 /// Returns a `TextureUploader` which can be used to upload texture data to `texture`. 3362 /// Once uploads have been performed the uploader must be flushed with `TextureUploader::flush()`. 
3363 pub fn upload_texture<'a>( 3364 &mut self, 3365 pbo_pool: &'a mut UploadPBOPool, 3366 ) -> TextureUploader<'a> { 3367 debug_assert!(self.inside_frame); 3368 3369 pbo_pool.begin_frame(self); 3370 3371 TextureUploader { 3372 buffers: Vec::new(), 3373 pbo_pool, 3374 } 3375 } 3376 3377 /// Performs an immediate (non-PBO) texture upload. 3378 pub fn upload_texture_immediate<T: Texel>( 3379 &mut self, 3380 texture: &Texture, 3381 pixels: &[T] 3382 ) { 3383 self.bind_texture(DEFAULT_TEXTURE, texture, Swizzle::default()); 3384 let desc = self.gl_describe_format(texture.format); 3385 self.gl.tex_sub_image_2d( 3386 texture.target, 3387 0, 3388 0, 3389 0, 3390 texture.size.width as gl::GLint, 3391 texture.size.height as gl::GLint, 3392 desc.external, 3393 desc.pixel_type, 3394 texels_to_u8_slice(pixels), 3395 ); 3396 } 3397 3398 pub fn read_pixels(&mut self, img_desc: &ImageDescriptor) -> Vec<u8> { 3399 let desc = self.gl_describe_format(img_desc.format); 3400 self.gl.read_pixels( 3401 0, 0, 3402 img_desc.size.width as i32, 3403 img_desc.size.height as i32, 3404 desc.read, 3405 desc.pixel_type, 3406 ) 3407 } 3408 3409 /// Read rectangle of pixels into the specified output slice. 3410 pub fn read_pixels_into( 3411 &mut self, 3412 rect: FramebufferIntRect, 3413 format: ImageFormat, 3414 output: &mut [u8], 3415 ) { 3416 let bytes_per_pixel = format.bytes_per_pixel(); 3417 let desc = self.gl_describe_format(format); 3418 let size_in_bytes = (bytes_per_pixel * rect.area()) as usize; 3419 assert_eq!(output.len(), size_in_bytes); 3420 3421 self.gl.flush(); 3422 self.gl.read_pixels_into_buffer( 3423 rect.min.x as _, 3424 rect.min.y as _, 3425 rect.width() as _, 3426 rect.height() as _, 3427 desc.read, 3428 desc.pixel_type, 3429 output, 3430 ); 3431 } 3432 3433 /// Get texels of a texture into the specified output slice. 
3434 pub fn get_tex_image_into( 3435 &mut self, 3436 texture: &Texture, 3437 format: ImageFormat, 3438 output: &mut [u8], 3439 ) { 3440 self.bind_texture(DEFAULT_TEXTURE, texture, Swizzle::default()); 3441 let desc = self.gl_describe_format(format); 3442 self.gl.get_tex_image_into_buffer( 3443 texture.target, 3444 0, 3445 desc.external, 3446 desc.pixel_type, 3447 output, 3448 ); 3449 } 3450 3451 /// Attaches the provided texture to the current Read FBO binding. 3452 fn attach_read_texture_raw(&mut self, texture_id: gl::GLuint, target: gl::GLuint) { 3453 self.gl.framebuffer_texture_2d( 3454 gl::READ_FRAMEBUFFER, 3455 gl::COLOR_ATTACHMENT0, 3456 target, 3457 texture_id, 3458 0, 3459 ) 3460 } 3461 3462 pub fn attach_read_texture_external( 3463 &mut self, texture_id: gl::GLuint, target: ImageBufferKind 3464 ) { 3465 self.attach_read_texture_raw(texture_id, get_gl_target(target)) 3466 } 3467 3468 pub fn attach_read_texture(&mut self, texture: &Texture) { 3469 self.attach_read_texture_raw(texture.id, texture.target) 3470 } 3471 3472 fn bind_vao_impl(&mut self, id: gl::GLuint) { 3473 debug_assert!(self.inside_frame); 3474 3475 if self.bound_vao != id { 3476 self.bound_vao = id; 3477 self.gl.bind_vertex_array(id); 3478 } 3479 } 3480 3481 pub fn bind_vao(&mut self, vao: &VAO) { 3482 self.bind_vao_impl(vao.id) 3483 } 3484 3485 pub fn bind_custom_vao(&mut self, vao: &CustomVAO) { 3486 self.bind_vao_impl(vao.id) 3487 } 3488 3489 fn create_vao_with_vbos( 3490 &mut self, 3491 descriptor: &VertexDescriptor, 3492 main_vbo_id: VBOId, 3493 instance_vbo_id: VBOId, 3494 instance_divisor: u32, 3495 ibo_id: IBOId, 3496 owns_vertices_and_indices: bool, 3497 ) -> VAO { 3498 let instance_stride = descriptor.instance_stride() as usize; 3499 let vao_id = self.gl.gen_vertex_arrays(1)[0]; 3500 3501 self.bind_vao_impl(vao_id); 3502 3503 descriptor.bind(self.gl(), main_vbo_id, instance_vbo_id, instance_divisor); 3504 ibo_id.bind(self.gl()); // force it to be a part of VAO 3505 3506 VAO { 3507 
id: vao_id, 3508 ibo_id, 3509 main_vbo_id, 3510 instance_vbo_id, 3511 instance_stride, 3512 instance_divisor, 3513 owns_vertices_and_indices, 3514 } 3515 } 3516 3517 pub fn create_custom_vao( 3518 &mut self, 3519 streams: &[Stream], 3520 ) -> CustomVAO { 3521 debug_assert!(self.inside_frame); 3522 3523 let vao_id = self.gl.gen_vertex_arrays(1)[0]; 3524 self.bind_vao_impl(vao_id); 3525 3526 let mut attrib_index = 0; 3527 for stream in streams { 3528 VertexDescriptor::bind_attributes( 3529 stream.attributes, 3530 attrib_index, 3531 0, 3532 self.gl(), 3533 stream.vbo, 3534 ); 3535 attrib_index += stream.attributes.len(); 3536 } 3537 3538 CustomVAO { 3539 id: vao_id, 3540 } 3541 } 3542 3543 pub fn delete_custom_vao(&mut self, mut vao: CustomVAO) { 3544 self.gl.delete_vertex_arrays(&[vao.id]); 3545 vao.id = 0; 3546 } 3547 3548 pub fn create_vbo<T>(&mut self) -> VBO<T> { 3549 let ids = self.gl.gen_buffers(1); 3550 VBO { 3551 id: ids[0], 3552 target: gl::ARRAY_BUFFER, 3553 allocated_count: 0, 3554 marker: PhantomData, 3555 } 3556 } 3557 3558 pub fn delete_vbo<T>(&mut self, mut vbo: VBO<T>) { 3559 self.gl.delete_buffers(&[vbo.id]); 3560 vbo.id = 0; 3561 } 3562 3563 pub fn create_vao(&mut self, descriptor: &VertexDescriptor, instance_divisor: u32) -> VAO { 3564 debug_assert!(self.inside_frame); 3565 3566 let buffer_ids = self.gl.gen_buffers(3); 3567 let ibo_id = IBOId(buffer_ids[0]); 3568 let main_vbo_id = VBOId(buffer_ids[1]); 3569 let intance_vbo_id = VBOId(buffer_ids[2]); 3570 3571 self.create_vao_with_vbos(descriptor, main_vbo_id, intance_vbo_id, instance_divisor, ibo_id, true) 3572 } 3573 3574 pub fn delete_vao(&mut self, mut vao: VAO) { 3575 self.gl.delete_vertex_arrays(&[vao.id]); 3576 vao.id = 0; 3577 3578 if vao.owns_vertices_and_indices { 3579 self.gl.delete_buffers(&[vao.ibo_id.0]); 3580 self.gl.delete_buffers(&[vao.main_vbo_id.0]); 3581 } 3582 3583 self.gl.delete_buffers(&[vao.instance_vbo_id.0]) 3584 } 3585 3586 pub fn allocate_vbo<V>( 3587 &mut self, 3588 vbo: 
&mut VBO<V>,
        count: usize,
        usage_hint: VertexUsageHint,
    ) {
        debug_assert!(self.inside_frame);
        vbo.allocated_count = count;

        self.gl.bind_buffer(vbo.target, vbo.id);
        self.gl.buffer_data_untyped(
            vbo.target,
            (count * mem::size_of::<V>()) as _,
            ptr::null(),
            usage_hint.to_gl(),
        );
    }

    /// Writes `data` into `vbo` starting at element `offset`. The buffer must
    /// have been sized beforehand with `allocate_vbo`.
    pub fn fill_vbo<V>(
        &mut self,
        vbo: &VBO<V>,
        data: &[V],
        offset: usize,
    ) {
        debug_assert!(self.inside_frame);
        assert!(offset + data.len() <= vbo.allocated_count);
        let stride = mem::size_of::<V>();

        self.gl.bind_buffer(vbo.target, vbo.id);
        self.gl.buffer_sub_data_untyped(
            vbo.target,
            (offset * stride) as _,
            (data.len() * stride) as _,
            data.as_ptr() as _,
        );
    }

    /// Re-allocates `vbo` and uploads `vertices` into it.
    fn update_vbo_data<V>(
        &mut self,
        vbo: VBOId,
        vertices: &[V],
        usage_hint: VertexUsageHint,
    ) {
        debug_assert!(self.inside_frame);

        vbo.bind(self.gl());
        gl::buffer_data(self.gl(), gl::ARRAY_BUFFER, vertices, usage_hint.to_gl());
    }

    /// Creates a VAO that shares `base_vao`'s vertex and index buffers but has
    /// its own, freshly-allocated instance buffer.
    pub fn create_vao_with_new_instances(
        &mut self,
        descriptor: &VertexDescriptor,
        base_vao: &VAO,
    ) -> VAO {
        debug_assert!(self.inside_frame);

        let buffer_ids = self.gl.gen_buffers(1);
        let instance_vbo_id = VBOId(buffer_ids[0]);

        self.create_vao_with_vbos(
            descriptor,
            base_vao.main_vbo_id,
            instance_vbo_id,
            base_vao.instance_divisor,
            base_vao.ibo_id,
            false,
        )
    }

    pub fn update_vao_main_vertices<V>(
        &mut self,
        vao: &VAO,
        vertices: &[V],
        usage_hint: VertexUsageHint,
    ) {
        debug_assert_eq!(self.bound_vao, vao.id);
        self.update_vbo_data(vao.main_vbo_id, vertices, usage_hint)
    }

    /// Uploads `instances` into the VAO's instance buffer.
    pub fn update_vao_instances<V: Clone>(
        &mut self,
        vao: &VAO,
        instances: &[V],
        usage_hint: VertexUsageHint,
        // if `Some(count)`, each instance is repeated `count` times
        repeat: Option<NonZeroUsize>,
    ) {
        debug_assert_eq!(self.bound_vao, vao.id);
        debug_assert_eq!(vao.instance_stride as usize, mem::size_of::<V>());

        match repeat {
            Some(count) => {
                // Orphan the buffer with new storage, then map it and write
                // each instance `count` times.
                let target = gl::ARRAY_BUFFER;
                self.gl.bind_buffer(target, vao.instance_vbo_id.0);
                let size = instances.len() * count.get() * mem::size_of::<V>();
                self.gl.buffer_data_untyped(
                    target,
                    size as _,
                    ptr::null(),
                    usage_hint.to_gl(),
                );

                let ptr = match self.gl.get_type() {
                    gl::GlType::Gl => {
                        self.gl.map_buffer(target, gl::WRITE_ONLY)
                    }
                    gl::GlType::Gles => {
                        self.gl.map_buffer_range(target, 0, size as _, gl::MAP_WRITE_BIT)
                    }
                };
                assert!(!ptr.is_null());

                let buffer_slice = unsafe {
                    slice::from_raw_parts_mut(ptr as *mut V, instances.len() * count.get())
                };
                // Fix: this previously filled hard-coded chunks of 4, which
                // only honours the documented contract when `count == 4`.
                // Repeat each instance exactly `count` times; behavior is
                // unchanged for the existing count-of-4 callers.
                for (chunk, instance) in buffer_slice.chunks_mut(count.get()).zip(instances) {
                    for slot in chunk {
                        *slot = instance.clone();
                    }
                }
                self.gl.unmap_buffer(target);
            }
            None => {
                self.update_vbo_data(vao.instance_vbo_id, instances, usage_hint);
            }
        }

        // On some devices the VAO must be manually unbound and rebound after an attached buffer has
        // been orphaned. Failure to do so appeared to result in the orphaned buffer's contents
        // being used for the subsequent draw call, rather than the new buffer's contents.
        if self.capabilities.requires_vao_rebind_after_orphaning {
            self.bind_vao_impl(0);
            self.bind_vao_impl(vao.id);
        }
    }

    pub fn update_vao_indices<I>(&mut self, vao: &VAO, indices: &[I], usage_hint: VertexUsageHint) {
        debug_assert!(self.inside_frame);
        debug_assert_eq!(self.bound_vao, vao.id);

        vao.ibo_id.bind(self.gl());
        gl::buffer_data(
            self.gl(),
            gl::ELEMENT_ARRAY_BUFFER,
            indices,
            usage_hint.to_gl(),
        );
    }

    /// Draws triangles using u16 indices from the bound index buffer.
    pub fn draw_triangles_u16(&mut self, first_vertex: i32, index_count: i32) {
        debug_assert!(self.inside_frame);
        #[cfg(debug_assertions)]
        debug_assert!(self.shader_is_ready);

        let _guard = if self.annotate_draw_call_crashes {
            Some(CrashAnnotatorGuard::new(
                &self.crash_annotator,
                CrashAnnotation::DrawShader,
                &self.bound_program_name,
            ))
        } else {
            None
        };

        self.gl.draw_elements(
            gl::TRIANGLES,
            index_count,
            gl::UNSIGNED_SHORT,
            // Byte offset: 2 bytes per u16 index.
            first_vertex as u32 * 2,
        );
    }

    /// Draws triangles using u32 indices from the bound index buffer.
    pub fn draw_triangles_u32(&mut self, first_vertex: i32, index_count: i32) {
        debug_assert!(self.inside_frame);
        #[cfg(debug_assertions)]
        debug_assert!(self.shader_is_ready);

        let _guard = if self.annotate_draw_call_crashes {
            Some(CrashAnnotatorGuard::new(
                &self.crash_annotator,
                CrashAnnotation::DrawShader,
                &self.bound_program_name,
            ))
        } else {
            None
        };

        self.gl.draw_elements(
            gl::TRIANGLES,
            index_count,
            gl::UNSIGNED_INT,
            // Byte offset: 4 bytes per u32 index.
            first_vertex as u32 * 4,
        );
    }

    /// Draws non-indexed points.
    pub fn draw_nonindexed_points(&mut self, first_vertex: i32, vertex_count: i32) {
        debug_assert!(self.inside_frame);
        #[cfg(debug_assertions)]
        debug_assert!(self.shader_is_ready);

        let _guard = if self.annotate_draw_call_crashes {
            Some(CrashAnnotatorGuard::new(
                &self.crash_annotator,
                CrashAnnotation::DrawShader,
                &self.bound_program_name,
            ))
        } else {
            None
        };

        self.gl.draw_arrays(gl::POINTS, first_vertex, vertex_count);
    }

    /// Draws non-indexed lines.
    pub fn draw_nonindexed_lines(&mut self, first_vertex: i32, vertex_count: i32) {
        debug_assert!(self.inside_frame);
        #[cfg(debug_assertions)]
        debug_assert!(self.shader_is_ready);

        let _guard = if self.annotate_draw_call_crashes {
            Some(CrashAnnotatorGuard::new(
                &self.crash_annotator,
                CrashAnnotation::DrawShader,
                &self.bound_program_name,
            ))
        } else {
            None
        };

        self.gl.draw_arrays(gl::LINES, first_vertex, vertex_count);
    }

    /// Draws triangles using u16 indices, starting at the first index.
    pub fn draw_indexed_triangles(&mut self, index_count: i32) {
        debug_assert!(self.inside_frame);
        #[cfg(debug_assertions)]
        debug_assert!(self.shader_is_ready);

        let _guard = if self.annotate_draw_call_crashes {
            Some(CrashAnnotatorGuard::new(
                &self.crash_annotator,
                CrashAnnotation::DrawShader,
                &self.bound_program_name,
            ))
        } else {
            None
        };

        self.gl.draw_elements(
            gl::TRIANGLES,
            index_count,
            gl::UNSIGNED_SHORT,
            0,
        );
    }

    /// Draws `instance_count` instances of u16-indexed triangles.
    pub fn draw_indexed_triangles_instanced_u16(&mut self, index_count: i32, instance_count: i32) {
        debug_assert!(self.inside_frame);
        #[cfg(debug_assertions)]
        debug_assert!(self.shader_is_ready);

        let _guard = if self.annotate_draw_call_crashes {
            Some(CrashAnnotatorGuard::new(
                &self.crash_annotator,
                CrashAnnotation::DrawShader,
                &self.bound_program_name,
            ))
        } else {
            None
        };

        self.gl.draw_elements_instanced(
            gl::TRIANGLES,
            index_count,
            gl::UNSIGNED_SHORT,
            0,
            instance_count,
        );
    }

    /// Ends the current frame, resetting targets and cached GL state.
    pub fn end_frame(&mut self) {
        self.reset_draw_target();
        self.reset_read_target();

        debug_assert!(self.inside_frame);
        self.inside_frame = false;
self.gl.bind_texture(gl::TEXTURE_2D, 0); 3873 self.gl.use_program(0); 3874 3875 for i in 0 .. self.bound_textures.len() { 3876 self.gl.active_texture(gl::TEXTURE0 + i as gl::GLuint); 3877 self.gl.bind_texture(gl::TEXTURE_2D, 0); 3878 } 3879 3880 self.gl.active_texture(gl::TEXTURE0); 3881 3882 self.frame_id.0 += 1; 3883 3884 // Save any shaders compiled this frame to disk. 3885 // If this is the tenth frame then treat startup as complete, meaning the 3886 // current set of in-use shaders are the ones to load on the next startup. 3887 if let Some(ref cache) = self.cached_programs { 3888 cache.update_disk_cache(self.frame_id.0 == 10); 3889 } 3890 } 3891 3892 pub fn clear_target( 3893 &self, 3894 color: Option<[f32; 4]>, 3895 depth: Option<f32>, 3896 rect: Option<FramebufferIntRect>, 3897 ) { 3898 let mut clear_bits = 0; 3899 3900 if let Some(color) = color { 3901 self.gl.clear_color(color[0], color[1], color[2], color[3]); 3902 clear_bits |= gl::COLOR_BUFFER_BIT; 3903 } 3904 3905 if let Some(depth) = depth { 3906 if cfg!(debug_assertions) { 3907 let mut mask = [0]; 3908 unsafe { 3909 self.gl.get_boolean_v(gl::DEPTH_WRITEMASK, &mut mask); 3910 } 3911 assert_ne!(mask[0], 0); 3912 } 3913 self.gl.clear_depth(depth as f64); 3914 clear_bits |= gl::DEPTH_BUFFER_BIT; 3915 } 3916 3917 if clear_bits != 0 { 3918 match rect { 3919 Some(rect) => { 3920 self.gl.enable(gl::SCISSOR_TEST); 3921 self.gl.scissor( 3922 rect.min.x, 3923 rect.min.y, 3924 rect.width(), 3925 rect.height(), 3926 ); 3927 self.gl.clear(clear_bits); 3928 self.gl.disable(gl::SCISSOR_TEST); 3929 } 3930 None => { 3931 self.gl.clear(clear_bits); 3932 } 3933 } 3934 } 3935 } 3936 3937 pub fn enable_depth(&self, depth_func: DepthFunction) { 3938 assert!(self.depth_available, "Enabling depth test without depth target"); 3939 self.gl.enable(gl::DEPTH_TEST); 3940 self.gl.depth_func(depth_func as gl::GLuint); 3941 } 3942 3943 pub fn disable_depth(&self) { 3944 self.gl.disable(gl::DEPTH_TEST); 3945 } 3946 3947 pub fn 
enable_depth_write(&self) {
        assert!(self.depth_available, "Enabling depth write without depth target");
        self.gl.depth_mask(true);
    }

    /// Disables writes to the depth buffer.
    pub fn disable_depth_write(&self) {
        self.gl.depth_mask(false);
    }

    /// Disables GL stencil testing.
    pub fn disable_stencil(&self) {
        self.gl.disable(gl::STENCIL_TEST);
    }

    /// Sets the scissor rectangle (framebuffer coordinates). Has no effect
    /// unless scissor testing is also enabled via `enable_scissor`.
    pub fn set_scissor_rect(&self, rect: FramebufferIntRect) {
        self.gl.scissor(
            rect.min.x,
            rect.min.y,
            rect.width(),
            rect.height(),
        );
    }

    /// Enables GL scissor testing.
    pub fn enable_scissor(&self) {
        self.gl.enable(gl::SCISSOR_TEST);
    }

    /// Disables GL scissor testing.
    pub fn disable_scissor(&self) {
        self.gl.disable(gl::SCISSOR_TEST);
    }

    /// Enables writes to all four color channels.
    pub fn enable_color_write(&self) {
        self.gl.color_mask(true, true, true, true);
    }

    /// Disables writes to all four color channels (depth/stencil unaffected).
    pub fn disable_color_write(&self) {
        self.gl.color_mask(false, false, false, false);
    }

    /// Enables or disables GL blending.
    pub fn set_blend(&mut self, enable: bool) {
        if enable {
            self.gl.enable(gl::BLEND);
        } else {
            self.gl.disable(gl::BLEND);
        }
        // NOTE(review): blend-state changes clear the debug-only
        // shader-readiness flag — presumably a shader must be (re)bound
        // before the next draw; confirm against the draw-call asserts.
        #[cfg(debug_assertions)]
        {
            self.shader_is_ready = false;
        }
    }

    /// Sets blend factors for the FUNC_ADD equation, using the separate
    /// color/alpha form only when the two factor pairs differ.
    fn set_blend_factors(
        &mut self,
        color: (gl::GLenum, gl::GLenum),
        alpha: (gl::GLenum, gl::GLenum),
    ) {
        self.gl.blend_equation(gl::FUNC_ADD);
        if color == alpha {
            self.gl.blend_func(color.0, color.1);
        } else {
            self.gl.blend_func_separate(color.0, color.1, alpha.0, alpha.1);
        }
        #[cfg(debug_assertions)]
        {
            self.shader_is_ready = false;
        }
    }

    /// Classic (non-premultiplied) source-over blending.
    pub fn set_blend_mode_alpha(&mut self) {
        self.set_blend_factors(
            (gl::SRC_ALPHA, gl::ONE_MINUS_SRC_ALPHA),
            (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
        );
    }

    /// Source-over blending for premultiplied-alpha sources.
    pub fn set_blend_mode_premultiplied_alpha(&mut self) {
        self.set_blend_factors(
            (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
            (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
        );
    }

    /// Destination-out blending: destination is scaled by the inverse of the
    /// (premultiplied) source alpha; source contributes nothing.
    pub fn set_blend_mode_premultiplied_dest_out(&mut self) {
        self.set_blend_factors(
            (gl::ZERO, gl::ONE_MINUS_SRC_ALPHA),
            (gl::ZERO, gl::ONE_MINUS_SRC_ALPHA),
        );
    }

    /// Component-wise multiply: dest *= src.
    pub fn set_blend_mode_multiply(&mut self) {
        self.set_blend_factors(
            (gl::ZERO, gl::SRC_COLOR),
            (gl::ZERO, gl::SRC_ALPHA),
        );
    }
    /// First pass of two-pass subpixel text blending.
    pub fn set_blend_mode_subpixel_pass0(&mut self) {
        self.set_blend_factors(
            (gl::ZERO, gl::ONE_MINUS_SRC_COLOR),
            (gl::ZERO, gl::ONE_MINUS_SRC_ALPHA),
        );
    }
    /// Second (additive) pass of two-pass subpixel text blending.
    pub fn set_blend_mode_subpixel_pass1(&mut self) {
        self.set_blend_factors(
            (gl::ONE, gl::ONE),
            (gl::ONE, gl::ONE),
        );
    }
    /// Single-pass subpixel text blending using dual-source outputs (SRC1).
    pub fn set_blend_mode_subpixel_dual_source(&mut self) {
        self.set_blend_factors(
            (gl::ONE, gl::ONE_MINUS_SRC1_COLOR),
            (gl::ONE, gl::ONE_MINUS_SRC1_ALPHA),
        );
    }
    /// Multiply blending via dual-source outputs.
    pub fn set_blend_mode_multiply_dual_source(&mut self) {
        self.set_blend_factors(
            (gl::ONE_MINUS_DST_ALPHA, gl::ONE_MINUS_SRC1_COLOR),
            (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
        );
    }
    /// Screen blend: result = src + dst - src*dst.
    pub fn set_blend_mode_screen(&mut self) {
        self.set_blend_factors(
            (gl::ONE, gl::ONE_MINUS_SRC_COLOR),
            (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
        );
    }
    /// Additive (plus-lighter) blending.
    pub fn set_blend_mode_plus_lighter(&mut self) {
        self.set_blend_factors(
            (gl::ONE, gl::ONE),
            (gl::ONE, gl::ONE),
        );
    }
    /// Exclusion blend: result = src + dst - 2*src*dst.
    pub fn set_blend_mode_exclusion(&mut self) {
        self.set_blend_factors(
            (gl::ONE_MINUS_DST_COLOR, gl::ONE_MINUS_SRC_COLOR),
            (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
        );
    }
    /// Blend mode used by the overdraw debug visualization.
    pub fn set_blend_mode_show_overdraw(&mut self) {
        self.set_blend_factors(
            (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
            (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
        );
    }

    /// Component-wise max on color; alpha still uses FUNC_ADD.
    pub fn set_blend_mode_max(&mut self) {
        self.gl
            .blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE);
        self.gl.blend_equation_separate(gl::MAX, gl::FUNC_ADD);
        #[cfg(debug_assertions)]
        {
            self.shader_is_ready = false;
        }
    }
    /// Component-wise min on color; alpha still uses FUNC_ADD.
    pub fn set_blend_mode_min(&mut self) {
        self.gl
            .blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE);
        self.gl.blend_equation_separate(gl::MIN, gl::FUNC_ADD);
        #[cfg(debug_assertions)]
        {
            self.shader_is_ready = false;
        }
    }
    /// Selects an advanced (KHR_blend_equation_advanced) blend equation for
    /// the given CSS mix-blend-mode. `Normal` falls back to ordinary blend
    /// factors, and `PlusLighter` delegates to `set_blend_mode_plus_lighter`.
    pub fn set_blend_mode_advanced(&mut self, mode: MixBlendMode) {
        self.gl.blend_equation(match mode {
            MixBlendMode::Normal => {
                // blend factors only make sense for the normal mode
                self.gl.blend_func_separate(gl::ZERO, gl::SRC_COLOR, gl::ZERO, gl::SRC_ALPHA);
                gl::FUNC_ADD
            },
            MixBlendMode::PlusLighter => {
                return self.set_blend_mode_plus_lighter();
            },
            MixBlendMode::Multiply => gl::MULTIPLY_KHR,
            MixBlendMode::Screen => gl::SCREEN_KHR,
            MixBlendMode::Overlay => gl::OVERLAY_KHR,
            MixBlendMode::Darken => gl::DARKEN_KHR,
            MixBlendMode::Lighten => gl::LIGHTEN_KHR,
            MixBlendMode::ColorDodge => gl::COLORDODGE_KHR,
            MixBlendMode::ColorBurn => gl::COLORBURN_KHR,
            MixBlendMode::HardLight => gl::HARDLIGHT_KHR,
            MixBlendMode::SoftLight => gl::SOFTLIGHT_KHR,
            MixBlendMode::Difference => gl::DIFFERENCE_KHR,
            MixBlendMode::Exclusion => gl::EXCLUSION_KHR,
            MixBlendMode::Hue => gl::HSL_HUE_KHR,
            MixBlendMode::Saturation => gl::HSL_SATURATION_KHR,
            MixBlendMode::Color => gl::HSL_COLOR_KHR,
            MixBlendMode::Luminosity => gl::HSL_LUMINOSITY_KHR,
        });
        #[cfg(debug_assertions)]
        {
            self.shader_is_ready = false;
        }
    }

    /// Returns true if the driver advertises the named GL extension.
    pub fn supports_extension(&self, extension: &str) -> bool {
        supports_extension(&self.extensions, extension)
    }

    /// Logs any pending KHR_debug driver messages, if the extension is supported.
    pub fn echo_driver_messages(&self) {
        if self.capabilities.supports_khr_debug {
            Device::log_driver_messages(self.gl());
        }
    }

    /// Drains the GL debug-message queue, mapping GL severities onto log levels.
    fn log_driver_messages(gl: &dyn gl::Gl) {
        for msg in gl.get_debug_messages() {
            let level = match msg.severity {
                gl::DEBUG_SEVERITY_HIGH => Level::Error,
                gl::DEBUG_SEVERITY_MEDIUM => Level::Warn,
                gl::DEBUG_SEVERITY_LOW => Level::Info,
                gl::DEBUG_SEVERITY_NOTIFICATION => Level::Debug,
                _ => Level::Trace,
            };
            let ty = match msg.ty {
                gl::DEBUG_TYPE_ERROR => "error",
                gl::DEBUG_TYPE_DEPRECATED_BEHAVIOR => "deprecated",
                gl::DEBUG_TYPE_UNDEFINED_BEHAVIOR => "undefined",
                gl::DEBUG_TYPE_PORTABILITY => "portability",
                gl::DEBUG_TYPE_PERFORMANCE => "perf",
                gl::DEBUG_TYPE_MARKER => "marker",
                gl::DEBUG_TYPE_PUSH_GROUP => "group push",
                gl::DEBUG_TYPE_POP_GROUP => "group pop",
                gl::DEBUG_TYPE_OTHER => "other",
                _ => "?",
            };
            log!(level, "({}) {}", ty, msg.message);
        }
    }

    /// Maps an `ImageFormat` to the GL internal/external/read formats and
    /// pixel type used for textures of that format on this device.
    /// BGRA8 uses device-specific formats probed at init time.
    pub fn gl_describe_format(&self, format: ImageFormat) -> FormatDesc {
        match format {
            ImageFormat::R8 => FormatDesc {
                internal: gl::R8,
                external: gl::RED,
                read: gl::RED,
                pixel_type: gl::UNSIGNED_BYTE,
            },
            ImageFormat::R16 => FormatDesc {
                internal: gl::R16,
                external: gl::RED,
                read: gl::RED,
                pixel_type: gl::UNSIGNED_SHORT,
            },
            ImageFormat::BGRA8 => {
                FormatDesc {
                    internal: self.bgra_formats.internal,
                    external: self.bgra_formats.external,
                    read: gl::BGRA,
                    pixel_type: self.bgra_pixel_type,
                }
            },
            ImageFormat::RGBA8 => {
                FormatDesc {
                    internal: gl::RGBA8,
                    external: gl::RGBA,
                    read: gl::RGBA,
                    pixel_type: gl::UNSIGNED_BYTE,
                }
            },
            ImageFormat::RGBAF32 => FormatDesc {
                internal: gl::RGBA32F,
                external: gl::RGBA,
                read: gl::RGBA,
                pixel_type: gl::FLOAT,
            },
            ImageFormat::RGBAI32 => FormatDesc {
                internal: gl::RGBA32I,
                external: gl::RGBA_INTEGER,
                read: gl::RGBA_INTEGER,
                pixel_type: gl::INT,
            },
            ImageFormat::RG8 => FormatDesc {
                internal: gl::RG8,
                external: gl::RG,
                read: gl::RG,
                pixel_type: gl::UNSIGNED_BYTE,
            },
            ImageFormat::RG16 => FormatDesc {
                internal: gl::RG16,
                external: gl::RG,
                read: gl::RG,
                pixel_type: gl::UNSIGNED_SHORT,
            },
        }
    }

    /// Generates a memory report for the resources managed by the device layer.
    pub fn report_memory(&self, size_op_funs: &MallocSizeOfOps, swgl: *mut c_void) -> MemoryReport {
        let mut report = MemoryReport::default();
        report.depth_target_textures += self.depth_targets_memory();

        #[cfg(feature = "sw_compositor")]
        if !swgl.is_null() {
            report.swgl += swgl::Context::from(swgl).report_memory(size_op_funs.size_of_op);
        }
        // Suppress unused-parameter warnings when sw_compositor is disabled.
        let _ = size_op_funs;
        let _ = swgl;
        report
    }

    /// Total bytes consumed by currently allocated depth targets.
    pub fn depth_targets_memory(&self) -> usize {
        let mut total = 0;
        for dim in self.depth_targets.keys() {
            total += depth_target_size_in_bytes(dim);
        }

        total
    }
}

pub struct FormatDesc {
    /// Format the texel data is internally stored in within a texture.
    pub internal: gl::GLenum,
    /// Format that we expect the data to be provided when filling the texture.
    pub external: gl::GLuint,
    /// Format to read the texels as, so that they can be uploaded as `external`
    /// later on.
    pub read: gl::GLuint,
    /// Associated pixel type.
    pub pixel_type: gl::GLuint,
}

/// A single pending texture-upload region, recorded in a `PixelBuffer`
/// and executed later by `TextureUploader::update_impl`.
#[derive(Debug)]
struct UploadChunk<'a> {
    /// Destination region within the texture.
    rect: DeviceIntRect,
    /// Source row stride in bytes, if it differs from the tightly-packed width.
    stride: Option<i32>,
    /// Byte offset of the data within the bound PBO (or raw pointer for
    /// immediate uploads).
    offset: usize,
    /// Optional format the data should be interpreted as, overriding the
    /// texture's own format.
    format_override: Option<ImageFormat>,
    texture: &'a Texture,
}

/// A mapped PBO plus the list of upload chunks staged into it.
#[derive(Debug)]
struct PixelBuffer<'a> {
    /// Bytes of the mapping consumed so far.
    size_used: usize,
    // small vector avoids heap allocation for a single chunk
    chunks: SmallVec<[UploadChunk<'a>; 1]>,
    inner: UploadPBO,
    /// CPU-visible view of the PBO's mapped storage.
    mapping: &'a mut [mem::MaybeUninit<u8>],
}

impl<'a> PixelBuffer<'a> {
    fn new(
        pbo: UploadPBO,
    ) -> Self {
        // SAFETY: the PBO is mapped (get_ptr panics otherwise) and the
        // mapping covers exactly `reserved_size` bytes; the slice lifetime
        // is tied to 'a, and the buffer stays mapped until flush_buffer.
        let mapping = unsafe {
            slice::from_raw_parts_mut(pbo.mapping.get_ptr().as_ptr(), pbo.pbo.reserved_size)
        };
        Self {
            size_used: 0,
            chunks: SmallVec::new(),
            inner: pbo,
            mapping,
        }
    }

    /// Executes every staged chunk as a GL upload from the bound PBO.
    fn flush_chunks(&mut self, device: &mut Device) {
        for chunk in self.chunks.drain(..) {
            TextureUploader::update_impl(device, chunk);
        }
    }
}

impl<'a> Drop for PixelBuffer<'a> {
    fn drop(&mut self) {
        assert_eq!(self.chunks.len(), 0, "PixelBuffer must be flushed before dropping.");
    }
}

/// Tracks the CPU mapping state of an upload PBO.
#[derive(Debug)]
enum PBOMapping {
    /// Not currently mapped into CPU address space.
    Unmapped,
    /// Mapped for this frame only; must be unmapped before returning to the pool.
    Transient(ptr::NonNull<mem::MaybeUninit<u8>>),
    /// Persistently mapped (GL_MAP_PERSISTENT_BIT); stays mapped across frames.
    Persistent(ptr::NonNull<mem::MaybeUninit<u8>>),
}

impl PBOMapping {
    /// Returns the mapped pointer; panics if the PBO is unmapped.
    fn get_ptr(&self) -> ptr::NonNull<mem::MaybeUninit<u8>> {
        match self {
            PBOMapping::Unmapped => unreachable!("Cannot get pointer to unmapped PBO."),
            PBOMapping::Transient(ptr) => *ptr,
            PBOMapping::Persistent(ptr) => *ptr,
        }
    }
}

/// A PBO for uploading texture data, managed by UploadPBOPool.
4333 #[derive(Debug)] 4334 struct UploadPBO { 4335 pbo: PBO, 4336 mapping: PBOMapping, 4337 can_recycle: bool, 4338 } 4339 4340 impl UploadPBO { 4341 fn empty() -> Self { 4342 Self { 4343 pbo: PBO { 4344 id: 0, 4345 reserved_size: 0, 4346 }, 4347 mapping: PBOMapping::Unmapped, 4348 can_recycle: false, 4349 } 4350 } 4351 } 4352 4353 /// Allocates and recycles PBOs used for uploading texture data. 4354 /// Tries to allocate and recycle PBOs of a fixed size, but will make exceptions when 4355 /// a larger buffer is required or to work around driver bugs. 4356 pub struct UploadPBOPool { 4357 /// Usage hint to provide to the driver for optimizations. 4358 usage_hint: VertexUsageHint, 4359 /// The preferred size, in bytes, of the buffers to allocate. 4360 default_size: usize, 4361 /// List of allocated PBOs ready to be re-used. 4362 available_buffers: Vec<UploadPBO>, 4363 /// PBOs which have been returned during the current frame, 4364 /// and do not yet have an associated sync object. 4365 returned_buffers: Vec<UploadPBO>, 4366 /// PBOs which are waiting until their sync object is signalled, 4367 /// indicating they can are ready to be re-used. 4368 waiting_buffers: Vec<(gl::GLsync, Vec<UploadPBO>)>, 4369 /// PBOs which have been orphaned. 4370 /// We can recycle their IDs but must reallocate their storage. 4371 orphaned_buffers: Vec<PBO>, 4372 } 4373 4374 impl UploadPBOPool { 4375 pub fn new(device: &mut Device, default_size: usize) -> Self { 4376 let usage_hint = match device.upload_method { 4377 UploadMethod::Immediate => VertexUsageHint::Stream, 4378 UploadMethod::PixelBuffer(usage_hint) => usage_hint, 4379 }; 4380 Self { 4381 usage_hint, 4382 default_size, 4383 available_buffers: Vec::new(), 4384 returned_buffers: Vec::new(), 4385 waiting_buffers: Vec::new(), 4386 orphaned_buffers: Vec::new(), 4387 } 4388 } 4389 4390 /// To be called at the beginning of a series of uploads. 
4391 /// Moves any buffers which are now ready to be used from the waiting list to the ready list. 4392 pub fn begin_frame(&mut self, device: &mut Device) { 4393 // Iterate through the waiting buffers and check if each fence has been signalled. 4394 // If a fence is signalled, move its corresponding buffers to the available list. 4395 // On error, delete the buffers. Stop when we find the first non-signalled fence, 4396 // and clean up the signalled fences. 4397 let mut first_not_signalled = self.waiting_buffers.len(); 4398 for (i, (sync, buffers)) in self.waiting_buffers.iter_mut().enumerate() { 4399 match device.gl.client_wait_sync(*sync, 0, 0) { 4400 gl::TIMEOUT_EXPIRED => { 4401 first_not_signalled = i; 4402 break; 4403 }, 4404 gl::ALREADY_SIGNALED | gl::CONDITION_SATISFIED => { 4405 self.available_buffers.extend(buffers.drain(..)); 4406 } 4407 gl::WAIT_FAILED | _ => { 4408 warn!("glClientWaitSync error in UploadPBOPool::begin_frame()"); 4409 for buffer in buffers.drain(..) { 4410 device.delete_pbo(buffer.pbo); 4411 } 4412 } 4413 } 4414 } 4415 4416 // Delete signalled fences, and remove their now-empty Vecs from waiting_buffers. 4417 for (sync, _) in self.waiting_buffers.drain(0..first_not_signalled) { 4418 device.gl.delete_sync(sync); 4419 } 4420 } 4421 4422 // To be called at the end of a series of uploads. 4423 // Creates a sync object, and adds the buffers returned during this frame to waiting_buffers. 4424 pub fn end_frame(&mut self, device: &mut Device) { 4425 if !self.returned_buffers.is_empty() { 4426 let sync = device.gl.fence_sync(gl::SYNC_GPU_COMMANDS_COMPLETE, 0); 4427 if !sync.is_null() { 4428 self.waiting_buffers.push((sync, mem::replace(&mut self.returned_buffers, Vec::new()))) 4429 } else { 4430 warn!("glFenceSync error in UploadPBOPool::end_frame()"); 4431 4432 for buffer in self.returned_buffers.drain(..) 
{ 4433 device.delete_pbo(buffer.pbo); 4434 } 4435 } 4436 } 4437 } 4438 4439 /// Obtain a PBO, either by reusing an existing PBO or allocating a new one. 4440 /// min_size specifies the minimum required size of the PBO. The returned PBO 4441 /// may be larger than required. 4442 fn get_pbo(&mut self, device: &mut Device, min_size: usize) -> Result<UploadPBO, String> { 4443 4444 // If min_size is smaller than our default size, then use the default size. 4445 // The exception to this is when due to driver bugs we cannot upload from 4446 // offsets other than zero within a PBO. In this case, there is no point in 4447 // allocating buffers larger than required, as they cannot be shared. 4448 let (can_recycle, size) = if min_size <= self.default_size && device.capabilities.supports_nonzero_pbo_offsets { 4449 (true, self.default_size) 4450 } else { 4451 (false, min_size) 4452 }; 4453 4454 // Try to recycle an already allocated PBO. 4455 if can_recycle { 4456 if let Some(mut buffer) = self.available_buffers.pop() { 4457 assert_eq!(buffer.pbo.reserved_size, size); 4458 assert!(buffer.can_recycle); 4459 4460 device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, buffer.pbo.id); 4461 4462 match buffer.mapping { 4463 PBOMapping::Unmapped => { 4464 // If buffer was unmapped then transiently map it. 
4465 let ptr = device.gl.map_buffer_range( 4466 gl::PIXEL_UNPACK_BUFFER, 4467 0, 4468 buffer.pbo.reserved_size as _, 4469 gl::MAP_WRITE_BIT | gl::MAP_UNSYNCHRONIZED_BIT, 4470 ) as *mut _; 4471 4472 let ptr = ptr::NonNull::new(ptr).ok_or_else( 4473 || format!("Failed to transiently map PBO of size {} bytes", buffer.pbo.reserved_size) 4474 )?; 4475 4476 buffer.mapping = PBOMapping::Transient(ptr); 4477 } 4478 PBOMapping::Transient(_) => { 4479 unreachable!("Transiently mapped UploadPBO must be unmapped before returning to pool."); 4480 } 4481 PBOMapping::Persistent(_) => { 4482 } 4483 } 4484 4485 return Ok(buffer); 4486 } 4487 } 4488 4489 // Try to recycle a PBO ID (but not its allocation) from a previously allocated PBO. 4490 // If there are none available, create a new PBO. 4491 let mut pbo = match self.orphaned_buffers.pop() { 4492 Some(pbo) => pbo, 4493 None => device.create_pbo(), 4494 }; 4495 4496 assert_eq!(pbo.reserved_size, 0); 4497 pbo.reserved_size = size; 4498 4499 device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id); 4500 let mapping = if device.capabilities.supports_buffer_storage && can_recycle { 4501 device.gl.buffer_storage( 4502 gl::PIXEL_UNPACK_BUFFER, 4503 pbo.reserved_size as _, 4504 ptr::null(), 4505 gl::MAP_WRITE_BIT | gl::MAP_PERSISTENT_BIT, 4506 ); 4507 let ptr = device.gl.map_buffer_range( 4508 gl::PIXEL_UNPACK_BUFFER, 4509 0, 4510 pbo.reserved_size as _, 4511 // GL_MAP_COHERENT_BIT doesn't seem to work on Adreno, so use glFlushMappedBufferRange. 4512 // kvark notes that coherent memory can be faster on some platforms, such as nvidia, 4513 // so in the future we could choose which to use at run time. 
4514 gl::MAP_WRITE_BIT | gl::MAP_PERSISTENT_BIT | gl::MAP_FLUSH_EXPLICIT_BIT, 4515 ) as *mut _; 4516 4517 let ptr = ptr::NonNull::new(ptr).ok_or_else( 4518 || format!("Failed to transiently map PBO of size {} bytes", pbo.reserved_size) 4519 )?; 4520 4521 PBOMapping::Persistent(ptr) 4522 } else { 4523 device.gl.buffer_data_untyped( 4524 gl::PIXEL_UNPACK_BUFFER, 4525 pbo.reserved_size as _, 4526 ptr::null(), 4527 self.usage_hint.to_gl(), 4528 ); 4529 let ptr = device.gl.map_buffer_range( 4530 gl::PIXEL_UNPACK_BUFFER, 4531 0, 4532 pbo.reserved_size as _, 4533 // Unlike the above code path, where we are re-mapping a buffer that has previously been unmapped, 4534 // this buffer has just been created there is no need for GL_MAP_UNSYNCHRONIZED_BIT. 4535 gl::MAP_WRITE_BIT, 4536 ) as *mut _; 4537 4538 let ptr = ptr::NonNull::new(ptr).ok_or_else( 4539 || format!("Failed to transiently map PBO of size {} bytes", pbo.reserved_size) 4540 )?; 4541 4542 PBOMapping::Transient(ptr) 4543 }; 4544 4545 Ok(UploadPBO { pbo, mapping, can_recycle }) 4546 } 4547 4548 /// Returns a PBO to the pool. If the PBO is recyclable it is placed in the waiting list. 4549 /// Otherwise we orphan the allocation immediately, and will subsequently reuse just the ID. 4550 fn return_pbo(&mut self, device: &mut Device, mut buffer: UploadPBO) { 4551 assert!( 4552 !matches!(buffer.mapping, PBOMapping::Transient(_)), 4553 "Transiently mapped UploadPBO must be unmapped before returning to pool.", 4554 ); 4555 4556 if buffer.can_recycle { 4557 self.returned_buffers.push(buffer); 4558 } else { 4559 device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, buffer.pbo.id); 4560 device.gl.buffer_data_untyped( 4561 gl::PIXEL_UNPACK_BUFFER, 4562 0, 4563 ptr::null(), 4564 gl::STREAM_DRAW, 4565 ); 4566 buffer.pbo.reserved_size = 0; 4567 self.orphaned_buffers.push(buffer.pbo); 4568 } 4569 4570 device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0); 4571 } 4572 4573 /// Frees all allocated buffers in response to a memory pressure event. 
    pub fn on_memory_pressure(&mut self, device: &mut Device) {
        for buffer in self.available_buffers.drain(..) {
            device.delete_pbo(buffer.pbo);
        }
        for buffer in self.returned_buffers.drain(..) {
            device.delete_pbo(buffer.pbo)
        }
        for (sync, buffers) in self.waiting_buffers.drain(..) {
            device.gl.delete_sync(sync);
            for buffer in buffers {
                device.delete_pbo(buffer.pbo)
            }
        }
        // There is no need to delete orphaned PBOs on memory pressure.
    }

    /// Generates a memory report.
    pub fn report_memory(&self) -> MemoryReport {
        let mut report = MemoryReport::default();
        for buffer in &self.available_buffers {
            report.texture_upload_pbos += buffer.pbo.reserved_size;
        }
        for buffer in &self.returned_buffers {
            report.texture_upload_pbos += buffer.pbo.reserved_size;
        }
        for (_, buffers) in &self.waiting_buffers {
            for buffer in buffers {
                report.texture_upload_pbos += buffer.pbo.reserved_size;
            }
        }
        report
    }

    /// Deletes every PBO and sync object owned by the pool. To be called
    /// before the device is destroyed; unlike `on_memory_pressure` this also
    /// frees orphaned buffer IDs.
    pub fn deinit(&mut self, device: &mut Device) {
        for buffer in self.available_buffers.drain(..) {
            device.delete_pbo(buffer.pbo);
        }
        for buffer in self.returned_buffers.drain(..) {
            device.delete_pbo(buffer.pbo)
        }
        for (sync, buffers) in self.waiting_buffers.drain(..) {
            device.gl.delete_sync(sync);
            for buffer in buffers {
                device.delete_pbo(buffer.pbo)
            }
        }
        for pbo in self.orphaned_buffers.drain(..) {
            device.delete_pbo(pbo);
        }
    }
}

/// Used to perform a series of texture uploads.
/// Create using Device::upload_texture(). Perform a series of uploads using either
/// upload(), or stage() and upload_staged(), then call flush().
pub struct TextureUploader<'a> {
    /// A list of buffers containing uploads that need to be flushed.
    buffers: Vec<PixelBuffer<'a>>,
    /// Pool used to obtain PBOs to fill with texture data.
    pub pbo_pool: &'a mut UploadPBOPool,
}

impl<'a> Drop for TextureUploader<'a> {
    fn drop(&mut self) {
        // Dropping with pending buffers would leak PBOs and skip uploads;
        // allow it only while unwinding from a panic.
        assert!(
            thread::panicking() || self.buffers.is_empty(),
            "TextureUploader must be flushed before it is dropped."
        );
    }
}

/// A buffer used to manually stage data to be uploaded to a texture.
/// Created by calling TextureUploader::stage(), the data can then be written to via get_mapping().
#[derive(Debug)]
pub struct UploadStagingBuffer<'a> {
    /// The PixelBuffer containing this upload.
    buffer: PixelBuffer<'a>,
    /// The offset of this upload within the PixelBuffer.
    offset: usize,
    /// The size of this upload.
    size: usize,
    /// The stride of the data within the buffer.
    stride: usize,
}

impl<'a> UploadStagingBuffer<'a> {
    /// Returns the required stride of the data to be written to the buffer.
    pub fn get_stride(&self) -> usize {
        self.stride
    }

    /// Returns a mapping of the data in the buffer, to be written to.
    pub fn get_mapping(&mut self) -> &mut [mem::MaybeUninit<u8>] {
        &mut self.buffer.mapping[self.offset..self.offset + self.size]
    }
}

impl<'a> TextureUploader<'a> {
    /// Returns an UploadStagingBuffer which can be used to manually stage data to be uploaded.
    /// Once the data has been staged, it can be uploaded with upload_staged().
    pub fn stage(
        &mut self,
        device: &mut Device,
        format: ImageFormat,
        size: DeviceIntSize,
    ) -> Result<UploadStagingBuffer<'a>, String> {
        assert!(matches!(device.upload_method, UploadMethod::PixelBuffer(_)), "Texture uploads should only be staged when using pixel buffers.");

        // for optimal PBO texture uploads the offset and stride of the data in
        // the buffer may have to be a multiple of a certain value.
        let (dst_size, dst_stride) = device.required_upload_size_and_stride(
            size,
            format,
        );

        // Find a pixel buffer with enough space remaining, creating a new one if required.
        let buffer_index = self.buffers.iter().position(|buffer| {
            buffer.size_used + dst_size <= buffer.inner.pbo.reserved_size
        });
        let buffer = match buffer_index {
            Some(i) => self.buffers.swap_remove(i),
            None => PixelBuffer::new(self.pbo_pool.get_pbo(device, dst_size)?),
        };

        if !device.capabilities.supports_nonzero_pbo_offsets {
            assert_eq!(buffer.size_used, 0, "PBO uploads from non-zero offset are not supported.");
        }
        assert!(buffer.size_used + dst_size <= buffer.inner.pbo.reserved_size, "PixelBuffer is too small");

        let offset = buffer.size_used;

        Ok(UploadStagingBuffer {
            buffer,
            offset,
            size: dst_size,
            stride: dst_stride,
        })
    }

    /// Uploads manually staged texture data to the specified texture.
    /// Returns the number of bytes staged for upload.
    pub fn upload_staged(
        &mut self,
        device: &mut Device,
        texture: &'a Texture,
        rect: DeviceIntRect,
        format_override: Option<ImageFormat>,
        mut staging_buffer: UploadStagingBuffer<'a>,
    ) -> usize {
        let size = staging_buffer.size;

        staging_buffer.buffer.chunks.push(UploadChunk {
            rect,
            stride: Some(staging_buffer.stride as i32),
            offset: staging_buffer.offset,
            format_override,
            texture,
        });
        staging_buffer.buffer.size_used += staging_buffer.size;

        // Flush the buffer if it is full, otherwise return it to the uploader for further use.
        if staging_buffer.buffer.size_used < staging_buffer.buffer.inner.pbo.reserved_size {
            self.buffers.push(staging_buffer.buffer);
        } else {
            Self::flush_buffer(device, self.pbo_pool, staging_buffer.buffer);
        }

        size
    }

    /// Uploads texture data to the specified texture.
    /// `rect` is cropped to the texture's (possibly clamped) dimensions;
    /// `stride` is the source row stride in bytes (tightly packed if None);
    /// `data`/`len` describe the source buffer in units of `T`.
    /// Returns the number of bytes uploaded (0 if the rect is fully cropped
    /// away or staging fails).
    pub fn upload<T>(
        &mut self,
        device: &mut Device,
        texture: &'a Texture,
        mut rect: DeviceIntRect,
        stride: Option<i32>,
        format_override: Option<ImageFormat>,
        data: *const T,
        len: usize,
    ) -> usize {
        // Textures dimensions may have been clamped by the hardware. Crop the
        // upload region to match.
        let cropped = rect.intersection(
            &DeviceIntRect::from_size(texture.get_dimensions())
        );
        if cfg!(debug_assertions) && cropped.map_or(true, |r| r != rect) {
            warn!("Cropping texture upload {:?} to {:?}", rect, cropped);
        }
        rect = match cropped {
            None => return 0,
            Some(r) => r,
        };

        let bytes_pp = texture.format.bytes_per_pixel() as usize;
        let width_bytes = rect.width() as usize * bytes_pp;

        let src_stride = stride.map_or(width_bytes, |stride| {
            assert!(stride >= 0);
            stride as usize
        });
        // Note: the last row only needs width_bytes, not a full stride.
        let src_size = (rect.height() as usize - 1) * src_stride + width_bytes;
        assert!(src_size <= len * mem::size_of::<T>());

        match device.upload_method {
            UploadMethod::Immediate => {
                if cfg!(debug_assertions) {
                    let mut bound_buffer = [0];
                    unsafe {
                        device.gl.get_integer_v(gl::PIXEL_UNPACK_BUFFER_BINDING, &mut bound_buffer);
                    }
                    assert_eq!(bound_buffer[0], 0, "GL_PIXEL_UNPACK_BUFFER must not be bound for immediate uploads.");
                }

                // With no PBO bound, `offset` is interpreted by GL as a
                // client-memory pointer.
                Self::update_impl(device, UploadChunk {
                    rect,
                    stride: Some(src_stride as i32),
                    offset: data as _,
                    format_override,
                    texture,
                });

                width_bytes * rect.height() as usize
            }
            UploadMethod::PixelBuffer(_) => {
                let mut staging_buffer = match self.stage(device, texture.format, rect.size()) {
                    Ok(staging_buffer) => staging_buffer,
                    Err(_) => return 0,
                };
                let dst_stride = staging_buffer.get_stride();

                // SAFETY: src_size <= len * size_of::<T>() was asserted above,
                // so the byte view stays within the caller-provided buffer.
                unsafe {
                    let src: &[mem::MaybeUninit<u8>] = slice::from_raw_parts(data as *const _, src_size);

                    if src_stride == dst_stride {
                        // the stride is already optimal, so simply copy
                        // the data as-is in to the buffer
                        staging_buffer.get_mapping()[..src_size].copy_from_slice(src);
                    } else {
                        // copy the data line-by-line in to the buffer so
                        // that it has an optimal stride
                        for y in 0..rect.height() as usize {
                            let src_start = y * src_stride;
                            let src_end = src_start + width_bytes;
                            let dst_start = y * staging_buffer.get_stride();
                            let dst_end = dst_start + width_bytes;

                            staging_buffer.get_mapping()[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
                        }
                    }
                }

                self.upload_staged(device, texture, rect, format_override, staging_buffer)
            }
        }
    }

    /// Unmaps (or flushes) a pixel buffer's mapping, performs its pending
    /// chunk uploads, and returns the underlying PBO to the pool.
    fn flush_buffer(device: &mut Device, pbo_pool: &mut UploadPBOPool, mut buffer: PixelBuffer) {
        device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, buffer.inner.pbo.id);
        match buffer.inner.mapping {
            PBOMapping::Unmapped => unreachable!("UploadPBO should be mapped at this stage."),
            PBOMapping::Transient(_) => {
                device.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER);
                buffer.inner.mapping = PBOMapping::Unmapped;
            }
            PBOMapping::Persistent(_) => {
                // Persistent mappings were created with MAP_FLUSH_EXPLICIT_BIT,
                // so flush only the written range instead of unmapping.
                device.gl.flush_mapped_buffer_range(gl::PIXEL_UNPACK_BUFFER, 0, buffer.size_used as _);
            }
        }
        buffer.flush_chunks(device);
        let pbo = mem::replace(&mut buffer.inner, UploadPBO::empty());
        pbo_pool.return_pbo(device, pbo);
    }

    /// Flushes all pending texture uploads. Must be called after all
    /// required upload() or upload_staged() calls have been made.
    pub fn flush(mut self, device: &mut Device) {
        for buffer in self.buffers.drain(..) {
            Self::flush_buffer(device, self.pbo_pool, buffer);
        }

        device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
    }

    /// Performs a single glTexSubImage2D upload described by `chunk`, reading
    /// from the currently bound PIXEL_UNPACK_BUFFER (or client memory when
    /// none is bound).
    fn update_impl(device: &mut Device, chunk: UploadChunk) {
        device.bind_texture(DEFAULT_TEXTURE, chunk.texture, Swizzle::default());

        let format = chunk.format_override.unwrap_or(chunk.texture.format);
        let (gl_format, bpp, data_type) = match format {
            ImageFormat::R8 => (gl::RED, 1, gl::UNSIGNED_BYTE),
            ImageFormat::R16 => (gl::RED, 2, gl::UNSIGNED_SHORT),
            ImageFormat::BGRA8 => (device.bgra_formats.external, 4, device.bgra_pixel_type),
            ImageFormat::RGBA8 => (gl::RGBA, 4, gl::UNSIGNED_BYTE),
            ImageFormat::RG8 => (gl::RG, 2, gl::UNSIGNED_BYTE),
            ImageFormat::RG16 => (gl::RG, 4, gl::UNSIGNED_SHORT),
            ImageFormat::RGBAF32 => (gl::RGBA, 16, gl::FLOAT),
            ImageFormat::RGBAI32 => (gl::RGBA_INTEGER, 16, gl::INT),
        };

        // GL_UNPACK_ROW_LENGTH is specified in pixels, not bytes.
        let row_length = match chunk.stride {
            Some(value) => value / bpp,
            None => chunk.texture.size.width,
        };

        if chunk.stride.is_some() {
            device.gl.pixel_store_i(
                gl::UNPACK_ROW_LENGTH,
                row_length as _,
            );
        }

        let pos = chunk.rect.min;
        let size = chunk.rect.size();

        match chunk.texture.target {
            gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => {
                device.gl.tex_sub_image_2d_pbo(
                    chunk.texture.target,
                    0,
                    pos.x as _,
                    pos.y as _,
                    size.width as _,
                    size.height as _,
                    gl_format,
                    data_type,
                    chunk.offset,
                );
            }
            _ => panic!("BUG: Unexpected texture target!"),
        }

        // If using tri-linear filtering, build the mip-map chain for this texture.
        if chunk.texture.filter == TextureFilter::Trilinear {
            device.gl.generate_mipmap(chunk.texture.target);
        }

        // Reset row length to 0, otherwise the stride would apply to all texture uploads.
        if chunk.stride.is_some() {
            device.gl.pixel_store_i(gl::UNPACK_ROW_LENGTH, 0 as _);
        }
    }
}

/// Reinterprets a slice of texels as raw bytes.
fn texels_to_u8_slice<T: Texel>(texels: &[T]) -> &[u8] {
    // SAFETY: any initialized memory is valid for u8, and the byte length
    // exactly covers the texel slice; the returned lifetime borrows `texels`.
    unsafe {
        slice::from_raw_parts(texels.as_ptr() as *const u8, texels.len() * mem::size_of::<T>())
    }
}