gl.cc (93598B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include <stdlib.h> 6 #include <stdint.h> 7 #include <string.h> 8 #include <assert.h> 9 #include <stdio.h> 10 #include <math.h> 11 12 #ifdef __MACH__ 13 # include <mach/mach.h> 14 # include <mach/mach_time.h> 15 #else 16 # include <time.h> 17 #endif 18 19 #ifdef NDEBUG 20 # define debugf(...) 21 #else 22 # define debugf(...) printf(__VA_ARGS__) 23 #endif 24 25 // #define PRINT_TIMINGS 26 27 #ifdef _WIN32 28 # define ALWAYS_INLINE __forceinline 29 # define NO_INLINE __declspec(noinline) 30 31 // Including Windows.h brings a huge amount of namespace polution so just 32 // define a couple of things manually 33 typedef int BOOL; 34 # define WINAPI __stdcall 35 # define DECLSPEC_IMPORT __declspec(dllimport) 36 # define WINBASEAPI DECLSPEC_IMPORT 37 typedef unsigned long DWORD; 38 typedef long LONG; 39 typedef __int64 LONGLONG; 40 # define DUMMYSTRUCTNAME 41 42 typedef union _LARGE_INTEGER { 43 struct { 44 DWORD LowPart; 45 LONG HighPart; 46 } DUMMYSTRUCTNAME; 47 struct { 48 DWORD LowPart; 49 LONG HighPart; 50 } u; 51 LONGLONG QuadPart; 52 } LARGE_INTEGER; 53 extern "C" { 54 WINBASEAPI BOOL WINAPI 55 QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount); 56 57 WINBASEAPI BOOL WINAPI QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency); 58 } 59 60 #else 61 // GCC is slower when dealing with always_inline, especially in debug builds. 62 // When using Clang, use always_inline more aggressively. 
63 # if defined(__clang__) || defined(NDEBUG) 64 # define ALWAYS_INLINE __attribute__((always_inline)) inline 65 # else 66 # define ALWAYS_INLINE inline 67 # endif 68 # define NO_INLINE __attribute__((noinline)) 69 #endif 70 71 // Some functions may cause excessive binary bloat if inlined in debug or with 72 // GCC builds, so use PREFER_INLINE on these instead of ALWAYS_INLINE. 73 #if defined(__clang__) && defined(NDEBUG) 74 # define PREFER_INLINE ALWAYS_INLINE 75 #else 76 # define PREFER_INLINE inline 77 #endif 78 79 #define UNREACHABLE __builtin_unreachable() 80 81 #define UNUSED [[maybe_unused]] 82 83 #define FALLTHROUGH [[fallthrough]] 84 85 #if defined(MOZILLA_CLIENT) && defined(MOZ_CLANG_PLUGIN) 86 # define IMPLICIT __attribute__((annotate("moz_implicit"))) 87 #else 88 # define IMPLICIT 89 #endif 90 91 #if defined(_MSC_VER) 92 # define ALIGNED_DECL(_align, _type) __declspec(align(_align)) _type 93 #else 94 # define ALIGNED_DECL(_align, _type) _type __attribute__((aligned(_align))) 95 #endif 96 97 #include "gl_defs.h" 98 #include "glsl.h" 99 #include "program.h" 100 #include "texture.h" 101 102 using namespace glsl; 103 104 typedef ivec2_scalar IntPoint; 105 106 struct IntRect { 107 int x0; 108 int y0; 109 int x1; 110 int y1; 111 112 IntRect() : x0(0), y0(0), x1(0), y1(0) {} 113 IntRect(int x0, int y0, int x1, int y1) : x0(x0), y0(y0), x1(x1), y1(y1) {} 114 IntRect(IntPoint origin, IntPoint size) 115 : x0(origin.x), 116 y0(origin.y), 117 x1(origin.x + size.x), 118 y1(origin.y + size.y) {} 119 120 int width() const { return x1 - x0; } 121 int height() const { return y1 - y0; } 122 bool is_empty() const { return width() <= 0 || height() <= 0; } 123 124 IntPoint origin() const { return IntPoint(x0, y0); } 125 126 bool same_size(const IntRect& o) const { 127 return width() == o.width() && height() == o.height(); 128 } 129 130 bool contains(const IntRect& o) const { 131 return o.x0 >= x0 && o.y0 >= y0 && o.x1 <= x1 && o.y1 <= y1; 132 } 133 134 IntRect& 
intersect(const IntRect& o) { 135 x0 = max(x0, o.x0); 136 y0 = max(y0, o.y0); 137 x1 = min(x1, o.x1); 138 y1 = min(y1, o.y1); 139 return *this; 140 } 141 142 IntRect intersection(const IntRect& o) { 143 IntRect result = *this; 144 result.intersect(o); 145 return result; 146 } 147 148 // Scale from source-space to dest-space, optionally rounding inward 149 IntRect& scale(int srcWidth, int srcHeight, int dstWidth, int dstHeight, 150 bool roundIn = false) { 151 x0 = (x0 * dstWidth + (roundIn ? srcWidth - 1 : 0)) / srcWidth; 152 y0 = (y0 * dstHeight + (roundIn ? srcHeight - 1 : 0)) / srcHeight; 153 x1 = (x1 * dstWidth) / srcWidth; 154 y1 = (y1 * dstHeight) / srcHeight; 155 return *this; 156 } 157 158 // Flip the rect's Y coords around inflection point at Y=offset 159 void invert_y(int offset) { 160 y0 = offset - y0; 161 y1 = offset - y1; 162 swap(y0, y1); 163 } 164 165 IntRect& offset(const IntPoint& o) { 166 x0 += o.x; 167 y0 += o.y; 168 x1 += o.x; 169 y1 += o.y; 170 return *this; 171 } 172 173 IntRect operator+(const IntPoint& o) const { 174 return IntRect(*this).offset(o); 175 } 176 IntRect operator-(const IntPoint& o) const { 177 return IntRect(*this).offset(-o); 178 } 179 }; 180 181 typedef vec2_scalar Point2D; 182 typedef vec4_scalar Point3D; 183 184 struct IntRange { 185 int start; 186 int end; 187 188 int len() const { return end - start; } 189 190 IntRange intersect(IntRange r) const { 191 return {max(start, r.start), min(end, r.end)}; 192 } 193 }; 194 195 struct FloatRange { 196 float start; 197 float end; 198 199 float clip(float x) const { return clamp(x, start, end); } 200 201 FloatRange clip(FloatRange r) const { return {clip(r.start), clip(r.end)}; } 202 203 FloatRange merge(FloatRange r) const { 204 return {min(start, r.start), max(end, r.end)}; 205 } 206 207 IntRange round() const { 208 return {int(floor(start + 0.5f)), int(floor(end + 0.5f))}; 209 } 210 211 IntRange round_out() const { return {int(floor(start)), int(ceil(end))}; } 212 }; 213 214 
// Return the horizontal extent spanned by two points, normalized so that
// start <= end.
template <typename P>
static inline FloatRange x_range(P p0, P p1) {
  return {min(p0.x, p1.x), max(p0.x, p1.x)};
}

// CPU-side state for a single GL vertex attribute binding.
struct VertexAttrib {
  size_t size = 0;  // in bytes
  GLenum type = 0;
  bool normalized = false;
  GLsizei stride = 0;
  GLuint offset = 0;
  bool enabled = false;
  GLuint divisor = 0;
  int vertex_array = 0;
  int vertex_buffer = 0;
  char* buf = nullptr;  // XXX: this can easily dangle
  size_t buf_size = 0;  // this will let us bounds check

  // Mark the buffer as invalid so we don't accidentally use stale data.
  void disable() {
    enabled = false;
    buf = nullptr;
    buf_size = 0;
  }
};

// Bytes per pixel for each supported internal format; asserts (and returns 0)
// on anything unsupported.
static int bytes_for_internal_format(GLenum internal_format) {
  switch (internal_format) {
    case GL_RGBA32F:
      return 4 * 4;
    case GL_RGBA32I:
    case GL_RGBA_INTEGER:
      return 4 * 4;
    case GL_RGBA8:
    case GL_BGRA8:
    case GL_RGBA:
    case GL_BGRA:
      return 4;
    case GL_R8:
    case GL_RED:
      return 1;
    case GL_RG8:
    case GL_RG:
      return 2;
    case GL_DEPTH_COMPONENT:
    case GL_DEPTH_COMPONENT16:
    case GL_DEPTH_COMPONENT24:
    case GL_DEPTH_COMPONENT32:
      return 4;
    case GL_RGB_RAW_422_APPLE:
      return 2;
    case GL_R16:
      return 2;
    case GL_RG16:
      return 4;
    default:
      debugf("internal format: %x\n", internal_format);
      assert(0);
      return 0;
  }
}

// Round a row's byte length up to 4-byte alignment.
static inline int aligned_stride(int row_bytes) { return (row_bytes + 3) & ~3; }

// Map a GL internal format to SWGL's TextureFormat enum.
static TextureFormat gl_format_to_texture_format(int type) {
  switch (type) {
    case GL_RGBA32F:
      return TextureFormat::RGBA32F;
    case GL_RGBA32I:
      return TextureFormat::RGBA32I;
    case GL_RGBA8:
      return TextureFormat::RGBA8;
    case GL_R8:
      return TextureFormat::R8;
    case GL_RG8:
      return TextureFormat::RG8;
    case GL_R16:
      return TextureFormat::R16;
    case GL_RG16:
      return TextureFormat::RG16;
    case GL_RGB_RAW_422_APPLE:
      return TextureFormat::YUY2;
    default:
      assert(0);
      return TextureFormat::RGBA8;
  }
}

// A GL query object; SWGL only tracks an accumulated counter value.
struct Query {
  uint64_t value = 0;
};

// A growable heap allocation backing a GL buffer object.
struct Buffer {
  char* buf = nullptr;
  size_t size = 0;      // bytes currently in use
  size_t capacity = 0;  // bytes actually allocated

  // Returns true if re-allocation succeeded, false otherwise...
  bool allocate(size_t new_size) {
    // If the size remains unchanged, don't allocate anything.
    if (new_size == size) {
      return true;
    }
    // If the new size is within the existing capacity of the buffer, just
    // reuse the existing buffer.
    if (new_size <= capacity) {
      size = new_size;
      return true;
    }
    // Otherwise we need to reallocate the buffer to hold up to the requested
    // larger size.
    char* new_buf = (char*)realloc(buf, new_size);
    assert(new_buf);
    if (!new_buf) {
      // If we fail, null out the buffer rather than leave around the old
      // allocation state.
      cleanup();
      return false;
    }
    // The reallocation succeeded, so install the buffer.
    buf = new_buf;
    size = new_size;
    capacity = new_size;
    return true;
  }

  void cleanup() {
    if (buf) {
      free(buf);
      buf = nullptr;
      size = 0;
      capacity = 0;
    }
  }

  ~Buffer() { cleanup(); }

  char* end_ptr() const { return buf ? buf + size : nullptr; }

  // Interpret `data` as a byte offset into the buffer (GL buffer-offset
  // convention) and return a real pointer, or null if out of bounds.
  void* get_data(void* data) {
    if (buf) {
      size_t offset = (size_t)data;
      if (offset < size) {
        return buf + offset;
      }
    }
    return nullptr;
  }
};

// Framebuffer object: just the IDs of its attachments.
struct Framebuffer {
  GLuint color_attachment = 0;
  GLuint depth_attachment = 0;
};

// Renderbuffers are backed by a texture; on_erase detaches it (defined later).
struct Renderbuffer {
  GLuint texture = 0;

  void on_erase();
};

// Map GL filter enums to SWGL's filter enum; all mipmap variants collapse to
// their base filter since SWGL has no mipmaps.
TextureFilter gl_filter_to_texture_filter(int type) {
  switch (type) {
    case GL_NEAREST:
      return TextureFilter::NEAREST;
    case GL_NEAREST_MIPMAP_LINEAR:
      return TextureFilter::NEAREST;
    case GL_NEAREST_MIPMAP_NEAREST:
      return TextureFilter::NEAREST;
    case GL_LINEAR:
      return TextureFilter::LINEAR;
    case GL_LINEAR_MIPMAP_LINEAR:
      return TextureFilter::LINEAR;
    case GL_LINEAR_MIPMAP_NEAREST:
      return TextureFilter::LINEAR;
    default:
      assert(0);
      return TextureFilter::NEAREST;
  }
}

// A 2D texture. The backing store may be owned by SWGL (SHOULD_FREE) or
// supplied externally via set_buffer().
struct Texture {
  GLenum internal_format = 0;
  int width = 0;
  int height = 0;
  char* buf = nullptr;
  size_t buf_size = 0;
  uint32_t buf_stride = 0;
  uint8_t buf_bpp = 0;
  GLenum min_filter = GL_NEAREST;
  GLenum mag_filter = GL_LINEAR;
  // The number of active locks on this texture. If this texture has any active
  // locks, we need to disallow modifying or destroying the texture as it may
  // be accessed by other threads where modifications could lead to races.
  int32_t locked = 0;
  // When used as an attachment of a framebuffer, rendering to the texture
  // behaves as if it is located at the given offset such that the offset is
  // subtracted from all transformed vertexes after the viewport is applied.
  IntPoint offset;

  enum FLAGS {
    // If the buffer is internally-allocated by SWGL
    SHOULD_FREE = 1 << 1,
    // If the buffer has been cleared to initialize it. Currently this is only
    // utilized by depth buffers which need to know when depth runs have reset
    // to a valid row state. When unset, the depth runs may contain garbage.
    CLEARED = 1 << 2,
    // The texture was deleted while still locked and must stay alive until all
    // locks are released.
    ZOMBIE = 1 << 3,
  };
  int flags = SHOULD_FREE;
  bool should_free() const { return bool(flags & SHOULD_FREE); }
  bool cleared() const { return bool(flags & CLEARED); }
  bool zombie() const { return bool(flags & ZOMBIE); }

  void set_flag(int flag, bool val) {
    if (val) {
      flags |= flag;
    } else {
      flags &= ~flag;
    }
  }
  void set_should_free(bool val) {
    // buf must be null before SHOULD_FREE can be safely toggled. Otherwise, we
    // might accidentally realloc an externally allocated buffer as if it were
    // an internally allocated one.
    assert(!buf);
    set_flag(SHOULD_FREE, val);
  }
  void set_cleared(bool val) { set_flag(CLEARED, val); }
  void set_zombie(bool val) { set_flag(ZOMBIE, val); }

  // Delayed-clearing state. When a clear of an FB is requested, we don't
  // immediately clear each row, as the rows may be subsequently overwritten
  // by draw calls, allowing us to skip the work of clearing the affected rows
  // either fully or partially. Instead, we keep a bit vector of rows that need
  // to be cleared later and save the value they need to be cleared with so
  // that we can clear these rows individually when they are touched by draws.
  // This currently only works for 2D textures, but not on texture arrays.
  int delay_clear = 0;          // count of rows still pending a clear
  uint32_t clear_val = 0;       // the value those rows must be cleared with
  uint32_t* cleared_rows = nullptr;  // bit vector: 1 = row already cleared

  void init_depth_runs(uint32_t z);
  void fill_depth_runs(uint32_t z, const IntRect& scissor);

  // Mark every row as pending a clear with the given value. Rows beyond the
  // texture height (the unused tail bits of the last word) are pre-marked as
  // cleared so the pending count stays accurate.
  void enable_delayed_clear(uint32_t val) {
    delay_clear = height;
    clear_val = val;
    if (!cleared_rows) {
      cleared_rows = new uint32_t[(height + 31) / 32];
    }
    memset(cleared_rows, 0, ((height + 31) / 32) * sizeof(uint32_t));
    if (height & 31) {
      cleared_rows[height / 32] = ~0U << (height & 31);
    }
  }

  void disable_delayed_clear() {
    if (cleared_rows) {
      delete[] cleared_rows;
      cleared_rows = nullptr;
      delay_clear = 0;
    }
  }

  int bpp() const { return buf_bpp; }
  int compute_bpp() const { return bytes_for_internal_format(internal_format); }

  size_t stride() const { return buf_stride; }
  size_t compute_stride(int bpp, int width) const {
    return aligned_stride(bpp * width);
  }

  // Set an external backing buffer of this texture.
  void set_buffer(void* new_buf, size_t new_stride) {
    assert(!should_free());
    // Ensure that the supplied stride is at least as big as the row data and
    // is aligned to the smaller of either the BPP or word-size. We need to at
    // least be able to sample data from within a row and sample whole pixels
    // of smaller formats without risking unaligned access.
    int new_bpp = compute_bpp();
    assert(new_stride >= size_t(new_bpp * width) &&
           new_stride % min(new_bpp, sizeof(uint32_t)) == 0);

    buf = (char*)new_buf;
    buf_size = 0;
    buf_bpp = new_bpp;
    buf_stride = new_stride;
  }

  // Returns true if re-allocation succeeded, false otherwise...
  bool allocate(bool force = false, int min_width = 0, int min_height = 0) {
    assert(!locked);  // Locked textures shouldn't be reallocated
    // If we get here, some GL API call that invalidates the texture was used.
    // Mark the buffer as not-cleared to signal this.
    set_cleared(false);
    // Check if there is either no buffer currently or if we forced validation
    // of the buffer size because some dimension might have changed.
    if ((!buf || force) && should_free()) {
      // Compute the buffer's BPP and stride, since they may have changed.
      int new_bpp = compute_bpp();
      size_t new_stride = compute_stride(new_bpp, width);
      // Compute new size based on the maximum potential stride, rather than
      // the current stride, to hopefully avoid reallocations when size would
      // otherwise change too much...
      size_t max_stride = compute_stride(new_bpp, max(width, min_width));
      size_t size = max_stride * max(height, min_height);
      if ((!buf && size > 0) || size > buf_size) {
        // Allocate with a SIMD register-sized tail of padding at the end so we
        // can safely read or write past the end of the texture with SIMD ops.
        // Currently only the flat Z-buffer texture needs this padding due to
        // full-register loads and stores in check_depth and discard_depth. In
        // case some code in the future accidentally uses a linear filter on a
        // texture with less than 2 pixels per row, we also add this padding
        // just to be safe. All other texture types and use-cases should be
        // safe to omit padding.
        size_t padding =
            internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
                ? sizeof(Float)
                : 0;
        char* new_buf = (char*)realloc(buf, size + padding);
        assert(new_buf);
        if (!new_buf) {
          // Allocation failed, so ensure we don't leave stale buffer state.
          cleanup();
          return false;
        }
        // Successfully reallocated the buffer, so go ahead and set it.
        buf = new_buf;
        buf_size = size;
      }
      // Set the BPP and stride in case they changed.
      buf_bpp = new_bpp;
      buf_stride = new_stride;
    }
    // Allocation succeeded or nothing changed...
    return true;
  }

  void cleanup() {
    assert(!locked);  // Locked textures shouldn't be destroyed
    if (buf) {
      // If we need to toggle SHOULD_FREE state, ensure that buf is nulled out,
      // regardless of whether we internally allocated it. This will prevent us
      // from wrongly treating buf as having been internally allocated for when
      // we go to realloc if it actually was externally allocated.
      if (should_free()) {
        free(buf);
      }
      buf = nullptr;
      buf_size = 0;
      buf_bpp = 0;
      buf_stride = 0;
    }
    disable_delayed_clear();
  }

  ~Texture() { cleanup(); }

  IntRect bounds() const { return IntRect{0, 0, width, height}; }
  IntRect offset_bounds() const { return bounds() + offset; }

  // Find the valid sampling bounds relative to the requested region
  IntRect sample_bounds(const IntRect& req, bool invertY = false) const {
    IntRect bb = bounds().intersect(req) - req.origin();
    if (invertY) bb.invert_y(req.height());
    return bb;
  }

  // Get a pointer for sampling at the given offset
  char* sample_ptr(int x, int y) const {
    return buf + y * stride() + x * bpp();
  }

  // Get a pointer to the end of the current buffer
  char* end_ptr() const {
    return buf + (height - 1) * stride() + width * bpp();
  }

  // Get a pointer for sampling the requested region and limit to the provided
  // sampling bounds
  char* sample_ptr(const IntRect& req, const IntRect& bounds,
                   bool invertY = false) const {
    // Offset the sample pointer by the clamped bounds
    int x = req.x0 + bounds.x0;
    // Invert the Y offset if necessary
    int y = invertY ? req.y1 - 1 - bounds.y0 : req.y0 + bounds.y0;
    return sample_ptr(x, y);
  }
};

// The last vertex attribute is reserved as a null attribute in case a vertex
// attribute is used without being set.
#define MAX_ATTRIBS 17
#define NULL_ATTRIB 16
// A vertex array object: per-attribute state plus the element array binding.
struct VertexArray {
  VertexAttrib attribs[MAX_ATTRIBS];
  int max_attrib = -1;  // highest enabled attribute index, -1 if none
  // The GL spec defines element array buffer binding to be part of VAO state.
  GLuint element_array_buffer_binding = 0;

  void validate();
};

// A shader object: SWGL shaders are precompiled, so only the loader is kept.
struct Shader {
  GLenum type = 0;
  ProgramLoader loader = nullptr;
};

// A linked program; owns its ProgramImpl (vert/frag impls are owned by it).
struct Program {
  ProgramImpl* impl = nullptr;
  VertexShaderImpl* vert_impl = nullptr;
  FragmentShaderImpl* frag_impl = nullptr;
  bool deleted = false;  // deferred deletion while still the current program

  ~Program() { delete impl; }
};

// clang-format off
// Fully-expand GL defines while ignoring more than 4 suffixes
#define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
// Generate a blend key enum symbol
#define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
#define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
#define AA_BLEND_KEY(...) CONCAT_KEY(AA_BLEND_, __VA_ARGS__, 0, 0, 0)
#define AA_MASK_BLEND_KEY(...) CONCAT_KEY(AA_MASK_BLEND_, __VA_ARGS__, 0, 0, 0)

// Utility macro to easily generate similar code for all implemented blend modes
#define FOR_EACH_BLEND_KEY(macro)                                              \
  macro(GL_ONE, GL_ZERO, 0, 0)                                                 \
  macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)  \
  macro(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                  \
  macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, 0, 0)                                 \
  macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE)                      \
  macro(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                 \
  macro(GL_ZERO, GL_SRC_COLOR, 0, 0)                                           \
  macro(GL_ONE, GL_ONE, 0, 0)                                                  \
  macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)                        \
  macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE)                       \
  macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0)                       \
  macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0)                                 \
  macro(GL_MIN, 0, 0, 0)                                                       \
  macro(GL_MAX, 0, 0, 0)                                                       \
  macro(GL_MULTIPLY_KHR, 0, 0, 0)                                              \
  macro(GL_SCREEN_KHR, 0, 0, 0)                                                \
  macro(GL_OVERLAY_KHR, 0, 0, 0)                                               \
  macro(GL_DARKEN_KHR, 0, 0, 0)                                                \
  macro(GL_LIGHTEN_KHR, 0, 0, 0)                                               \
  macro(GL_COLORDODGE_KHR, 0, 0, 0)                                            \
  macro(GL_COLORBURN_KHR, 0, 0, 0)                                             \
  macro(GL_HARDLIGHT_KHR, 0, 0, 0)                                             \
  macro(GL_SOFTLIGHT_KHR, 0, 0, 0)                                             \
  macro(GL_DIFFERENCE_KHR, 0, 0, 0)                                            \
  macro(GL_EXCLUSION_KHR, 0, 0, 0)                                             \
  macro(GL_HSL_HUE_KHR, 0, 0, 0)                                               \
  macro(GL_HSL_SATURATION_KHR, 0, 0, 0)                                        \
  macro(GL_HSL_COLOR_KHR, 0, 0, 0)                                             \
  macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0)                                        \
  macro(SWGL_BLEND_DROP_SHADOW, 0, 0, 0)                                       \
  macro(SWGL_BLEND_SUBPIXEL_TEXT, 0, 0, 0)

#define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
#define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_BLEND_KEY(...) AA_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_MASK_BLEND_KEY(...) AA_MASK_BLEND_KEY(__VA_ARGS__),
// Each blend mode exists in four variants: plain, masked, anti-aliased, and
// anti-aliased+masked; the enum enumerates the cross product.
enum BlendKey : uint8_t {
  FOR_EACH_BLEND_KEY(DEFINE_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_MASK_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_MASK_BLEND_KEY)
  BLEND_KEY_NONE = BLEND_KEY(GL_ONE, GL_ZERO),
  MASK_BLEND_KEY_NONE = MASK_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_BLEND_KEY_NONE = AA_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_MASK_BLEND_KEY_NONE = AA_MASK_BLEND_KEY(GL_ONE, GL_ZERO),
};
// clang-format on

const size_t MAX_TEXTURE_UNITS = 16;

// Clear `binding` if it currently references object n; returns whether it did.
template <typename T>
static inline bool unlink(T& binding, T n) {
  if (binding == n) {
    binding = 0;
    return true;
  }
  return false;
}

// A sparse array of heap-allocated GL objects indexed by their GL name.
// Index 0 is reserved as the null object; out-of-range access yields
// `invalid`, a default-constructed sentinel.
template <typename O>
struct ObjectStore {
  O** objects = nullptr;
  size_t size = 0;
  // reserve object 0 as null
  size_t first_free = 1;
  O invalid;

  ~ObjectStore() {
    if (objects) {
      for (size_t i = 0; i < size; i++) delete objects[i];
      free(objects);
    }
  }

  // Grow the pointer table so index i is addressable (1.5x growth policy).
  bool grow(size_t i) {
    size_t new_size = size ? size : 8;
    while (new_size <= i) new_size += new_size / 2;
    O** new_objects = (O**)realloc(objects, new_size * sizeof(O*));
    assert(new_objects);
    if (!new_objects) return false;
    while (size < new_size) new_objects[size++] = nullptr;
    objects = new_objects;
    return true;
  }

  // Place a copy of o at index i unless that slot is already occupied.
  void insert(size_t i, const O& o) {
    if (i >= size && !grow(i)) return;
    if (!objects[i]) objects[i] = new O(o);
  }

  size_t next_free() {
    size_t i = first_free;
    while (i < size && objects[i]) i++;
    first_free = i;
    return i;
  }

  size_t insert(const O& o = O()) {
    size_t i = next_free();
    insert(i, o);
    return i;
  }

  // Auto-vivifying access: creates a default object at i if none exists.
  // Falls back to `invalid` if allocation/growth failed.
  O& operator[](size_t i) {
    insert(i, O());
    return i < size ? *objects[i] : invalid;
  }

  O* find(size_t i) const { return i < size ? objects[i] : nullptr; }

  // SFINAE pair: the second overload is preferred when O has an on_erase()
  // member (e.g. Renderbuffer); otherwise the variadic no-op is chosen.
  template <typename T>
  void on_erase(T*, ...) {}
  template <typename T>
  void on_erase(T* o, decltype(&T::on_erase)) {
    o->on_erase();
  }

  bool erase(size_t i, bool should_delete = true) {
    if (i < size && objects[i]) {
      on_erase(objects[i], nullptr);
      if (should_delete) {
        delete objects[i];
      }
      objects[i] = nullptr;
      if (i < first_free) first_free = i;
      return true;
    }
    return false;
  }

  O** begin() const { return objects; }
  O** end() const { return &objects[size]; }
};

// The complete GL context state for one SWGL instance.
struct Context {
  int32_t references = 1;

  ObjectStore<Query> queries;
  ObjectStore<Buffer> buffers;
  ObjectStore<Texture> textures;
  ObjectStore<VertexArray> vertex_arrays;
  ObjectStore<Framebuffer> framebuffers;
  ObjectStore<Renderbuffer> renderbuffers;
  ObjectStore<Shader> shaders;
  ObjectStore<Program> programs;

  GLenum last_error = GL_NO_ERROR;

  IntRect viewport = {0, 0, 0, 0};

  bool blend = false;
  GLenum blendfunc_srgb = GL_ONE;
  GLenum blendfunc_drgb = GL_ZERO;
  GLenum blendfunc_sa = GL_ONE;
  GLenum blendfunc_da = GL_ZERO;
  GLenum blend_equation = GL_FUNC_ADD;
  V8<uint16_t> blendcolor = 0;
  BlendKey blend_key = BLEND_KEY_NONE;

  bool depthtest = false;
  bool depthmask = true;
  GLenum depthfunc = GL_LESS;

  bool scissortest = false;
  IntRect scissor = {0, 0, 0, 0};

  GLfloat clearcolor[4] = {0, 0, 0, 0};
  GLdouble cleardepth = 1;

  int unpack_row_length = 0;

  // Shading statistics, reported via queries.
  int shaded_rows = 0;
  int shaded_pixels = 0;

  struct TextureUnit {
    GLuint texture_2d_binding = 0;
    GLuint texture_rectangle_binding = 0;

    void unlink(GLuint n) {
      ::unlink(texture_2d_binding, n);
      ::unlink(texture_rectangle_binding, n);
    }
  };
  TextureUnit texture_units[MAX_TEXTURE_UNITS];
  int active_texture_unit = 0;

  GLuint current_program = 0;

  GLuint current_vertex_array = 0;
  bool validate_vertex_array = true;

  GLuint pixel_pack_buffer_binding = 0;
  GLuint pixel_unpack_buffer_binding = 0;
  GLuint array_buffer_binding = 0;
  GLuint time_elapsed_query = 0;
  GLuint samples_passed_query = 0;
  GLuint renderbuffer_binding = 0;
  GLuint draw_framebuffer_binding = 0;
  GLuint read_framebuffer_binding = 0;
  GLuint unknown_binding = 0;  // sink for unrecognized binding targets

  // Map a GL binding target enum to the context field that stores it.
  GLuint& get_binding(GLenum name) {
    switch (name) {
      case GL_PIXEL_PACK_BUFFER:
        return pixel_pack_buffer_binding;
      case GL_PIXEL_UNPACK_BUFFER:
        return pixel_unpack_buffer_binding;
      case GL_ARRAY_BUFFER:
        return array_buffer_binding;
      case GL_ELEMENT_ARRAY_BUFFER:
        return vertex_arrays[current_vertex_array].element_array_buffer_binding;
      case GL_TEXTURE_2D:
        return texture_units[active_texture_unit].texture_2d_binding;
      case GL_TEXTURE_RECTANGLE:
        return texture_units[active_texture_unit].texture_rectangle_binding;
      case GL_TIME_ELAPSED:
        return time_elapsed_query;
      case GL_SAMPLES_PASSED:
        return samples_passed_query;
      case GL_RENDERBUFFER:
        return renderbuffer_binding;
      case GL_DRAW_FRAMEBUFFER:
        return draw_framebuffer_binding;
      case GL_READ_FRAMEBUFFER:
        return read_framebuffer_binding;
      default:
        debugf("unknown binding %x\n", name);
        assert(false);
        return unknown_binding;
    }
  }

  // The sampler argument is used only for overload selection of the
  // appropriate texture target.
  Texture& get_texture(sampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(isampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(sampler2DRect, int unit) {
    return textures[texture_units[unit].texture_rectangle_binding];
  }

  // Clip bb against the scissor rect (translated by origin) if scissor
  // testing is enabled; otherwise return bb unchanged.
  IntRect apply_scissor(IntRect bb,
                        const IntPoint& origin = IntPoint(0, 0)) const {
    return scissortest ? bb.intersect(scissor - origin) : bb;
  }

  IntRect apply_scissor(const Texture& t) const {
    return apply_scissor(t.bounds(), t.offset);
  }
};
static Context* ctx = nullptr;
static VertexShaderImpl* vertex_shader = nullptr;
static FragmentShaderImpl* fragment_shader = nullptr;
static BlendKey blend_key = BLEND_KEY_NONE;

static void prepare_texture(Texture& t, const IntRect* skip = nullptr);

template <typename S>
static inline void init_filter(S* s, Texture& t) {
  // If the width is not at least 2 pixels, then we can't safely sample the end
  // of the row with a linear filter. In that case, just punt to using nearest
  // filtering instead.
  s->filter = t.width >= 2 ? gl_filter_to_texture_filter(t.mag_filter)
                           : TextureFilter::NEAREST;
}

// Fill in the sampler fields (dimensions, stride, buffer, format) from a
// texture that has a valid backing buffer.
template <typename S>
static inline void init_sampler(S* s, Texture& t) {
  prepare_texture(t);
  s->width = t.width;
  s->height = t.height;
  s->stride = t.stride();
  int bpp = t.bpp();
  // Sampler stride is measured in units of its element size, not bytes.
  if (bpp >= 4)
    s->stride /= 4;
  else if (bpp == 2)
    s->stride /= 2;
  else
    assert(bpp == 1);
  // Use uint32_t* for easier sampling, but need to cast to uint8_t* or
  // uint16_t* for formats with bpp < 4.
  s->buf = (uint32_t*)t.buf;
  s->format = gl_format_to_texture_format(t.internal_format);
}

template <typename S>
static inline void null_sampler(S* s) {
  // For null texture data, just make the sampler provide a 1x1 buffer that is
  // transparent black. Ensure buffer holds at least a SIMD vector of zero data
  // for SIMD padding of unaligned loads.
  static const uint32_t zeroBuf[sizeof(Float) / sizeof(uint32_t)] = {0};
  s->width = 1;
  s->height = 1;
  s->stride = s->width;
  s->buf = (uint32_t*)zeroBuf;
  s->format = TextureFormat::RGBA8;
}

template <typename S>
static inline void null_filter(S* s) {
  s->filter = TextureFilter::NEAREST;
}

// Bind sampler s to the texture on the given unit, falling back to the null
// sampler when the texture has no buffer.
template <typename S>
S* lookup_sampler(S* s, int texture) {
  Texture& t = ctx->get_texture(s, texture);
  if (!t.buf) {
    null_sampler(s);
    null_filter(s);
  } else {
    init_sampler(s, t);
    init_filter(s, t);
  }
  return s;
}

// Integer-sampler variant; integer samplers have no filter field.
template <typename S>
S* lookup_isampler(S* s, int texture) {
  Texture& t = ctx->get_texture(s, texture);
  if (!t.buf) {
    null_sampler(s);
  } else {
    init_sampler(s, t);
  }
  return s;
}

// Size in bytes of a single component of the given vertex attribute type.
int bytes_per_type(GLenum type) {
  switch (type) {
    case GL_INT:
      return 4;
    case GL_FLOAT:
      return 4;
    case GL_UNSIGNED_SHORT:
      return 2;
    case GL_UNSIGNED_BYTE:
      return 1;
    default:
      assert(0);
      return 0;
  }
}

// Widen `size` bytes of packed components of type C into scalar vector S,
// optionally normalizing unsigned integer data to [0, 1].
// NOTE(review): only instantiated for C = uint8_t/uint16_t below; the scale
// computation would overflow-shift for 4-byte C.
template <typename S, typename C>
static inline S expand_attrib(const char* buf, size_t size, bool normalized) {
  typedef typename ElementType<S>::ty elem_type;
  S scalar = {0};
  const C* src = reinterpret_cast<const C*>(buf);
  if (normalized) {
    const float scale = 1.0f / ((1 << (8 * sizeof(C))) - 1);
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]) * scale);
    }
  } else {
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]));
    }
  }
  return scalar;
}

// Load one attribute value from src, converting/expanding the stored type
// to the shader's scalar type S as needed.
template <typename S>
static inline S load_attrib_scalar(VertexAttrib& va, const char* src) {
  if (sizeof(S) <= va.size) {
    // The stored data is at least as wide as S; read it directly.
    return *reinterpret_cast<const S*>(src);
  }
  if (va.type == GL_UNSIGNED_SHORT) {
    return expand_attrib<S, uint16_t>(src, va.size, va.normalized);
  }
  if (va.type == GL_UNSIGNED_BYTE) {
    return expand_attrib<S, uint8_t>(src, va.size, va.normalized);
  }
  // Same element type but fewer components than S; zero-fill the rest.
  assert(sizeof(typename ElementType<S>::ty) == bytes_per_type(va.type));
  S scalar = {0};
  memcpy(&scalar, src, va.size);
  return scalar;
}

// Load a varying attribute into the 4 SIMD lanes of the vertex shader, using
// WebRender's primitive vertex order/winding (see per-case comments).
template <typename T>
void load_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                 int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  // If no buffer is available, just use a zero default.
  if (!va.buf_size) {
    attrib = T(scalar_type{0});
  } else if (va.divisor != 0) {
    // Instanced attribute: one value replicated across all lanes.
    char* src = (char*)va.buf + va.stride * instance + va.offset;
    assert(src + va.size <= va.buf + va.buf_size);
    attrib = T(load_attrib_scalar<scalar_type>(va, src));
  } else {
    // Specialized for WR's primitive vertex order/winding.
    if (!count) return;
    assert(count >= 2 && count <= 4);
    char* src = (char*)va.buf + va.stride * start + va.offset;
    switch (count) {
      case 2: {
        // Lines must be indexed at offsets 0, 1.
        // Line vertexes fill vertex shader SIMD lanes as 0, 1, 1, 0.
        scalar_type lanes[2] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride)};
        attrib = (T){lanes[0], lanes[1], lanes[1], lanes[0]};
        break;
      }
      case 3: {
        // Triangles must be indexed at offsets 0, 1, 2.
        // Triangle vertexes fill vertex shader SIMD lanes as 0, 1, 2, 2.
        scalar_type lanes[3] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride),
            load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        attrib = (T){lanes[0], lanes[1], lanes[2], lanes[2]};
        break;
      }
      default:
        // Quads must be successive triangles indexed at offsets 0, 1, 2, 2,
        // 1, 3. Quad vertexes fill vertex shader SIMD lanes as 0, 1, 3, 2, so
        // that the points form a convex path that can be traversed by the
        // rasterizer.
        attrib = (T){load_attrib_scalar<scalar_type>(va, src),
                     load_attrib_scalar<scalar_type>(va, src + va.stride),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 3),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        break;
    }
  }
}

// Load a flat (non-interpolated) attribute: a single value taken from the
// first vertex (or the instance) and broadcast to all lanes.
template <typename T>
void load_flat_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                      int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  // If no buffer is available, just use a zero default.
  if (!va.buf_size) {
    attrib = T{0};
    return;
  }
  char* src = nullptr;
  if (va.divisor != 0) {
    src = (char*)va.buf + va.stride * instance + va.offset;
  } else {
    if (!count) return;
    src = (char*)va.buf + va.stride * start + va.offset;
  }
  assert(src + va.size <= va.buf + va.buf_size);
  attrib = T(load_attrib_scalar<scalar_type>(va, src));
}

// Install the given program's shader implementations as the active shaders.
void setup_program(GLuint program) {
  if (!program) {
    vertex_shader = nullptr;
    fragment_shader = nullptr;
    return;
  }
  Program& p = ctx->programs[program];
  assert(p.impl);
  assert(p.vert_impl);
  assert(p.frag_impl);
  vertex_shader = p.vert_impl;
  fragment_shader = p.frag_impl;
}

extern ProgramLoader load_shader(const char* name);

extern "C" {

void UseProgram(GLuint program) {
  // A program flagged for deletion is actually destroyed once it stops being
  // the current program.
  if (ctx->current_program && program != ctx->current_program) {
    auto* p = ctx->programs.find(ctx->current_program);
    if (p && p->deleted) {
      ctx->programs.erase(ctx->current_program);
    }
  }
  ctx->current_program = program;
  setup_program(program);
}

void SetViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
  ctx->viewport = IntRect{x, y, x + width, y + height};
}

// NOTE: the extern "C" block remains open; it is closed later in the file.
1129 void Enable(GLenum cap) { 1130 switch (cap) { 1131 case GL_BLEND: 1132 ctx->blend = true; 1133 break; 1134 case GL_DEPTH_TEST: 1135 ctx->depthtest = true; 1136 break; 1137 case GL_SCISSOR_TEST: 1138 ctx->scissortest = true; 1139 break; 1140 } 1141 } 1142 1143 void Disable(GLenum cap) { 1144 switch (cap) { 1145 case GL_BLEND: 1146 ctx->blend = false; 1147 break; 1148 case GL_DEPTH_TEST: 1149 ctx->depthtest = false; 1150 break; 1151 case GL_SCISSOR_TEST: 1152 ctx->scissortest = false; 1153 break; 1154 } 1155 } 1156 1157 // Report the last error generated and clear the error status. 1158 GLenum GetError() { 1159 GLenum error = ctx->last_error; 1160 ctx->last_error = GL_NO_ERROR; 1161 return error; 1162 } 1163 1164 // Sets the error status to out-of-memory to indicate that a buffer 1165 // or texture re-allocation failed. 1166 static void out_of_memory() { ctx->last_error = GL_OUT_OF_MEMORY; } 1167 1168 static const char* const extensions[] = { 1169 "GL_ARB_blend_func_extended", 1170 "GL_ARB_clear_texture", 1171 "GL_ARB_copy_image", 1172 "GL_ARB_draw_instanced", 1173 "GL_ARB_explicit_attrib_location", 1174 "GL_ARB_instanced_arrays", 1175 "GL_ARB_invalidate_subdata", 1176 "GL_ARB_texture_storage", 1177 "GL_EXT_timer_query", 1178 "GL_KHR_blend_equation_advanced", 1179 "GL_KHR_blend_equation_advanced_coherent", 1180 "GL_APPLE_rgb_422", 1181 }; 1182 1183 void GetIntegerv(GLenum pname, GLint* params) { 1184 assert(params); 1185 switch (pname) { 1186 case GL_MAX_TEXTURE_UNITS: 1187 case GL_MAX_TEXTURE_IMAGE_UNITS: 1188 params[0] = MAX_TEXTURE_UNITS; 1189 break; 1190 case GL_MAX_TEXTURE_SIZE: 1191 params[0] = 1 << 15; 1192 break; 1193 case GL_MAX_ARRAY_TEXTURE_LAYERS: 1194 params[0] = 0; 1195 break; 1196 case GL_READ_FRAMEBUFFER_BINDING: 1197 params[0] = ctx->read_framebuffer_binding; 1198 break; 1199 case GL_DRAW_FRAMEBUFFER_BINDING: 1200 params[0] = ctx->draw_framebuffer_binding; 1201 break; 1202 case GL_PIXEL_PACK_BUFFER_BINDING: 1203 params[0] = 
ctx->pixel_pack_buffer_binding; 1204 break; 1205 case GL_PIXEL_UNPACK_BUFFER_BINDING: 1206 params[0] = ctx->pixel_unpack_buffer_binding; 1207 break; 1208 case GL_NUM_EXTENSIONS: 1209 params[0] = sizeof(extensions) / sizeof(extensions[0]); 1210 break; 1211 case GL_MAJOR_VERSION: 1212 params[0] = 3; 1213 break; 1214 case GL_MINOR_VERSION: 1215 params[0] = 2; 1216 break; 1217 case GL_MIN_PROGRAM_TEXEL_OFFSET: 1218 params[0] = 0; 1219 break; 1220 case GL_MAX_PROGRAM_TEXEL_OFFSET: 1221 params[0] = MAX_TEXEL_OFFSET; 1222 break; 1223 default: 1224 debugf("unhandled glGetIntegerv parameter %x\n", pname); 1225 assert(false); 1226 } 1227 } 1228 1229 void GetBooleanv(GLenum pname, GLboolean* params) { 1230 assert(params); 1231 switch (pname) { 1232 case GL_DEPTH_WRITEMASK: 1233 params[0] = ctx->depthmask; 1234 break; 1235 default: 1236 debugf("unhandled glGetBooleanv parameter %x\n", pname); 1237 assert(false); 1238 } 1239 } 1240 1241 const char* GetString(GLenum name) { 1242 switch (name) { 1243 case GL_VENDOR: 1244 return "Mozilla Gfx"; 1245 case GL_RENDERER: 1246 return "Software WebRender"; 1247 case GL_VERSION: 1248 return "3.2"; 1249 case GL_SHADING_LANGUAGE_VERSION: 1250 return "1.50"; 1251 default: 1252 debugf("unhandled glGetString parameter %x\n", name); 1253 assert(false); 1254 return nullptr; 1255 } 1256 } 1257 1258 const char* GetStringi(GLenum name, GLuint index) { 1259 switch (name) { 1260 case GL_EXTENSIONS: 1261 if (index >= sizeof(extensions) / sizeof(extensions[0])) { 1262 return nullptr; 1263 } 1264 return extensions[index]; 1265 default: 1266 debugf("unhandled glGetStringi parameter %x\n", name); 1267 assert(false); 1268 return nullptr; 1269 } 1270 } 1271 1272 GLenum remap_blendfunc(GLenum rgb, GLenum a) { 1273 switch (a) { 1274 case GL_SRC_ALPHA: 1275 if (rgb == GL_SRC_COLOR) a = GL_SRC_COLOR; 1276 break; 1277 case GL_ONE_MINUS_SRC_ALPHA: 1278 if (rgb == GL_ONE_MINUS_SRC_COLOR) a = GL_ONE_MINUS_SRC_COLOR; 1279 break; 1280 case GL_DST_ALPHA: 1281 if (rgb 
== GL_DST_COLOR) a = GL_DST_COLOR; 1282 break; 1283 case GL_ONE_MINUS_DST_ALPHA: 1284 if (rgb == GL_ONE_MINUS_DST_COLOR) a = GL_ONE_MINUS_DST_COLOR; 1285 break; 1286 case GL_CONSTANT_ALPHA: 1287 if (rgb == GL_CONSTANT_COLOR) a = GL_CONSTANT_COLOR; 1288 break; 1289 case GL_ONE_MINUS_CONSTANT_ALPHA: 1290 if (rgb == GL_ONE_MINUS_CONSTANT_COLOR) a = GL_ONE_MINUS_CONSTANT_COLOR; 1291 break; 1292 case GL_SRC_COLOR: 1293 if (rgb == GL_SRC_ALPHA) a = GL_SRC_ALPHA; 1294 break; 1295 case GL_ONE_MINUS_SRC_COLOR: 1296 if (rgb == GL_ONE_MINUS_SRC_ALPHA) a = GL_ONE_MINUS_SRC_ALPHA; 1297 break; 1298 case GL_DST_COLOR: 1299 if (rgb == GL_DST_ALPHA) a = GL_DST_ALPHA; 1300 break; 1301 case GL_ONE_MINUS_DST_COLOR: 1302 if (rgb == GL_ONE_MINUS_DST_ALPHA) a = GL_ONE_MINUS_DST_ALPHA; 1303 break; 1304 case GL_CONSTANT_COLOR: 1305 if (rgb == GL_CONSTANT_ALPHA) a = GL_CONSTANT_ALPHA; 1306 break; 1307 case GL_ONE_MINUS_CONSTANT_COLOR: 1308 if (rgb == GL_ONE_MINUS_CONSTANT_ALPHA) a = GL_ONE_MINUS_CONSTANT_ALPHA; 1309 break; 1310 case GL_SRC1_ALPHA: 1311 if (rgb == GL_SRC1_COLOR) a = GL_SRC1_COLOR; 1312 break; 1313 case GL_ONE_MINUS_SRC1_ALPHA: 1314 if (rgb == GL_ONE_MINUS_SRC1_COLOR) a = GL_ONE_MINUS_SRC1_COLOR; 1315 break; 1316 case GL_SRC1_COLOR: 1317 if (rgb == GL_SRC1_ALPHA) a = GL_SRC1_ALPHA; 1318 break; 1319 case GL_ONE_MINUS_SRC1_COLOR: 1320 if (rgb == GL_ONE_MINUS_SRC1_ALPHA) a = GL_ONE_MINUS_SRC1_ALPHA; 1321 break; 1322 } 1323 return a; 1324 } 1325 1326 // Generate a hashed blend key based on blend func and equation state. This 1327 // allows all the blend state to be processed down to a blend key that can be 1328 // dealt with inside a single switch statement. 
1329 static void hash_blend_key() { 1330 GLenum srgb = ctx->blendfunc_srgb; 1331 GLenum drgb = ctx->blendfunc_drgb; 1332 GLenum sa = ctx->blendfunc_sa; 1333 GLenum da = ctx->blendfunc_da; 1334 GLenum equation = ctx->blend_equation; 1335 #define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20)) 1336 // Basic non-separate blend funcs used the two argument form 1337 int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0); 1338 // Separate alpha blend funcs use the 4 argument hash 1339 if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da); 1340 // Any other blend equation than the default func_add ignores the func and 1341 // instead generates a one-argument hash based on the equation 1342 if (equation != GL_FUNC_ADD) hash = HASH_BLEND_KEY(equation, 0, 0, 0); 1343 switch (hash) { 1344 #define MAP_BLEND_KEY(...) \ 1345 case HASH_BLEND_KEY(__VA_ARGS__): \ 1346 ctx->blend_key = BLEND_KEY(__VA_ARGS__); \ 1347 break; 1348 FOR_EACH_BLEND_KEY(MAP_BLEND_KEY) 1349 default: 1350 debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", srgb, drgb, 1351 sa, da, equation); 1352 assert(false); 1353 break; 1354 } 1355 } 1356 1357 void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) { 1358 ctx->blendfunc_srgb = srgb; 1359 ctx->blendfunc_drgb = drgb; 1360 sa = remap_blendfunc(srgb, sa); 1361 da = remap_blendfunc(drgb, da); 1362 ctx->blendfunc_sa = sa; 1363 ctx->blendfunc_da = da; 1364 1365 hash_blend_key(); 1366 } 1367 1368 void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { 1369 I32 c = round_pixel((Float){b, g, r, a}); 1370 ctx->blendcolor = CONVERT(c, U16).xyzwxyzw; 1371 } 1372 1373 void BlendEquation(GLenum mode) { 1374 assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX || 1375 (mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR)); 1376 if (mode != ctx->blend_equation) { 1377 ctx->blend_equation = mode; 1378 hash_blend_key(); 1379 } 1380 } 1381 1382 void DepthMask(GLboolean flag) { ctx->depthmask = flag; } 1383 1384 void 
DepthFunc(GLenum func) { 1385 switch (func) { 1386 case GL_LESS: 1387 case GL_LEQUAL: 1388 break; 1389 default: 1390 assert(false); 1391 } 1392 ctx->depthfunc = func; 1393 } 1394 1395 void SetScissor(GLint x, GLint y, GLsizei width, GLsizei height) { 1396 ctx->scissor = IntRect{x, y, x + width, y + height}; 1397 } 1398 1399 void ClearColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { 1400 ctx->clearcolor[0] = r; 1401 ctx->clearcolor[1] = g; 1402 ctx->clearcolor[2] = b; 1403 ctx->clearcolor[3] = a; 1404 } 1405 1406 void ClearDepth(GLdouble depth) { ctx->cleardepth = depth; } 1407 1408 void ActiveTexture(GLenum texture) { 1409 assert(texture >= GL_TEXTURE0); 1410 assert(texture < GL_TEXTURE0 + MAX_TEXTURE_UNITS); 1411 ctx->active_texture_unit = 1412 clamp(int(texture - GL_TEXTURE0), 0, int(MAX_TEXTURE_UNITS - 1)); 1413 } 1414 1415 void GenQueries(GLsizei n, GLuint* result) { 1416 for (int i = 0; i < n; i++) { 1417 Query q; 1418 result[i] = ctx->queries.insert(q); 1419 } 1420 } 1421 1422 void DeleteQuery(GLuint n) { 1423 if (n && ctx->queries.erase(n)) { 1424 unlink(ctx->time_elapsed_query, n); 1425 unlink(ctx->samples_passed_query, n); 1426 } 1427 } 1428 1429 void GenBuffers(int n, GLuint* result) { 1430 for (int i = 0; i < n; i++) { 1431 Buffer b; 1432 result[i] = ctx->buffers.insert(b); 1433 } 1434 } 1435 1436 void DeleteBuffer(GLuint n) { 1437 if (n && ctx->buffers.erase(n)) { 1438 unlink(ctx->pixel_pack_buffer_binding, n); 1439 unlink(ctx->pixel_unpack_buffer_binding, n); 1440 unlink(ctx->array_buffer_binding, n); 1441 } 1442 } 1443 1444 void GenVertexArrays(int n, GLuint* result) { 1445 for (int i = 0; i < n; i++) { 1446 VertexArray v; 1447 result[i] = ctx->vertex_arrays.insert(v); 1448 } 1449 } 1450 1451 void DeleteVertexArray(GLuint n) { 1452 if (n && ctx->vertex_arrays.erase(n)) { 1453 unlink(ctx->current_vertex_array, n); 1454 } 1455 } 1456 1457 GLuint CreateShader(GLenum type) { 1458 Shader s; 1459 s.type = type; 1460 return ctx->shaders.insert(s); 1461 } 
1462 1463 void ShaderSourceByName(GLuint shader, char* name) { 1464 Shader& s = ctx->shaders[shader]; 1465 s.loader = load_shader(name); 1466 if (!s.loader) { 1467 debugf("unknown shader %s\n", name); 1468 } 1469 } 1470 1471 void AttachShader(GLuint program, GLuint shader) { 1472 Program& p = ctx->programs[program]; 1473 Shader& s = ctx->shaders[shader]; 1474 if (s.type == GL_VERTEX_SHADER) { 1475 if (!p.impl && s.loader) p.impl = s.loader(); 1476 } else if (s.type == GL_FRAGMENT_SHADER) { 1477 if (!p.impl && s.loader) p.impl = s.loader(); 1478 } else { 1479 assert(0); 1480 } 1481 } 1482 1483 void DeleteShader(GLuint n) { 1484 if (n) ctx->shaders.erase(n); 1485 } 1486 1487 GLuint CreateProgram() { 1488 Program p; 1489 return ctx->programs.insert(p); 1490 } 1491 1492 void DeleteProgram(GLuint n) { 1493 if (!n) return; 1494 if (ctx->current_program == n) { 1495 if (auto* p = ctx->programs.find(n)) { 1496 p->deleted = true; 1497 } 1498 } else { 1499 ctx->programs.erase(n); 1500 } 1501 } 1502 1503 void LinkProgram(GLuint program) { 1504 Program& p = ctx->programs[program]; 1505 assert(p.impl); 1506 if (!p.impl) { 1507 return; 1508 } 1509 assert(p.impl->interpolants_size() <= sizeof(Interpolants)); 1510 if (!p.vert_impl) p.vert_impl = p.impl->get_vertex_shader(); 1511 if (!p.frag_impl) p.frag_impl = p.impl->get_fragment_shader(); 1512 } 1513 1514 GLint GetLinkStatus(GLuint program) { 1515 if (auto* p = ctx->programs.find(program)) { 1516 return p->impl ? 
1 : 0; 1517 } 1518 return 0; 1519 } 1520 1521 void BindAttribLocation(GLuint program, GLuint index, char* name) { 1522 Program& p = ctx->programs[program]; 1523 assert(p.impl); 1524 if (!p.impl) { 1525 return; 1526 } 1527 p.impl->bind_attrib(name, index); 1528 } 1529 1530 GLint GetAttribLocation(GLuint program, char* name) { 1531 Program& p = ctx->programs[program]; 1532 assert(p.impl); 1533 if (!p.impl) { 1534 return -1; 1535 } 1536 return p.impl->get_attrib(name); 1537 } 1538 1539 GLint GetUniformLocation(GLuint program, char* name) { 1540 Program& p = ctx->programs[program]; 1541 assert(p.impl); 1542 if (!p.impl) { 1543 return -1; 1544 } 1545 GLint loc = p.impl->get_uniform(name); 1546 // debugf("location: %d\n", loc); 1547 return loc; 1548 } 1549 1550 static uint64_t get_time_value() { 1551 #ifdef __MACH__ 1552 return mach_absolute_time(); 1553 #elif defined(_WIN32) 1554 LARGE_INTEGER time; 1555 static bool have_frequency = false; 1556 static LARGE_INTEGER frequency; 1557 if (!have_frequency) { 1558 QueryPerformanceFrequency(&frequency); 1559 have_frequency = true; 1560 } 1561 QueryPerformanceCounter(&time); 1562 return time.QuadPart * 1000000000ULL / frequency.QuadPart; 1563 #else 1564 return ({ 1565 struct timespec tp; 1566 clock_gettime(CLOCK_MONOTONIC, &tp); 1567 tp.tv_sec * 1000000000ULL + tp.tv_nsec; 1568 }); 1569 #endif 1570 } 1571 1572 void BeginQuery(GLenum target, GLuint id) { 1573 ctx->get_binding(target) = id; 1574 Query& q = ctx->queries[id]; 1575 switch (target) { 1576 case GL_SAMPLES_PASSED: 1577 q.value = 0; 1578 break; 1579 case GL_TIME_ELAPSED: 1580 q.value = get_time_value(); 1581 break; 1582 default: 1583 debugf("unknown query target %x for query %d\n", target, id); 1584 assert(false); 1585 } 1586 } 1587 1588 void EndQuery(GLenum target) { 1589 Query& q = ctx->queries[ctx->get_binding(target)]; 1590 switch (target) { 1591 case GL_SAMPLES_PASSED: 1592 break; 1593 case GL_TIME_ELAPSED: 1594 q.value = get_time_value() - q.value; 1595 break; 
1596 default: 1597 debugf("unknown query target %x\n", target); 1598 assert(false); 1599 } 1600 ctx->get_binding(target) = 0; 1601 } 1602 1603 void GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params) { 1604 Query& q = ctx->queries[id]; 1605 switch (pname) { 1606 case GL_QUERY_RESULT: 1607 assert(params); 1608 params[0] = q.value; 1609 break; 1610 default: 1611 assert(false); 1612 } 1613 } 1614 1615 void BindVertexArray(GLuint vertex_array) { 1616 if (vertex_array != ctx->current_vertex_array) { 1617 ctx->validate_vertex_array = true; 1618 } 1619 ctx->current_vertex_array = vertex_array; 1620 } 1621 1622 void BindTexture(GLenum target, GLuint texture) { 1623 ctx->get_binding(target) = texture; 1624 } 1625 1626 void BindBuffer(GLenum target, GLuint buffer) { 1627 ctx->get_binding(target) = buffer; 1628 } 1629 1630 void BindFramebuffer(GLenum target, GLuint fb) { 1631 if (target == GL_FRAMEBUFFER) { 1632 ctx->read_framebuffer_binding = fb; 1633 ctx->draw_framebuffer_binding = fb; 1634 } else { 1635 assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER); 1636 ctx->get_binding(target) = fb; 1637 } 1638 } 1639 1640 void BindRenderbuffer(GLenum target, GLuint rb) { 1641 ctx->get_binding(target) = rb; 1642 } 1643 1644 void PixelStorei(GLenum name, GLint param) { 1645 if (name == GL_UNPACK_ALIGNMENT) { 1646 assert(param == 1); 1647 } else if (name == GL_UNPACK_ROW_LENGTH) { 1648 ctx->unpack_row_length = param; 1649 } 1650 } 1651 1652 static GLenum remap_internal_format(GLenum format) { 1653 switch (format) { 1654 case GL_DEPTH_COMPONENT: 1655 return GL_DEPTH_COMPONENT24; 1656 case GL_RGBA: 1657 return GL_RGBA8; 1658 case GL_RED: 1659 return GL_R8; 1660 case GL_RG: 1661 return GL_RG8; 1662 case GL_RGB_422_APPLE: 1663 return GL_RGB_RAW_422_APPLE; 1664 default: 1665 return format; 1666 } 1667 } 1668 1669 } // extern "C" 1670 1671 static bool format_requires_conversion(GLenum external_format, 1672 GLenum internal_format) { 1673 switch 
(external_format) { 1674 case GL_RGBA: 1675 return internal_format == GL_RGBA8; 1676 case GL_RED: 1677 return internal_format != GL_R8 && internal_format != GL_R16; 1678 case GL_RG: 1679 return internal_format != GL_RG8 && internal_format != GL_RG16; 1680 default: 1681 return false; 1682 } 1683 } 1684 1685 static inline void copy_bgra8_to_rgba8(uint32_t* dest, const uint32_t* src, 1686 int width) { 1687 for (; width >= 4; width -= 4, dest += 4, src += 4) { 1688 U32 p = unaligned_load<U32>(src); 1689 U32 rb = p & 0x00FF00FF; 1690 unaligned_store(dest, (p & 0xFF00FF00) | (rb << 16) | (rb >> 16)); 1691 } 1692 for (; width > 0; width--, dest++, src++) { 1693 uint32_t p = *src; 1694 uint32_t rb = p & 0x00FF00FF; 1695 *dest = (p & 0xFF00FF00) | (rb << 16) | (rb >> 16); 1696 } 1697 } 1698 1699 static inline void copy_red_to_rgba32f(float* dest, const float* src, 1700 int width) { 1701 for (; width > 0; width--, dest += 4, src++) { 1702 dest[0] = *src; 1703 dest[1] = 0.0f; 1704 dest[2] = 0.0f; 1705 dest[3] = 1.0f; 1706 } 1707 } 1708 1709 static inline void copy_red_to_bgra8(uint8_t* dest, const uint8_t* src, 1710 int width) { 1711 for (; width > 0; width--, dest += 4, src++) { 1712 dest[0] = 0; 1713 dest[1] = 0; 1714 dest[2] = *src; 1715 dest[3] = 255; 1716 } 1717 } 1718 1719 template <typename T, size_t N = 1> 1720 static int clip_ptrs_against_bounds(T*& dst_buf, T* dst_bound0, T* dst_bound1, 1721 const T*& src_buf, const T* src_bound0, 1722 const T* src_bound1, size_t& len) { 1723 if (dst_bound0) { 1724 assert(dst_bound0 <= dst_bound1); 1725 if (dst_buf < dst_bound0) { 1726 size_t offset = size_t(dst_bound0 - dst_buf) / N; 1727 if (len <= offset) { 1728 // dst entirely before bounds 1729 len = 0; 1730 return -1; 1731 } 1732 // dst overlaps bound0 1733 src_buf += offset; 1734 dst_buf += offset * N; 1735 len -= offset; 1736 } 1737 if (dst_buf >= dst_bound1) { 1738 // dst entirely after bounds 1739 len = 0; 1740 return 1; 1741 } 1742 size_t remaining = size_t(dst_bound1 - 
dst_buf) / N; 1743 if (len > remaining) { 1744 // dst overlaps bound1 1745 len = remaining; 1746 } 1747 } 1748 if (src_bound0) { 1749 assert(src_bound0 <= src_bound1); 1750 if (src_buf < src_bound0) { 1751 size_t offset = size_t(src_bound0 - src_buf); 1752 if (len <= offset) { 1753 // src entirely before bounds 1754 len = 0; 1755 return -1; 1756 } 1757 // src overlaps bound0 1758 src_buf += offset; 1759 dst_buf += offset * N; 1760 len -= offset; 1761 } 1762 if (src_buf >= src_bound1) { 1763 // src entirely after bounds 1764 len = 0; 1765 return 1; 1766 } 1767 size_t remaining = size_t(src_bound1 - src_buf); 1768 if (len > remaining) { 1769 // src overlaps bound1 1770 len = remaining; 1771 } 1772 } 1773 return 0; 1774 } 1775 1776 static void convert_copy(GLenum external_format, GLenum internal_format, 1777 uint8_t* dst_buf, size_t dst_stride, 1778 uint8_t* dst_bound0, uint8_t* dst_bound1, 1779 const uint8_t* src_buf, size_t src_stride, 1780 const uint8_t* src_bound0, const uint8_t* src_bound1, 1781 size_t width, size_t height) { 1782 switch (external_format) { 1783 case GL_RGBA: 1784 if (internal_format == GL_RGBA8) { 1785 for (; height; height--) { 1786 size_t len = width; 1787 uint32_t* dst_ptr = (uint32_t*)dst_buf; 1788 const uint32_t* src_ptr = (const uint32_t*)src_buf; 1789 if (clip_ptrs_against_bounds(dst_ptr, (uint32_t*)dst_bound0, 1790 (uint32_t*)dst_bound1, src_ptr, 1791 (const uint32_t*)src_bound0, 1792 (const uint32_t*)src_bound1, len) > 0) { 1793 return; 1794 } 1795 if (len) { 1796 copy_bgra8_to_rgba8(dst_ptr, src_ptr, len); 1797 } 1798 dst_buf += dst_stride; 1799 src_buf += src_stride; 1800 } 1801 return; 1802 } 1803 break; 1804 case GL_RED: 1805 switch (internal_format) { 1806 case GL_RGBA8: 1807 for (; height; height--) { 1808 size_t len = width; 1809 uint8_t* dst_ptr = dst_buf; 1810 const uint8_t* src_ptr = src_buf; 1811 if (clip_ptrs_against_bounds<uint8_t, 4>( 1812 dst_ptr, dst_bound0, dst_bound1, src_ptr, src_bound0, 1813 src_bound1, len) > 0) { 
1814 return; 1815 } 1816 if (len) { 1817 copy_red_to_bgra8(dst_ptr, src_ptr, len); 1818 } 1819 dst_buf += dst_stride; 1820 src_buf += src_stride; 1821 } 1822 return; 1823 case GL_RGBA32F: 1824 for (; height; height--) { 1825 size_t len = width; 1826 float* dst_ptr = (float*)dst_buf; 1827 const float* src_ptr = (const float*)src_buf; 1828 if (clip_ptrs_against_bounds<float, 4>( 1829 dst_ptr, (float*)dst_bound0, (float*)dst_bound1, src_ptr, 1830 (const float*)src_bound0, (const float*)src_bound1, 1831 len) > 0) { 1832 return; 1833 } 1834 if (len) { 1835 copy_red_to_rgba32f(dst_ptr, src_ptr, len); 1836 } 1837 dst_buf += dst_stride; 1838 src_buf += src_stride; 1839 } 1840 return; 1841 case GL_R8: 1842 break; 1843 default: 1844 debugf("unsupported format conversion from %x to %x\n", 1845 external_format, internal_format); 1846 assert(false); 1847 return; 1848 } 1849 break; 1850 default: 1851 break; 1852 } 1853 size_t row_bytes = width * bytes_for_internal_format(internal_format); 1854 for (; height; height--) { 1855 size_t len = row_bytes; 1856 uint8_t* dst_ptr = dst_buf; 1857 const uint8_t* src_ptr = src_buf; 1858 if (clip_ptrs_against_bounds(dst_ptr, dst_bound0, dst_bound1, src_ptr, 1859 src_bound0, src_bound1, len) > 0) { 1860 return; 1861 } 1862 if (len) { 1863 memcpy(dst_ptr, src_ptr, len); 1864 } 1865 dst_buf += dst_stride; 1866 src_buf += src_stride; 1867 } 1868 } 1869 1870 static void set_tex_storage(Texture& t, GLenum external_format, GLsizei width, 1871 GLsizei height, void* buf = nullptr, 1872 GLsizei stride = 0, GLsizei min_width = 0, 1873 GLsizei min_height = 0) { 1874 GLenum internal_format = remap_internal_format(external_format); 1875 bool changed = false; 1876 if (t.width != width || t.height != height || 1877 t.internal_format != internal_format) { 1878 changed = true; 1879 t.internal_format = internal_format; 1880 t.width = width; 1881 t.height = height; 1882 } 1883 // If we are changed from an internally managed buffer to an externally 1884 // 
supplied one or vice versa, ensure that we clean up old buffer state. 1885 // However, if we have to convert the data from a non-native format, then 1886 // always treat it as internally managed since we will need to copy to an 1887 // internally managed native format buffer. 1888 bool should_free = buf == nullptr || format_requires_conversion( 1889 external_format, internal_format); 1890 if (t.should_free() != should_free) { 1891 changed = true; 1892 t.cleanup(); 1893 t.set_should_free(should_free); 1894 } 1895 // If now an external buffer, explicitly set it... 1896 if (!should_free) { 1897 t.set_buffer(buf, stride); 1898 } 1899 t.disable_delayed_clear(); 1900 if (!t.allocate(changed, min_width, min_height)) { 1901 out_of_memory(); 1902 } 1903 // If we have a buffer that needs format conversion, then do that now. 1904 if (buf && should_free) { 1905 convert_copy(external_format, internal_format, (uint8_t*)t.buf, t.stride(), 1906 (uint8_t*)t.buf, (uint8_t*)t.end_ptr(), (const uint8_t*)buf, 1907 stride, nullptr, nullptr, width, height); 1908 } 1909 } 1910 1911 extern "C" { 1912 1913 void TexStorage2D(GLenum target, GLint levels, GLenum internal_format, 1914 GLsizei width, GLsizei height) { 1915 assert(levels == 1); 1916 Texture& t = ctx->textures[ctx->get_binding(target)]; 1917 set_tex_storage(t, internal_format, width, height); 1918 } 1919 1920 GLenum internal_format_for_data(GLenum format, GLenum ty) { 1921 if (format == GL_RED && ty == GL_UNSIGNED_BYTE) { 1922 return GL_R8; 1923 } else if ((format == GL_RGBA || format == GL_BGRA) && 1924 (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV)) { 1925 return GL_RGBA8; 1926 } else if (format == GL_RGBA && ty == GL_FLOAT) { 1927 return GL_RGBA32F; 1928 } else if (format == GL_RGBA_INTEGER && ty == GL_INT) { 1929 return GL_RGBA32I; 1930 } else if (format == GL_RG && ty == GL_UNSIGNED_BYTE) { 1931 return GL_RG8; 1932 } else if (format == GL_RGB_422_APPLE && 1933 ty == GL_UNSIGNED_SHORT_8_8_REV_APPLE) { 1934 
return GL_RGB_RAW_422_APPLE; 1935 } else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) { 1936 return GL_R16; 1937 } else if (format == GL_RG && ty == GL_UNSIGNED_SHORT) { 1938 return GL_RG16; 1939 } else { 1940 debugf("unknown internal format for format %x, type %x\n", format, ty); 1941 assert(false); 1942 return 0; 1943 } 1944 } 1945 1946 static Buffer* get_pixel_pack_buffer() { 1947 return ctx->pixel_pack_buffer_binding 1948 ? &ctx->buffers[ctx->pixel_pack_buffer_binding] 1949 : nullptr; 1950 } 1951 1952 static Buffer* get_pixel_unpack_buffer() { 1953 return ctx->pixel_unpack_buffer_binding 1954 ? &ctx->buffers[ctx->pixel_unpack_buffer_binding] 1955 : nullptr; 1956 } 1957 1958 void TexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset, 1959 GLsizei width, GLsizei height, GLenum format, GLenum ty, 1960 void* data) { 1961 if (level != 0) { 1962 assert(false); 1963 return; 1964 } 1965 Buffer* pbo = get_pixel_unpack_buffer(); 1966 if (pbo) { 1967 data = pbo->get_data(data); 1968 } 1969 if (!data) return; 1970 Texture& t = ctx->textures[ctx->get_binding(target)]; 1971 IntRect skip = {xoffset, yoffset, xoffset + width, yoffset + height}; 1972 prepare_texture(t, &skip); 1973 assert(xoffset + width <= t.width); 1974 assert(yoffset + height <= t.height); 1975 assert(ctx->unpack_row_length == 0 || ctx->unpack_row_length >= width); 1976 GLsizei row_length = 1977 ctx->unpack_row_length != 0 ? ctx->unpack_row_length : width; 1978 assert(t.internal_format == internal_format_for_data(format, ty)); 1979 int src_bpp = format_requires_conversion(format, t.internal_format) 1980 ? bytes_for_internal_format(format) 1981 : t.bpp(); 1982 if (!src_bpp || !t.buf) return; 1983 convert_copy(format, t.internal_format, 1984 (uint8_t*)t.sample_ptr(xoffset, yoffset), t.stride(), 1985 (uint8_t*)t.buf, (uint8_t*)t.end_ptr(), (const uint8_t*)data, 1986 row_length * src_bpp, pbo ? (const uint8_t*)pbo->buf : nullptr, 1987 pbo ? 
(const uint8_t*)pbo->end_ptr() : nullptr, width, height); 1988 } 1989 1990 void TexImage2D(GLenum target, GLint level, GLint internal_format, 1991 GLsizei width, GLsizei height, GLint border, GLenum format, 1992 GLenum ty, void* data) { 1993 if (level != 0) { 1994 assert(false); 1995 return; 1996 } 1997 assert(border == 0); 1998 TexStorage2D(target, 1, internal_format, width, height); 1999 TexSubImage2D(target, 0, 0, 0, width, height, format, ty, data); 2000 } 2001 2002 void GenerateMipmap(UNUSED GLenum target) { 2003 // TODO: support mipmaps 2004 } 2005 2006 void SetTextureParameter(GLuint texid, GLenum pname, GLint param) { 2007 Texture& t = ctx->textures[texid]; 2008 switch (pname) { 2009 case GL_TEXTURE_WRAP_S: 2010 assert(param == GL_CLAMP_TO_EDGE); 2011 break; 2012 case GL_TEXTURE_WRAP_T: 2013 assert(param == GL_CLAMP_TO_EDGE); 2014 break; 2015 case GL_TEXTURE_MIN_FILTER: 2016 t.min_filter = param; 2017 break; 2018 case GL_TEXTURE_MAG_FILTER: 2019 t.mag_filter = param; 2020 break; 2021 default: 2022 break; 2023 } 2024 } 2025 2026 void TexParameteri(GLenum target, GLenum pname, GLint param) { 2027 SetTextureParameter(ctx->get_binding(target), pname, param); 2028 } 2029 2030 typedef Texture LockedTexture; 2031 2032 // Lock the given texture to prevent modification. 2033 LockedTexture* LockTexture(GLuint texId) { 2034 Texture& tex = ctx->textures[texId]; 2035 if (!tex.buf) { 2036 assert(tex.buf != nullptr); 2037 return nullptr; 2038 } 2039 if (__sync_fetch_and_add(&tex.locked, 1) == 0) { 2040 // If this is the first time locking the texture, flush any delayed clears. 2041 prepare_texture(tex); 2042 } 2043 return (LockedTexture*)&tex; 2044 } 2045 2046 // Lock the given framebuffer's color attachment to prevent modification. 2047 LockedTexture* LockFramebuffer(GLuint fboId) { 2048 Framebuffer& fb = ctx->framebuffers[fboId]; 2049 // Only allow locking a framebuffer if it has a valid color attachment. 
2050 if (!fb.color_attachment) { 2051 assert(fb.color_attachment != 0); 2052 return nullptr; 2053 } 2054 return LockTexture(fb.color_attachment); 2055 } 2056 2057 // Reference an already locked resource 2058 void LockResource(LockedTexture* resource) { 2059 if (!resource) { 2060 return; 2061 } 2062 __sync_fetch_and_add(&resource->locked, 1); 2063 } 2064 2065 // Remove a lock on a texture that has been previously locked 2066 int32_t UnlockResource(LockedTexture* resource) { 2067 if (!resource) { 2068 return -1; 2069 } 2070 int32_t locked = __sync_fetch_and_add(&resource->locked, -1); 2071 if (locked <= 0) { 2072 // The lock should always be non-zero before unlocking. 2073 assert(0); 2074 } else if (locked == 1 && resource->zombie()) { 2075 // If the resource is being kept alive by locks and this is the last lock, 2076 // then delete the resource now. 2077 delete resource; 2078 } 2079 return locked - 1; 2080 } 2081 2082 void GenTextures(int n, GLuint* result) { 2083 for (int i = 0; i < n; i++) { 2084 Texture t; 2085 result[i] = ctx->textures.insert(t); 2086 } 2087 } 2088 2089 void DeleteTexture(GLuint n) { 2090 if (!n) { 2091 return; 2092 } 2093 LockedTexture* tex = (LockedTexture*)ctx->textures.find(n); 2094 if (!tex) { 2095 return; 2096 } 2097 // Lock the texture so that it can't be deleted by another thread yet. 2098 LockResource(tex); 2099 // Forget the existing binding to the texture but keep it alive in case there 2100 // are any other locks on it. 2101 if (ctx->textures.erase(n, false)) { 2102 for (size_t i = 0; i < MAX_TEXTURE_UNITS; i++) { 2103 ctx->texture_units[i].unlink(n); 2104 } 2105 } 2106 // Mark the texture as a zombie so that it will be freed if there are no other 2107 // existing locks on it. 
2108 tex->set_zombie(true); 2109 if (int32_t locked = UnlockResource(tex)) { 2110 debugf("DeleteTexture(%u) with %d locks\n", n, locked); 2111 } 2112 } 2113 2114 void GenRenderbuffers(int n, GLuint* result) { 2115 for (int i = 0; i < n; i++) { 2116 Renderbuffer r; 2117 result[i] = ctx->renderbuffers.insert(r); 2118 } 2119 } 2120 2121 void Renderbuffer::on_erase() { 2122 for (auto* fb : ctx->framebuffers) { 2123 if (fb) { 2124 unlink(fb->color_attachment, texture); 2125 unlink(fb->depth_attachment, texture); 2126 } 2127 } 2128 DeleteTexture(texture); 2129 } 2130 2131 void DeleteRenderbuffer(GLuint n) { 2132 if (n && ctx->renderbuffers.erase(n)) { 2133 unlink(ctx->renderbuffer_binding, n); 2134 } 2135 } 2136 2137 void GenFramebuffers(int n, GLuint* result) { 2138 for (int i = 0; i < n; i++) { 2139 Framebuffer f; 2140 result[i] = ctx->framebuffers.insert(f); 2141 } 2142 } 2143 2144 void DeleteFramebuffer(GLuint n) { 2145 if (n && ctx->framebuffers.erase(n)) { 2146 unlink(ctx->read_framebuffer_binding, n); 2147 unlink(ctx->draw_framebuffer_binding, n); 2148 } 2149 } 2150 2151 void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width, 2152 GLsizei height) { 2153 // Just refer a renderbuffer to a texture to simplify things for now... 2154 Renderbuffer& r = ctx->renderbuffers[ctx->get_binding(target)]; 2155 if (!r.texture) { 2156 GenTextures(1, &r.texture); 2157 } 2158 switch (internal_format) { 2159 case GL_DEPTH_COMPONENT: 2160 case GL_DEPTH_COMPONENT16: 2161 case GL_DEPTH_COMPONENT24: 2162 case GL_DEPTH_COMPONENT32: 2163 // Force depth format to 24 bits... 
2164 internal_format = GL_DEPTH_COMPONENT24; 2165 break; 2166 } 2167 set_tex_storage(ctx->textures[r.texture], internal_format, width, height); 2168 } 2169 2170 void VertexAttribPointer(GLuint index, GLint size, GLenum type, bool normalized, 2171 GLsizei stride, GLuint offset) { 2172 // debugf("cva: %d\n", ctx->current_vertex_array); 2173 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array]; 2174 if (index >= NULL_ATTRIB) { 2175 assert(0); 2176 return; 2177 } 2178 VertexAttrib& va = v.attribs[index]; 2179 va.size = size * bytes_per_type(type); 2180 va.type = type; 2181 va.normalized = normalized; 2182 va.stride = stride; 2183 va.offset = offset; 2184 // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding]; 2185 va.vertex_buffer = ctx->array_buffer_binding; 2186 va.vertex_array = ctx->current_vertex_array; 2187 ctx->validate_vertex_array = true; 2188 } 2189 2190 void VertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride, 2191 GLuint offset) { 2192 // debugf("cva: %d\n", ctx->current_vertex_array); 2193 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array]; 2194 if (index >= NULL_ATTRIB) { 2195 assert(0); 2196 return; 2197 } 2198 VertexAttrib& va = v.attribs[index]; 2199 va.size = size * bytes_per_type(type); 2200 va.type = type; 2201 va.normalized = false; 2202 va.stride = stride; 2203 va.offset = offset; 2204 // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding]; 2205 va.vertex_buffer = ctx->array_buffer_binding; 2206 va.vertex_array = ctx->current_vertex_array; 2207 ctx->validate_vertex_array = true; 2208 } 2209 2210 void EnableVertexAttribArray(GLuint index) { 2211 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array]; 2212 if (index >= NULL_ATTRIB) { 2213 assert(0); 2214 return; 2215 } 2216 VertexAttrib& va = v.attribs[index]; 2217 if (!va.enabled) { 2218 ctx->validate_vertex_array = true; 2219 } 2220 va.enabled = true; 2221 v.max_attrib = max(v.max_attrib, (int)index); 2222 } 2223 2224 void 
DisableVertexAttribArray(GLuint index) { 2225 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array]; 2226 if (index >= NULL_ATTRIB) { 2227 assert(0); 2228 return; 2229 } 2230 VertexAttrib& va = v.attribs[index]; 2231 if (va.enabled) { 2232 ctx->validate_vertex_array = true; 2233 } 2234 va.disable(); 2235 } 2236 2237 void VertexAttribDivisor(GLuint index, GLuint divisor) { 2238 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array]; 2239 // Only support divisor being 0 (per-vertex) or 1 (per-instance). 2240 if (index >= NULL_ATTRIB || divisor > 1) { 2241 assert(0); 2242 return; 2243 } 2244 VertexAttrib& va = v.attribs[index]; 2245 va.divisor = divisor; 2246 } 2247 2248 void BufferData(GLenum target, GLsizeiptr size, void* data, 2249 UNUSED GLenum usage) { 2250 Buffer& b = ctx->buffers[ctx->get_binding(target)]; 2251 if (size != b.size) { 2252 if (!b.allocate(size)) { 2253 out_of_memory(); 2254 } 2255 ctx->validate_vertex_array = true; 2256 } 2257 if (data && b.buf && size <= b.size) { 2258 memcpy(b.buf, data, size); 2259 } 2260 } 2261 2262 void BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size, 2263 void* data) { 2264 if (offset < 0) { 2265 assert(0); 2266 return; 2267 } 2268 GLsizeiptr uOffset = offset; 2269 Buffer& b = ctx->buffers[ctx->get_binding(target)]; 2270 assert(uOffset < b.size && size <= b.size - uOffset); 2271 if (data && b.buf && uOffset < b.size && size <= b.size - uOffset) { 2272 memcpy(&b.buf[uOffset], data, size); 2273 } 2274 } 2275 2276 void* MapBuffer(GLenum target, UNUSED GLbitfield access) { 2277 Buffer& b = ctx->buffers[ctx->get_binding(target)]; 2278 return b.buf; 2279 } 2280 2281 void* MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length, 2282 UNUSED GLbitfield access) { 2283 Buffer& b = ctx->buffers[ctx->get_binding(target)]; 2284 if (!b.buf || offset < 0 || length == 0) { 2285 return nullptr; 2286 } 2287 2288 GLsizeiptr uOffset = offset; 2289 if (uOffset >= b.size || length > b.size - uOffset) { 
    return nullptr;
  }

  return b.buf + offset;
}

// Unmapping is a no-op for our in-memory buffers; just report whether a
// buffer was actually mapped.
GLboolean UnmapBuffer(GLenum target) {
  Buffer& b = ctx->buffers[ctx->get_binding(target)];
  return b.buf != nullptr;
}

// Uniform setters forward to the current vertex shader, which owns all
// uniform storage for the program.
void Uniform1i(GLint location, GLint V0) {
  // debugf("tex: %d\n", (int)ctx->textures.size);
  if (vertex_shader) {
    vertex_shader->set_uniform_1i(location, V0);
  }
}
void Uniform4fv(GLint location, GLsizei count, const GLfloat* v) {
  // Only single-element uniform arrays are supported.
  assert(count == 1);
  if (vertex_shader) {
    vertex_shader->set_uniform_4fv(location, v);
  }
}
void UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
                      const GLfloat* value) {
  assert(count == 1);
  assert(!transpose);
  if (vertex_shader) {
    vertex_shader->set_uniform_matrix4fv(location, value);
  }
}

// Attach a texture to the bound framebuffer's color or depth slot.
void FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget,
                          GLuint texture, GLint level) {
  assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
  assert(textarget == GL_TEXTURE_2D || textarget == GL_TEXTURE_RECTANGLE);
  assert(level == 0);
  Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
  if (attachment == GL_COLOR_ATTACHMENT0) {
    fb.color_attachment = texture;
  } else if (attachment == GL_DEPTH_ATTACHMENT) {
    fb.depth_attachment = texture;
  } else {
    assert(0);
  }
}

// Attach a renderbuffer by attaching the texture that backs it (renderbuffers
// are just thin wrappers over textures here).
void FramebufferRenderbuffer(GLenum target, GLenum attachment,
                             GLenum renderbuffertarget, GLuint renderbuffer) {
  assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
  assert(renderbuffertarget == GL_RENDERBUFFER);
  Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
  Renderbuffer& rb = ctx->renderbuffers[renderbuffer];
  if (attachment == GL_COLOR_ATTACHMENT0) {
    fb.color_attachment = rb.texture;
  } else if (attachment == GL_DEPTH_ATTACHMENT) {
    fb.depth_attachment = rb.texture;
  } else {
    assert(0);
  }
}

}  // extern "C"

// Look up the framebuffer bound to target (GL_FRAMEBUFFER is treated as the
// draw framebuffer). If not found and fallback is set, return framebuffer 0.
static inline Framebuffer* get_framebuffer(GLenum target,
                                           bool fallback = false) {
  if (target == GL_FRAMEBUFFER) {
    target = GL_DRAW_FRAMEBUFFER;
  }
  Framebuffer* fb = ctx->framebuffers.find(ctx->get_binding(target));
  if (fallback && !fb) {
    // If the specified framebuffer isn't found and a fallback is requested,
    // use the default framebuffer.
    fb = &ctx->framebuffers[0];
  }
  return fb;
}

// Fill n elements starting at dst with val.
template <typename T>
static inline void fill_n(T* dst, size_t n, T val) {
  for (T* end = &dst[n]; dst < end; dst++) *dst = val;
}

#if USE_SSE2
// On x86 use "rep stosl" for the 32-bit fill; the constraints hand EDI/ECX/EAX
// to the instruction and the counter/pointer come back updated.
template <>
inline void fill_n<uint32_t>(uint32_t* dst, size_t n, uint32_t val) {
  __asm__ __volatile__("rep stosl\n"
                       : "+D"(dst), "+c"(n)
                       : "a"(val)
                       : "memory", "cc");
}
#endif

// clear_chunk replicates a clear value across a 32-bit word so rows can be
// filled a word at a time regardless of the texel size.
static inline uint32_t clear_chunk(uint8_t value) {
  return uint32_t(value) * 0x01010101U;
}

static inline uint32_t clear_chunk(uint16_t value) {
  return uint32_t(value) | (uint32_t(value) << 16);
}

static inline uint32_t clear_chunk(uint32_t value) { return value; }

// Fill len texels of a row with value, using the word-replicated chunk for
// the aligned middle section.
template <typename T>
static inline void clear_row(T* buf, size_t len, T value, uint32_t chunk) {
  const size_t N = sizeof(uint32_t) / sizeof(T);
  // fill any leading unaligned values
  if (N > 1) {
    size_t align = (-(intptr_t)buf & (sizeof(uint32_t) - 1)) / sizeof(T);
    if (align <= len) {
      fill_n(buf, align, value);
      len -= align;
      buf += align;
    }
  }
  // fill as many aligned chunks as possible
  fill_n((uint32_t*)buf, len / N, chunk);
  // fill any remaining values
  if (N > 1) {
    fill_n(buf + (len & ~(N - 1)), len & (N - 1), value);
  }
}

// Clear the rectangle bb of texture t to value, optionally leaving the
// columns in [skip_start, skip_end) untouched on every row.
template <typename T>
static void clear_buffer(Texture& t, T value, IntRect bb, int skip_start = 0,
                         int
                         skip_end = 0) {
  if (!t.buf) return;
  skip_start = max(skip_start, bb.x0);
  skip_end = max(skip_end, skip_start);
  assert(sizeof(T) == t.bpp());
  size_t stride = t.stride();
  // When clearing multiple full-width rows, collapse them into a single large
  // "row" to avoid redundant setup from clearing each row individually. Note
  // that we can only safely do this if the stride is tightly packed.
  if (bb.width() == t.width && bb.height() > 1 && skip_start >= skip_end &&
      (t.should_free() || stride == t.width * sizeof(T))) {
    bb.x1 += (stride / sizeof(T)) * (bb.height() - 1);
    bb.y1 = bb.y0 + 1;
  }
  T* buf = (T*)t.sample_ptr(bb.x0, bb.y0);
  uint32_t chunk = clear_chunk(value);
  for (int rows = bb.height(); rows > 0; rows--) {
    // Clear the segments on either side of the skip range.
    if (bb.x0 < skip_start) {
      clear_row(buf, skip_start - bb.x0, value, chunk);
    }
    if (skip_end < bb.x1) {
      clear_row(buf + (skip_end - bb.x0), bb.x1 - skip_end, value, chunk);
    }
    buf += stride / sizeof(T);
  }
}

// Clear a single row to the texture's pending clear value, leaving the
// columns in [skip_start, skip_end) untouched.
template <typename T>
static inline void force_clear_row(Texture& t, int y, int skip_start = 0,
                                   int skip_end = 0) {
  assert(t.buf != nullptr);
  assert(sizeof(T) == t.bpp());
  assert(skip_start <= skip_end);
  T* buf = (T*)t.sample_ptr(0, y);
  uint32_t chunk = clear_chunk((T)t.clear_val);
  if (skip_start > 0) {
    clear_row<T>(buf, skip_start, t.clear_val, chunk);
  }
  if (skip_end < t.width) {
    clear_row<T>(buf + skip_end, t.width - skip_end, t.clear_val, chunk);
  }
}

// Flush a texture's delayed clear: every row whose bit in t.cleared_rows is
// still 0 gets filled with t.clear_val. An optional skip rect excludes an
// area that is about to be overwritten anyway.
template <typename T>
static void force_clear(Texture& t, const IntRect* skip = nullptr) {
  if (!t.delay_clear || !t.cleared_rows) {
    return;
  }
  int y0 = 0;
  int y1 = t.height;
  int skip_start = 0;
  int skip_end = 0;
  if (skip) {
    y0 = clamp(skip->y0, 0, t.height);
    y1 = clamp(skip->y1, y0, t.height);
    skip_start = clamp(skip->x0, 0, t.width);
    skip_end = clamp(skip->x1, skip_start, t.width);
    // If the skip rect covers the entire texture, there is nothing to clear.
    if (skip_start <= 0 && skip_end >= t.width && y0 <= 0 && y1 >= t.height) {
      t.disable_delayed_clear();
      return;
    }
  }
  // cleared_rows is a bitmask with one bit per row, 32 rows per word; a 0 bit
  // means the row still needs clearing. Walk runs of 0 bits and clear them in
  // batches.
  int num_masks = (y1 + 31) / 32;
  uint32_t* rows = t.cleared_rows;
  for (int i = y0 / 32; i < num_masks; i++) {
    uint32_t mask = rows[i];
    if (mask != ~0U) {
      rows[i] = ~0U;
      int start = i * 32;
      while (mask) {
        // Count trailing 0 bits: a run of uncleared rows.
        int count = __builtin_ctz(mask);
        if (count > 0) {
          clear_buffer<T>(t, t.clear_val,
                          IntRect{0, start, t.width, start + count}, skip_start,
                          skip_end);
          t.delay_clear -= count;
          start += count;
          mask >>= count;
        }
        // Skip the following run of already-cleared (1) rows.
        count = __builtin_ctz(mask + 1);
        start += count;
        mask >>= count;
      }
      // Clear any trailing uncleared rows at the top of this 32-row group.
      int count = (i + 1) * 32 - start;
      if (count > 0) {
        clear_buffer<T>(t, t.clear_val,
                        IntRect{0, start, t.width, start + count}, skip_start,
                        skip_end);
        t.delay_clear -= count;
      }
    }
  }
  if (t.delay_clear <= 0) t.disable_delayed_clear();
}

// Resolve any pending delayed clear on t before its contents are read or
// partially overwritten, dispatching on the texel format.
static void prepare_texture(Texture& t, const IntRect* skip) {
  if (t.delay_clear) {
    switch (t.internal_format) {
      case GL_RGBA8:
        force_clear<uint32_t>(t, skip);
        break;
      case GL_R8:
        force_clear<uint8_t>(t, skip);
        break;
      case GL_RG8:
        force_clear<uint16_t>(t, skip);
        break;
      default:
        assert(false);
        break;
    }
  }
}

// Setup a clear on a texture. This may either force an immediate clear or
// potentially punt to a delayed clear, if applicable.
template <typename T>
static void request_clear(Texture& t, T value, const IntRect& scissor) {
  // If the clear would require a scissor, force clear anything outside
  // the scissor, and then immediately clear anything inside the scissor.
  if (!scissor.contains(t.offset_bounds())) {
    IntRect skip = scissor - t.offset;
    force_clear<T>(t, &skip);
    clear_buffer<T>(t, value, skip.intersection(t.bounds()));
  } else {
    // Do delayed clear for 2D texture without scissor.
    t.enable_delayed_clear(value);
  }
}

template <typename T>
static inline void request_clear(Texture& t, T value) {
  // If scissoring is enabled, use the scissor rect. Otherwise, just scissor to
  // the entire texture bounds.
  request_clear(t, value, ctx->scissortest ? ctx->scissor : t.offset_bounds());
}

extern "C" {

// Bind an externally-owned pixel buffer as the default framebuffer's color
// attachment, with a matching 24-bit depth buffer, both offset by (x, y).
void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
                            void* buf) {
  Framebuffer& fb = ctx->framebuffers[0];
  if (!fb.color_attachment) {
    GenTextures(1, &fb.color_attachment);
  }
  // If the dimensions or buffer properties changed, we need to reallocate
  // the underlying storage for the color buffer texture.
  Texture& colortex = ctx->textures[fb.color_attachment];
  set_tex_storage(colortex, GL_RGBA8, width, height, buf, stride);
  colortex.offset = IntPoint(x, y);
  if (!fb.depth_attachment) {
    GenTextures(1, &fb.depth_attachment);
  }
  // Ensure dimensions of the depth buffer match the color buffer.
  Texture& depthtex = ctx->textures[fb.depth_attachment];
  set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
  depthtex.offset = IntPoint(x, y);
}

// Return a pointer to a framebuffer's color pixels along with its dimensions
// and stride. If flush is set, pending delayed clears are resolved first.
void* GetColorBuffer(GLuint fbo, GLboolean flush, int32_t* width,
                     int32_t* height, int32_t* stride) {
  Framebuffer* fb = ctx->framebuffers.find(fbo);
  if (!fb || !fb->color_attachment) {
    return nullptr;
  }
  Texture& colortex = ctx->textures[fb->color_attachment];
  if (flush) {
    prepare_texture(colortex);
  }
  assert(colortex.offset == IntPoint(0, 0));
  if (width) {
    *width = colortex.width;
  }
  if (height) {
    *height = colortex.height;
  }
  if (stride) {
    *stride = colortex.stride();
  }
  return colortex.buf ? colortex.sample_ptr(0, 0) : nullptr;
}

// Resolve any pending delayed clear on a framebuffer's color attachment.
void ResolveFramebuffer(GLuint fbo) {
  Framebuffer* fb = ctx->framebuffers.find(fbo);
  if (!fb || !fb->color_attachment) {
    return;
  }
  Texture& colortex = ctx->textures[fb->color_attachment];
  prepare_texture(colortex);
}

// Point a texture at externally-owned storage.
void SetTextureBuffer(GLuint texid, GLenum internal_format, GLsizei width,
                      GLsizei height, GLsizei stride, void* buf,
                      GLsizei min_width, GLsizei min_height) {
  Texture& t = ctx->textures[texid];
  set_tex_storage(t, internal_format, width, height, buf, stride, min_width,
                  min_height);
}

GLenum CheckFramebufferStatus(GLenum target) {
  Framebuffer* fb = get_framebuffer(target);
  if (!fb || !fb->color_attachment) {
    return GL_FRAMEBUFFER_UNSUPPORTED;
  }
  return GL_FRAMEBUFFER_COMPLETE;
}

// Clear a sub-rectangle of a texture. Depth textures are cleared via depth
// runs; color textures decode the clear color and hand off to request_clear,
// which may defer the actual fill.
void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
                      GLint zoffset, GLsizei width, GLsizei height,
                      GLsizei depth, GLenum format, GLenum type,
                      const void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  Texture& t = ctx->textures[texture];
  assert(!t.locked);
  if
  (width <= 0 || height <= 0 || depth <= 0) {
    return;
  }
  assert(zoffset == 0 && depth == 1);
  IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
  if (t.internal_format == GL_DEPTH_COMPONENT24) {
    // Convert the requested clear depth to a 24-bit fixed-point value.
    uint32_t value = 0xFFFFFF;
    switch (format) {
      case GL_DEPTH_COMPONENT:
        switch (type) {
          case GL_DOUBLE:
            value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
            break;
          case GL_FLOAT:
            value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
            break;
          default:
            assert(false);
            break;
        }
        break;
      default:
        assert(false);
        break;
    }
    if (t.cleared() && !scissor.contains(t.offset_bounds())) {
      // If we need to scissor the clear and the depth buffer was already
      // initialized, then just fill runs for that scissor area.
      t.fill_depth_runs(value, scissor);
    } else {
      // Otherwise, the buffer is either uninitialized or the clear would
      // encompass the entire buffer. If uninitialized, we can safely fill
      // the entire buffer with any value and thus ignore any scissoring.
      t.init_depth_runs(value);
    }
    return;
  }

  // Decode the clear color into a packed RGBA8 value; the case fallthroughs
  // intentionally accumulate one channel per supported format.
  uint32_t color = 0xFF000000;
  switch (type) {
    case GL_FLOAT: {
      const GLfloat* f = (const GLfloat*)data;
      Float v = {0.0f, 0.0f, 0.0f, 1.0f};
      switch (format) {
        case GL_RGBA:
          v.w = f[3];  // alpha
          FALLTHROUGH;
        case GL_RGB:
          v.z = f[2];  // blue
          FALLTHROUGH;
        case GL_RG:
          v.y = f[1];  // green
          FALLTHROUGH;
        case GL_RED:
          v.x = f[0];  // red
          break;
        default:
          assert(false);
          break;
      }
      color = bit_cast<uint32_t>(CONVERT(round_pixel(v), U8));
      break;
    }
    case GL_UNSIGNED_BYTE: {
      const GLubyte* b = (const GLubyte*)data;
      switch (format) {
        case GL_RGBA:
          color = (color & ~0xFF000000) | (uint32_t(b[3]) << 24);  // alpha
          FALLTHROUGH;
        case GL_RGB:
          color = (color & ~0x00FF0000) | (uint32_t(b[2]) << 16);  // blue
          FALLTHROUGH;
        case GL_RG:
          color = (color & ~0x0000FF00) | (uint32_t(b[1]) << 8);  // green
          FALLTHROUGH;
        case GL_RED:
          color = (color & ~0x000000FF) | uint32_t(b[0]);  // red
          break;
        default:
          assert(false);
          break;
      }
      break;
    }
    default:
      assert(false);
      break;
  }

  switch (t.internal_format) {
    case GL_RGBA8:
      // Clear color needs to swizzle to BGRA.
      request_clear<uint32_t>(t,
                              (color & 0xFF00FF00) |
                                  ((color << 16) & 0xFF0000) |
                                  ((color >> 16) & 0xFF),
                              scissor);
      break;
    case GL_R8:
      request_clear<uint8_t>(t, uint8_t(color & 0xFF), scissor);
      break;
    case GL_RG8:
      request_clear<uint16_t>(t, uint16_t(color & 0xFFFF), scissor);
      break;
    default:
      assert(false);
      break;
  }
}

// Clear an entire texture by delegating to ClearTexSubImage over its bounds.
void ClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type,
                   const void* data) {
  Texture& t = ctx->textures[texture];
  IntRect scissor = t.offset_bounds();
  ClearTexSubImage(texture, level, scissor.x0, scissor.y0, 0, scissor.width(),
                   scissor.height(), 1, format, type, data);
}

// glClear: clear the draw framebuffer's color and/or depth attachments using
// the context's clear color/depth, honoring the scissor when enabled.
void Clear(GLbitfield mask) {
  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if ((mask & GL_COLOR_BUFFER_BIT) && fb.color_attachment) {
    Texture& t = ctx->textures[fb.color_attachment];
    IntRect scissor = ctx->scissortest
                          ? ctx->scissor.intersection(t.offset_bounds())
                          : t.offset_bounds();
    ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
                     scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
                     ctx->clearcolor);
  }
  if ((mask & GL_DEPTH_BUFFER_BIT) && fb.depth_attachment) {
    Texture& t = ctx->textures[fb.depth_attachment];
    IntRect scissor = ctx->scissortest
                          ?
                          ctx->scissor.intersection(t.offset_bounds())
                          : t.offset_bounds();
    ClearTexSubImage(fb.depth_attachment, 0, scissor.x0, scissor.y0, 0,
                     scissor.width(), scissor.height(), 1, GL_DEPTH_COMPONENT,
                     GL_DOUBLE, &ctx->cleardepth);
  }
}

// Clear a rectangle of a framebuffer's color attachment to an explicit color,
// clipped to the attachment's bounds.
void ClearColorRect(GLuint fbo, GLint xoffset, GLint yoffset, GLsizei width,
                    GLsizei height, GLfloat r, GLfloat g, GLfloat b,
                    GLfloat a) {
  GLfloat color[] = {r, g, b, a};
  Framebuffer& fb = ctx->framebuffers[fbo];
  Texture& t = ctx->textures[fb.color_attachment];
  IntRect scissor =
      IntRect{xoffset, yoffset, xoffset + width, yoffset + height}.intersection(
          t.offset_bounds());
  ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
                   scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
                   color);
}

// Mark attachment contents as undefined: depth loses its cleared flag, color
// drops any pending delayed clear.
void InvalidateFramebuffer(GLenum target, GLsizei num_attachments,
                           const GLenum* attachments) {
  Framebuffer* fb = get_framebuffer(target);
  if (!fb || num_attachments <= 0 || !attachments) {
    return;
  }
  for (GLsizei i = 0; i < num_attachments; i++) {
    switch (attachments[i]) {
      case GL_DEPTH_ATTACHMENT: {
        Texture& t = ctx->textures[fb->depth_attachment];
        t.set_cleared(false);
        break;
      }
      case GL_COLOR_ATTACHMENT0: {
        Texture& t = ctx->textures[fb->color_attachment];
        t.disable_delayed_clear();
        break;
      }
    }
  }
}

// Read back a rectangle of the read framebuffer's color attachment into data
// (or the bound pixel-pack buffer). The rectangle is clipped against the
// texture bounds; no format conversion beyond reversible swizzles is allowed.
void ReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format,
                GLenum type, void* data) {
  Buffer* pbo = get_pixel_pack_buffer();
  if (pbo) {
    // With a pixel-pack buffer bound, data is an offset into that buffer.
    data = pbo->get_data(data);
  }
  if (!data) return;
  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
  if (!fb) return;
  assert(format == GL_RED || format == GL_RGBA || format == GL_RGBA_INTEGER ||
         format == GL_BGRA || format == GL_RG);
  Texture& t = ctx->textures[fb->color_attachment];
  if (!t.buf) return;
  prepare_texture(t);
  // debugf("read pixels %d, %d, %d, %d from fb %d with format %x\n", x, y,
  //        width, height, ctx->read_framebuffer_binding, t.internal_format);
  x -= t.offset.x;
  y -= t.offset.y;
  assert(x >= 0 && y >= 0);
  assert(x + width <= t.width);
  assert(y + height <= t.height);
  if (internal_format_for_data(format, type) != t.internal_format) {
    debugf("mismatched format for read pixels: %x vs %x\n", t.internal_format,
           internal_format_for_data(format, type));
    assert(false);
    return;
  }
  // Only support readback conversions that are reversible
  assert(!format_requires_conversion(format, t.internal_format) ||
         bytes_for_internal_format(format) == t.bpp());
  uint8_t* dest = (uint8_t*)data;
  size_t destStride = width * t.bpp();
  // Clip the requested rectangle to the texture, advancing the destination
  // pointer past any clipped-off leading rows/columns.
  if (y < 0) {
    dest += -y * destStride;
    height += y;
    y = 0;
  }
  if (y + height > t.height) {
    height = t.height - y;
  }
  if (x < 0) {
    dest += -x * t.bpp();
    width += x;
    x = 0;
  }
  if (x + width > t.width) {
    width = t.width - x;
  }
  if (width <= 0 || height <= 0) {
    return;
  }
  convert_copy(format, t.internal_format, dest, destStride,
               pbo ? (uint8_t*)pbo->buf : nullptr,
               pbo ? (uint8_t*)pbo->end_ptr() : nullptr,
               (const uint8_t*)t.sample_ptr(x, y), t.stride(),
               (const uint8_t*)t.buf, (const uint8_t*)t.end_ptr(), width,
               height);
}

// Copy a rectangle of texels between two textures (or renderbuffer-backed
// textures) of the same internal format, row by row with bounds clipping.
void CopyImageSubData(GLuint srcName, GLenum srcTarget, UNUSED GLint srcLevel,
                      GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
                      GLenum dstTarget, UNUSED GLint dstLevel, GLint dstX,
                      GLint dstY, GLint dstZ, GLsizei srcWidth,
                      GLsizei srcHeight, GLsizei srcDepth) {
  assert(srcLevel == 0 && dstLevel == 0);
  assert(srcZ == 0 && srcDepth == 1 && dstZ == 0);
  // Renderbuffers are backed by textures; resolve them to their textures.
  if (srcTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[srcName];
    srcName = rb.texture;
  }
  if (dstTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[dstName];
    dstName = rb.texture;
  }
  Texture& srctex = ctx->textures[srcName];
  if (!srctex.buf) return;
  prepare_texture(srctex);
  Texture& dsttex = ctx->textures[dstName];
  if (!dsttex.buf) return;
  assert(!dsttex.locked);
  // The destination rect is about to be overwritten, so any pending delayed
  // clear there can be skipped.
  IntRect skip = {dstX, dstY, dstX + srcWidth, dstY + srcHeight};
  prepare_texture(dsttex, &skip);
  assert(srctex.internal_format == dsttex.internal_format);
  assert(srcWidth >= 0);
  assert(srcHeight >= 0);
  assert(srcX + srcWidth <= srctex.width);
  assert(srcY + srcHeight <= srctex.height);
  assert(dstX + srcWidth <= dsttex.width);
  assert(dstY + srcHeight <= dsttex.height);
  int bpp = srctex.bpp();
  int src_stride = srctex.stride();
  int dest_stride = dsttex.stride();
  char* dest = dsttex.sample_ptr(dstX, dstY);
  const char* src = srctex.sample_ptr(srcX, srcY);
  for (int y = 0; y < srcHeight; y++) {
    char* dst_ptr = dest;
    const char* src_ptr = src;
    size_t len = size_t(srcWidth) * bpp;
    if (clip_ptrs_against_bounds(dst_ptr, dsttex.buf, dsttex.end_ptr(), src_ptr,
                                 srctex.buf, srctex.end_ptr(), len) > 0) {
      break;
    }
    if (len) {
      memcpy(dst_ptr, src_ptr,
             len);
    }
    dest += dest_stride;
    src += src_stride;
  }
}

// Copy from the read framebuffer's color attachment into the texture bound to
// target, implemented on top of CopyImageSubData.
void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
                       GLint yoffset, GLint x, GLint y, GLsizei width,
                       GLsizei height) {
  assert(level == 0);
  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
  if (!fb) return;
  CopyImageSubData(fb->color_attachment, GL_TEXTURE_2D, 0, x, y, 0,
                   ctx->get_binding(target), GL_TEXTURE_2D, 0, xoffset, yoffset,
                   0, width, height, 1);
}

}  // extern "C"

#include "blend.h"
#include "composite.h"
#include "swgl_ext.h"

// The generated shader code in load_shader.h trips a number of warnings;
// silence them around that include only.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#ifdef __clang__
#  pragma GCC diagnostic ignored "-Wunused-private-field"
#else
#  pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
#include "load_shader.h"
#pragma GCC diagnostic pop

#include "rasterize.h"

// Resolve each enabled attribute's buffer handle to an actual pointer/size,
// and shrink max_attrib to the highest attribute that is still enabled.
void VertexArray::validate() {
  int last_enabled = -1;
  for (int i = 0; i <= max_attrib; i++) {
    VertexAttrib& attr = attribs[i];
    if (attr.enabled) {
      // VertexArray &v = ctx->vertex_arrays[attr.vertex_array];
      Buffer& vertex_buf = ctx->buffers[attr.vertex_buffer];
      attr.buf = vertex_buf.buf;
      attr.buf_size = vertex_buf.size;
      // debugf("%d %x %d %d %d %d\n", i, attr.type, attr.size, attr.stride,
      //        attr.offset, attr.divisor);
      last_enabled = i;
    }
  }
  max_attrib = last_enabled;
}

extern "C" {

// The main draw entry point: rasterize count indices (of the given index
// type) for instancecount instances into the draw framebuffer's color and,
// when depth testing is on, depth attachments.
void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
                           GLintptr offset, GLsizei instancecount) {
  if (offset < 0 || count <= 0 || instancecount <= 0 || !vertex_shader ||
      !fragment_shader) {
    return;
  }

  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if (!fb.color_attachment) {
    return;
  }
  Texture& colortex = ctx->textures[fb.color_attachment];
  if (!colortex.buf) {
    return;
  }
  assert(!colortex.locked);
  assert(colortex.internal_format == GL_RGBA8 ||
         colortex.internal_format == GL_R8);
  // When depth testing is disabled, use texture 0 as a null depth texture.
  Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
  if (depthtex.buf) {
    assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
    assert(colortex.width == depthtex.width &&
           colortex.height == depthtex.height);
    assert(colortex.offset == depthtex.offset);
  }

  // debugf("current_vertex_array %d\n", ctx->current_vertex_array);
  // debugf("indices size: %d\n", indices_buf.size);
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (ctx->validate_vertex_array) {
    ctx->validate_vertex_array = false;
    v.validate();
  }

#ifdef PRINT_TIMINGS
  uint64_t start = get_time_value();
#endif

  ctx->shaded_rows = 0;
  ctx->shaded_pixels = 0;

  vertex_shader->init_batch();

  switch (type) {
    case GL_UNSIGNED_SHORT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint16_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_UNSIGNED_INT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint32_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_NONE:
      // Non-standard GL extension - if element type is GL_NONE, then we don't
      // use any element buffer and behave as if DrawArrays was called instead.
      for (GLsizei instance = 0; instance < instancecount; instance++) {
        switch (mode) {
          case GL_LINES:
            for (GLsizei i = 0; i + 2 <= count; i += 2) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 2);
              draw_quad(2, colortex, depthtex);
            }
            break;
          case GL_TRIANGLES:
            for (GLsizei i = 0; i + 3 <= count; i += 3) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 3);
              draw_quad(3, colortex, depthtex);
            }
            break;
          default:
            assert(false);
            break;
        }
      }
      break;
    default:
      assert(false);
      break;
  }

  // Accumulate shaded pixel counts into any active occlusion-style query.
  if (ctx->samples_passed_query) {
    Query& q = ctx->queries[ctx->samples_passed_query];
    q.value += ctx->shaded_pixels;
  }

#ifdef PRINT_TIMINGS
  uint64_t end = get_time_value();
  printf(
      "%7.3fms draw(%s, %d): %d pixels in %d rows (avg %f pixels/row, "
      "%fns/pixel)\n",
      double(end - start) / (1000. * 1000.),
      ctx->programs[ctx->current_program].impl->get_name(), instancecount,
      ctx->shaded_pixels, ctx->shaded_rows,
      double(ctx->shaded_pixels) / ctx->shaded_rows,
      double(end - start) / max(ctx->shaded_pixels, 1));
#endif
}

// All rendering is synchronous in this implementation, so Finish has nothing
// to wait on.
void Finish() {
#ifdef PRINT_TIMINGS
  printf("Finish\n");
#endif
}

// Switch the active context and re-bind its current program's shaders.
void MakeCurrent(Context* c) {
  if (ctx == c) {
    return;
  }
  ctx = c;
  setup_program(ctx ? ctx->current_program : 0);
}

Context* CreateContext() { return new Context; }

// Increment a context's reference count (paired with DestroyContext).
void ReferenceContext(Context* c) {
  if (!c) {
    return;
  }
  ++c->references;
}

// Drop a reference to the context; the last reference deletes it, first
// making sure it is no longer the current context.
void DestroyContext(Context* c) {
  if (!c) {
    return;
  }
  assert(c->references > 0);
  --c->references;
  if (c->references > 0) {
    return;
  }
  if (ctx == c) {
    MakeCurrent(nullptr);
  }
  delete c;
}

// Sum the size of all texture buffers owned (i.e. allocated, not borrowed)
// by the context, as measured by the supplied size_of_op callback.
size_t ReportMemory(Context* ctx, size_t (*size_of_op)(const void*)) {
  size_t size = 0;
  if (ctx) {
    for (auto& t : ctx->textures) {
      if (t && t->should_free()) {
        size += size_of_op(t->buf);
      }
    }
  }
  return size;
}
}  // extern "C"