tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

gl.cc (93598B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #include <stdlib.h>
      6 #include <stdint.h>
      7 #include <string.h>
      8 #include <assert.h>
      9 #include <stdio.h>
     10 #include <math.h>
     11 
     12 #ifdef __MACH__
     13 #  include <mach/mach.h>
     14 #  include <mach/mach_time.h>
     15 #else
     16 #  include <time.h>
     17 #endif
     18 
     19 #ifdef NDEBUG
     20 #  define debugf(...)
     21 #else
     22 #  define debugf(...) printf(__VA_ARGS__)
     23 #endif
     24 
     25 // #define PRINT_TIMINGS
     26 
     27 #ifdef _WIN32
     28 #  define ALWAYS_INLINE __forceinline
     29 #  define NO_INLINE __declspec(noinline)
     30 
     31 // Including Windows.h brings a huge amount of namespace polution so just
     32 // define a couple of things manually
// Minimal Win32 type/ABI shim so we can declare QueryPerformanceCounter and
// QueryPerformanceFrequency without pulling in all of Windows.h. These
// declarations must match the real Win32 ABI exactly.
typedef int BOOL;
#  define WINAPI __stdcall
#  define DECLSPEC_IMPORT __declspec(dllimport)
#  define WINBASEAPI DECLSPEC_IMPORT
typedef unsigned long DWORD;
typedef long LONG;
typedef __int64 LONGLONG;
#  define DUMMYSTRUCTNAME

// Mirrors the Win32 LARGE_INTEGER union: a 64-bit value that can also be
// accessed as low/high 32-bit halves (anonymous or named `u` variant).
typedef union _LARGE_INTEGER {
  struct {
    DWORD LowPart;
    LONG HighPart;
  } DUMMYSTRUCTNAME;
  struct {
    DWORD LowPart;
    LONG HighPart;
  } u;
  LONGLONG QuadPart;
} LARGE_INTEGER;
// Imported from kernel32; used for high-resolution timing on Windows.
extern "C" {
WINBASEAPI BOOL WINAPI
QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount);

WINBASEAPI BOOL WINAPI QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency);
}
     59 
     60 #else
     61 // GCC is slower when dealing with always_inline, especially in debug builds.
     62 // When using Clang, use always_inline more aggressively.
     63 #  if defined(__clang__) || defined(NDEBUG)
     64 #    define ALWAYS_INLINE __attribute__((always_inline)) inline
     65 #  else
     66 #    define ALWAYS_INLINE inline
     67 #  endif
     68 #  define NO_INLINE __attribute__((noinline))
     69 #endif
     70 
     71 // Some functions may cause excessive binary bloat if inlined in debug or with
     72 // GCC builds, so use PREFER_INLINE on these instead of ALWAYS_INLINE.
     73 #if defined(__clang__) && defined(NDEBUG)
     74 #  define PREFER_INLINE ALWAYS_INLINE
     75 #else
     76 #  define PREFER_INLINE inline
     77 #endif
     78 
     79 #define UNREACHABLE __builtin_unreachable()
     80 
     81 #define UNUSED [[maybe_unused]]
     82 
     83 #define FALLTHROUGH [[fallthrough]]
     84 
     85 #if defined(MOZILLA_CLIENT) && defined(MOZ_CLANG_PLUGIN)
     86 #  define IMPLICIT __attribute__((annotate("moz_implicit")))
     87 #else
     88 #  define IMPLICIT
     89 #endif
     90 
     91 #if defined(_MSC_VER)
     92 #  define ALIGNED_DECL(_align, _type) __declspec(align(_align)) _type
     93 #else
     94 #  define ALIGNED_DECL(_align, _type) _type __attribute__((aligned(_align)))
     95 #endif
     96 
     97 #include "gl_defs.h"
     98 #include "glsl.h"
     99 #include "program.h"
    100 #include "texture.h"
    101 
    102 using namespace glsl;
    103 
    104 typedef ivec2_scalar IntPoint;
    105 
    106 struct IntRect {
    107  int x0;
    108  int y0;
    109  int x1;
    110  int y1;
    111 
    112  IntRect() : x0(0), y0(0), x1(0), y1(0) {}
    113  IntRect(int x0, int y0, int x1, int y1) : x0(x0), y0(y0), x1(x1), y1(y1) {}
    114  IntRect(IntPoint origin, IntPoint size)
    115      : x0(origin.x),
    116        y0(origin.y),
    117        x1(origin.x + size.x),
    118        y1(origin.y + size.y) {}
    119 
    120  int width() const { return x1 - x0; }
    121  int height() const { return y1 - y0; }
    122  bool is_empty() const { return width() <= 0 || height() <= 0; }
    123 
    124  IntPoint origin() const { return IntPoint(x0, y0); }
    125 
    126  bool same_size(const IntRect& o) const {
    127    return width() == o.width() && height() == o.height();
    128  }
    129 
    130  bool contains(const IntRect& o) const {
    131    return o.x0 >= x0 && o.y0 >= y0 && o.x1 <= x1 && o.y1 <= y1;
    132  }
    133 
    134  IntRect& intersect(const IntRect& o) {
    135    x0 = max(x0, o.x0);
    136    y0 = max(y0, o.y0);
    137    x1 = min(x1, o.x1);
    138    y1 = min(y1, o.y1);
    139    return *this;
    140  }
    141 
    142  IntRect intersection(const IntRect& o) {
    143    IntRect result = *this;
    144    result.intersect(o);
    145    return result;
    146  }
    147 
    148  // Scale from source-space to dest-space, optionally rounding inward
    149  IntRect& scale(int srcWidth, int srcHeight, int dstWidth, int dstHeight,
    150                 bool roundIn = false) {
    151    x0 = (x0 * dstWidth + (roundIn ? srcWidth - 1 : 0)) / srcWidth;
    152    y0 = (y0 * dstHeight + (roundIn ? srcHeight - 1 : 0)) / srcHeight;
    153    x1 = (x1 * dstWidth) / srcWidth;
    154    y1 = (y1 * dstHeight) / srcHeight;
    155    return *this;
    156  }
    157 
    158  // Flip the rect's Y coords around inflection point at Y=offset
    159  void invert_y(int offset) {
    160    y0 = offset - y0;
    161    y1 = offset - y1;
    162    swap(y0, y1);
    163  }
    164 
    165  IntRect& offset(const IntPoint& o) {
    166    x0 += o.x;
    167    y0 += o.y;
    168    x1 += o.x;
    169    y1 += o.y;
    170    return *this;
    171  }
    172 
    173  IntRect operator+(const IntPoint& o) const {
    174    return IntRect(*this).offset(o);
    175  }
    176  IntRect operator-(const IntPoint& o) const {
    177    return IntRect(*this).offset(-o);
    178  }
    179 };
    180 
    181 typedef vec2_scalar Point2D;
    182 typedef vec4_scalar Point3D;
    183 
    184 struct IntRange {
    185  int start;
    186  int end;
    187 
    188  int len() const { return end - start; }
    189 
    190  IntRange intersect(IntRange r) const {
    191    return {max(start, r.start), min(end, r.end)};
    192  }
    193 };
    194 
    195 struct FloatRange {
    196  float start;
    197  float end;
    198 
    199  float clip(float x) const { return clamp(x, start, end); }
    200 
    201  FloatRange clip(FloatRange r) const { return {clip(r.start), clip(r.end)}; }
    202 
    203  FloatRange merge(FloatRange r) const {
    204    return {min(start, r.start), max(end, r.end)};
    205  }
    206 
    207  IntRange round() const {
    208    return {int(floor(start + 0.5f)), int(floor(end + 0.5f))};
    209  }
    210 
    211  IntRange round_out() const { return {int(floor(start)), int(ceil(end))}; }
    212 };
    213 
    214 template <typename P>
    215 static inline FloatRange x_range(P p0, P p1) {
    216  return {min(p0.x, p1.x), max(p0.x, p1.x)};
    217 }
    218 
// State for one vertex attribute slot within a vertex array object, along
// with a cached pointer into the backing buffer's data.
struct VertexAttrib {
  size_t size = 0;  // in bytes
  GLenum type = 0;  // component type as passed to glVertexAttrib*Pointer
  bool normalized = false;
  GLsizei stride = 0;  // byte distance between consecutive elements
  GLuint offset = 0;   // byte offset into the bound vertex buffer
  bool enabled = false;
  GLuint divisor = 0;  // instancing divisor (0 = per-vertex)
  int vertex_array = 0;   // owning VAO id
  int vertex_buffer = 0;  // buffer object this attribute reads from
  char* buf = nullptr;  // XXX: this can easily dangle
  size_t buf_size = 0;  // this will let us bounds check

  // Mark the buffer as invalid so we don't accidentally use stale data.
  void disable() {
    enabled = false;
    buf = nullptr;
    buf_size = 0;
  }
};
    239 
    240 static int bytes_for_internal_format(GLenum internal_format) {
    241  switch (internal_format) {
    242    case GL_RGBA32F:
    243      return 4 * 4;
    244    case GL_RGBA32I:
    245    case GL_RGBA_INTEGER:
    246      return 4 * 4;
    247    case GL_RGBA8:
    248    case GL_BGRA8:
    249    case GL_RGBA:
    250    case GL_BGRA:
    251      return 4;
    252    case GL_R8:
    253    case GL_RED:
    254      return 1;
    255    case GL_RG8:
    256    case GL_RG:
    257      return 2;
    258    case GL_DEPTH_COMPONENT:
    259    case GL_DEPTH_COMPONENT16:
    260    case GL_DEPTH_COMPONENT24:
    261    case GL_DEPTH_COMPONENT32:
    262      return 4;
    263    case GL_RGB_RAW_422_APPLE:
    264      return 2;
    265    case GL_R16:
    266      return 2;
    267    case GL_RG16:
    268      return 4;
    269    default:
    270      debugf("internal format: %x\n", internal_format);
    271      assert(0);
    272      return 0;
    273  }
    274 }
    275 
    276 static inline int aligned_stride(int row_bytes) { return (row_bytes + 3) & ~3; }
    277 
    278 static TextureFormat gl_format_to_texture_format(int type) {
    279  switch (type) {
    280    case GL_RGBA32F:
    281      return TextureFormat::RGBA32F;
    282    case GL_RGBA32I:
    283      return TextureFormat::RGBA32I;
    284    case GL_RGBA8:
    285      return TextureFormat::RGBA8;
    286    case GL_R8:
    287      return TextureFormat::R8;
    288    case GL_RG8:
    289      return TextureFormat::RG8;
    290    case GL_R16:
    291      return TextureFormat::R16;
    292    case GL_RG16:
    293      return TextureFormat::RG16;
    294    case GL_RGB_RAW_422_APPLE:
    295      return TextureFormat::YUY2;
    296    default:
    297      assert(0);
    298      return TextureFormat::RGBA8;
    299  }
    300 }
    301 
// Backing storage for a GL query object; holds the query's result value
// (e.g. elapsed time or samples passed, accumulated elsewhere).
struct Query {
  uint64_t value = 0;
};
    305 
// Backing storage for a GL buffer object (array/element/pixel buffers).
// `size` is the logical size requested by the client; `capacity` is the
// actual heap allocation, so shrinking never reallocates.
struct Buffer {
  char* buf = nullptr;
  size_t size = 0;
  size_t capacity = 0;

  // Returns true if re-allocation succeeded, false otherwise...
  bool allocate(size_t new_size) {
    // If the size remains unchanged, don't allocate anything.
    if (new_size == size) {
      return true;
    }
    // If the new size is within the existing capacity of the buffer, just
    // reuse the existing buffer.
    if (new_size <= capacity) {
      size = new_size;
      return true;
    }
    // Otherwise we need to reallocate the buffer to hold up to the requested
    // larger size.
    char* new_buf = (char*)realloc(buf, new_size);
    assert(new_buf);
    if (!new_buf) {
      // If we fail, null out the buffer rather than leave around the old
      // allocation state.
      cleanup();
      return false;
    }
    // The reallocation succeeded, so install the buffer.
    buf = new_buf;
    size = new_size;
    capacity = new_size;
    return true;
  }

  // Free the allocation and reset all bookkeeping to the empty state.
  void cleanup() {
    if (buf) {
      free(buf);
      buf = nullptr;
      size = 0;
      capacity = 0;
    }
  }

  ~Buffer() { cleanup(); }

  // One past the last valid byte, or null when unallocated.
  char* end_ptr() const { return buf ? buf + size : nullptr; }

  // Resolve a GL-style buffer offset (passed as a pointer) into an actual
  // address within the buffer; null if out of range or unallocated.
  void* get_data(void* data) {
    if (buf) {
      size_t offset = (size_t)data;
      if (offset < size) {
        return buf + offset;
      }
    }
    return nullptr;
  }
};
    363 
// FBO state: ids of the textures serving as color and depth attachments
// (0 = no attachment).
struct Framebuffer {
  GLuint color_attachment = 0;
  GLuint depth_attachment = 0;
};
    368 
// A renderbuffer is implemented as a thin wrapper over a backing texture.
struct Renderbuffer {
  GLuint texture = 0;

  // Invoked by ObjectStore::erase when this renderbuffer is deleted; defined
  // elsewhere (presumably releases the backing texture — see definition).
  void on_erase();
};
    374 
    375 TextureFilter gl_filter_to_texture_filter(int type) {
    376  switch (type) {
    377    case GL_NEAREST:
    378      return TextureFilter::NEAREST;
    379    case GL_NEAREST_MIPMAP_LINEAR:
    380      return TextureFilter::NEAREST;
    381    case GL_NEAREST_MIPMAP_NEAREST:
    382      return TextureFilter::NEAREST;
    383    case GL_LINEAR:
    384      return TextureFilter::LINEAR;
    385    case GL_LINEAR_MIPMAP_LINEAR:
    386      return TextureFilter::LINEAR;
    387    case GL_LINEAR_MIPMAP_NEAREST:
    388      return TextureFilter::LINEAR;
    389    default:
    390      assert(0);
    391      return TextureFilter::NEAREST;
    392  }
    393 }
    394 
// A 2D texture or framebuffer attachment backing store. Pixel memory is
// either owned by SWGL (SHOULD_FREE flag set) or supplied externally via
// set_buffer().
struct Texture {
  GLenum internal_format = 0;
  int width = 0;
  int height = 0;
  char* buf = nullptr;      // pixel data; ownership depends on SHOULD_FREE
  size_t buf_size = 0;      // allocated byte size when internally owned
  uint32_t buf_stride = 0;  // bytes per row
  uint8_t buf_bpp = 0;      // bytes per pixel
  GLenum min_filter = GL_NEAREST;
  GLenum mag_filter = GL_LINEAR;
  // The number of active locks on this texture. If this texture has any active
  // locks, we need to disallow modifying or destroying the texture as it may
  // be accessed by other threads where modifications could lead to races.
  int32_t locked = 0;
  // When used as an attachment of a framebuffer, rendering to the texture
  // behaves as if it is located at the given offset such that the offset is
  // subtracted from all transformed vertexes after the viewport is applied.
  IntPoint offset;

  enum FLAGS {
    // If the buffer is internally-allocated by SWGL
    SHOULD_FREE = 1 << 1,
    // If the buffer has been cleared to initialize it. Currently this is only
    // utilized by depth buffers which need to know when depth runs have reset
    // to a valid row state. When unset, the depth runs may contain garbage.
    CLEARED = 1 << 2,
    // The texture was deleted while still locked and must stay alive until all
    // locks are released.
    ZOMBIE = 1 << 3,
  };
  int flags = SHOULD_FREE;
  bool should_free() const { return bool(flags & SHOULD_FREE); }
  bool cleared() const { return bool(flags & CLEARED); }
  bool zombie() const { return bool(flags & ZOMBIE); }

  // Set or clear a single FLAGS bit.
  void set_flag(int flag, bool val) {
    if (val) {
      flags |= flag;
    } else {
      flags &= ~flag;
    }
  }
  void set_should_free(bool val) {
    // buf must be null before SHOULD_FREE can be safely toggled. Otherwise, we
    // might accidentally mistakenly realloc an externally allocated buffer as
    // if it were an internally allocated one.
    assert(!buf);
    set_flag(SHOULD_FREE, val);
  }
  void set_cleared(bool val) { set_flag(CLEARED, val); }
  void set_zombie(bool val) { set_flag(ZOMBIE, val); }

  // Delayed-clearing state. When a clear of an FB is requested, we don't
  // immediately clear each row, as the rows may be subsequently overwritten
  // by draw calls, allowing us to skip the work of clearing the affected rows
  // either fully or partially. Instead, we keep a bit vector of rows that need
  // to be cleared later and save the value they need to be cleared with so
  // that we can clear these rows individually when they are touched by draws.
  // This currently only works for 2D textures, but not on texture arrays.
  int delay_clear = 0;               // rows still pending a delayed clear
  uint32_t clear_val = 0;            // value the pending rows clear to
  uint32_t* cleared_rows = nullptr;  // bit per row: 1 = already cleared

  // Depth-run helpers for depth textures; defined elsewhere in this file.
  void init_depth_runs(uint32_t z);
  void fill_depth_runs(uint32_t z, const IntRect& scissor);

  // Schedule a delayed clear of every row to `val`; rows are actually
  // cleared later when first touched by a draw.
  void enable_delayed_clear(uint32_t val) {
    delay_clear = height;
    clear_val = val;
    if (!cleared_rows) {
      cleared_rows = new uint32_t[(height + 31) / 32];
    }
    memset(cleared_rows, 0, ((height + 31) / 32) * sizeof(uint32_t));
    // Pre-mark the unused tail bits of the final word as "cleared" so they
    // never count toward pending rows.
    if (height & 31) {
      cleared_rows[height / 32] = ~0U << (height & 31);
    }
  }

  // Drop any pending delayed-clear state and its row bit vector.
  void disable_delayed_clear() {
    if (cleared_rows) {
      delete[] cleared_rows;
      cleared_rows = nullptr;
      delay_clear = 0;
    }
  }

  int bpp() const { return buf_bpp; }
  int compute_bpp() const { return bytes_for_internal_format(internal_format); }

  size_t stride() const { return buf_stride; }
  size_t compute_stride(int bpp, int width) const {
    return aligned_stride(bpp * width);
  }

  // Set an external backing buffer of this texture.
  void set_buffer(void* new_buf, size_t new_stride) {
    assert(!should_free());
    // Ensure that the supplied stride is at least as big as the row data and
    // is aligned to the smaller of either the BPP or word-size. We need to at
    // least be able to sample data from within a row and sample whole pixels
    // of smaller formats without risking unaligned access.
    int new_bpp = compute_bpp();
    assert(new_stride >= size_t(new_bpp * width) &&
           new_stride % min(new_bpp, sizeof(uint32_t)) == 0);

    buf = (char*)new_buf;
    buf_size = 0;  // external buffers track no owned size
    buf_bpp = new_bpp;
    buf_stride = new_stride;
  }

  // Returns true if re-allocation succeeded, false otherwise...
  bool allocate(bool force = false, int min_width = 0, int min_height = 0) {
    assert(!locked);  // Locked textures shouldn't be reallocated
    // If we get here, some GL API call that invalidates the texture was used.
    // Mark the buffer as not-cleared to signal this.
    set_cleared(false);
    // Check if there is either no buffer currently or if we forced validation
    // of the buffer size because some dimension might have changed.
    if ((!buf || force) && should_free()) {
      // Compute the buffer's BPP and stride, since they may have changed.
      int new_bpp = compute_bpp();
      size_t new_stride = compute_stride(new_bpp, width);
      // Compute new size based on the maximum potential stride, rather than
      // the current stride, to hopefully avoid reallocations when size would
      // otherwise change too much...
      size_t max_stride = compute_stride(new_bpp, max(width, min_width));
      size_t size = max_stride * max(height, min_height);
      if ((!buf && size > 0) || size > buf_size) {
        // Allocate with a SIMD register-sized tail of padding at the end so we
        // can safely read or write past the end of the texture with SIMD ops.
        // Currently only the flat Z-buffer texture needs this padding due to
        // full-register loads and stores in check_depth and discard_depth. In
        // case some code in the future accidentally uses a linear filter on a
        // texture with less than 2 pixels per row, we also add this padding
        // just to be safe. All other texture types and use-cases should be
        // safe to omit padding.
        size_t padding =
            internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
                ? sizeof(Float)
                : 0;
        char* new_buf = (char*)realloc(buf, size + padding);
        assert(new_buf);
        if (!new_buf) {
          // Allocation failed, so ensure we don't leave stale buffer state.
          cleanup();
          return false;
        }
        // Successfully reallocated the buffer, so go ahead and set it.
        buf = new_buf;
        buf_size = size;
      }
      // Set the BPP and stride in case they changed.
      buf_bpp = new_bpp;
      buf_stride = new_stride;
    }
    // Allocation succeeded or nothing changed...
    return true;
  }

  // Release the pixel buffer (only freeing it if internally owned) and any
  // delayed-clear state.
  void cleanup() {
    assert(!locked);  // Locked textures shouldn't be destroyed
    if (buf) {
      // If we need to toggle SHOULD_FREE state, ensure that buf is nulled out,
      // regardless of whether we internally allocated it. This will prevent us
      // from wrongly treating buf as having been internally allocated for when
      // we go to realloc if it actually was externally allocted.
      if (should_free()) {
        free(buf);
      }
      buf = nullptr;
      buf_size = 0;
      buf_bpp = 0;
      buf_stride = 0;
    }
    disable_delayed_clear();
  }

  ~Texture() { cleanup(); }

  IntRect bounds() const { return IntRect{0, 0, width, height}; }
  IntRect offset_bounds() const { return bounds() + offset; }

  // Find the valid sampling bounds relative to the requested region
  IntRect sample_bounds(const IntRect& req, bool invertY = false) const {
    IntRect bb = bounds().intersect(req) - req.origin();
    if (invertY) bb.invert_y(req.height());
    return bb;
  }

  // Get a pointer for sampling at the given offset
  char* sample_ptr(int x, int y) const {
    return buf + y * stride() + x * bpp();
  }

  // Get a pointer to the end of the current buffer
  char* end_ptr() const {
    return buf + (height - 1) * stride() + width * bpp();
  }

  // Get a pointer for sampling the requested region and limit to the provided
  // sampling bounds
  char* sample_ptr(const IntRect& req, const IntRect& bounds,
                   bool invertY = false) const {
    // Offset the sample pointer by the clamped bounds
    int x = req.x0 + bounds.x0;
    // Invert the Y offset if necessary
    int y = invertY ? req.y1 - 1 - bounds.y0 : req.y0 + bounds.y0;
    return sample_ptr(x, y);
  }
};
    606 
// The last vertex attribute is reserved as a null attribute in case a vertex
// attribute is used without being set.
#define MAX_ATTRIBS 17
#define NULL_ATTRIB 16
// Vertex array object (VAO) state: the attribute bindings plus the element
// array buffer binding, which the GL spec ties to the VAO.
struct VertexArray {
  VertexAttrib attribs[MAX_ATTRIBS];
  // Highest attribute index in use; -1 when none (maintained elsewhere —
  // presumably by the attrib-pointer entry points).
  int max_attrib = -1;
  // The GL spec defines element array buffer binding to be part of VAO state.
  GLuint element_array_buffer_binding = 0;

  // Revalidate cached attribute buffer pointers; defined elsewhere.
  void validate();
};
    619 
// A GL shader object. SWGL shaders are precompiled into the binary; `loader`
// resolves the shader's program implementation rather than compiling source.
struct Shader {
  GLenum type = 0;  // shader stage type as passed by the client
  ProgramLoader loader = nullptr;
};
    624 
// A linked GL program and its stage implementations.
struct Program {
  ProgramImpl* impl = nullptr;
  // NOTE(review): vert_impl/frag_impl are not deleted in the destructor;
  // presumably they are owned by (or embedded in) impl — confirm before
  // adding cleanup here.
  VertexShaderImpl* vert_impl = nullptr;
  FragmentShaderImpl* frag_impl = nullptr;
  // Set when glDeleteProgram is requested while the program may still be in
  // use — TODO confirm against the delete/use entry points.
  bool deleted = false;

  ~Program() { delete impl; }
};
    633 
// clang-format off
// Fully-expand GL defines while ignoring more than 4 suffixes
#define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
// Generate a blend key enum symbol
#define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
#define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
#define AA_BLEND_KEY(...) CONCAT_KEY(AA_BLEND_, __VA_ARGS__, 0, 0, 0)
#define AA_MASK_BLEND_KEY(...) CONCAT_KEY(AA_MASK_BLEND_, __VA_ARGS__, 0, 0, 0)

// Utility macro to easily generate similar code for all implemented blend
// modes. Each entry expands to up to four GL blend factors (RGB src/dst,
// alpha src/dst), padded with 0 for modes that need fewer parameters.
#define FOR_EACH_BLEND_KEY(macro)                                              \
  macro(GL_ONE, GL_ZERO, 0, 0)                                                 \
  macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)  \
  macro(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                  \
  macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, 0, 0)                                 \
  macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE)                      \
  macro(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                 \
  macro(GL_ZERO, GL_SRC_COLOR, 0, 0)                                           \
  macro(GL_ONE, GL_ONE, 0, 0)                                                  \
  macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)                        \
  macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE)                       \
  macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0)                       \
  macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0)                                 \
  macro(GL_MIN, 0, 0, 0)                                                       \
  macro(GL_MAX, 0, 0, 0)                                                       \
  macro(GL_MULTIPLY_KHR, 0, 0, 0)                                              \
  macro(GL_SCREEN_KHR, 0, 0, 0)                                                \
  macro(GL_OVERLAY_KHR, 0, 0, 0)                                               \
  macro(GL_DARKEN_KHR, 0, 0, 0)                                                \
  macro(GL_LIGHTEN_KHR, 0, 0, 0)                                               \
  macro(GL_COLORDODGE_KHR, 0, 0, 0)                                            \
  macro(GL_COLORBURN_KHR, 0, 0, 0)                                             \
  macro(GL_HARDLIGHT_KHR, 0, 0, 0)                                             \
  macro(GL_SOFTLIGHT_KHR, 0, 0, 0)                                             \
  macro(GL_DIFFERENCE_KHR, 0, 0, 0)                                            \
  macro(GL_EXCLUSION_KHR, 0, 0, 0)                                             \
  macro(GL_HSL_HUE_KHR, 0, 0, 0)                                               \
  macro(GL_HSL_SATURATION_KHR, 0, 0, 0)                                        \
  macro(GL_HSL_COLOR_KHR, 0, 0, 0)                                             \
  macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0)                                        \
  macro(SWGL_BLEND_DROP_SHADOW, 0, 0, 0)                                       \
  macro(SWGL_BLEND_SUBPIXEL_TEXT, 0, 0, 0)

#define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
#define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_BLEND_KEY(...) AA_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_MASK_BLEND_KEY(...) AA_MASK_BLEND_KEY(__VA_ARGS__),
// One enum value per blend mode, repeated for each of the four rasterizer
// variants (plain, masked, anti-aliased, anti-aliased + masked).
enum BlendKey : uint8_t {
  FOR_EACH_BLEND_KEY(DEFINE_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_MASK_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_MASK_BLEND_KEY)
  BLEND_KEY_NONE = BLEND_KEY(GL_ONE, GL_ZERO),
  MASK_BLEND_KEY_NONE = MASK_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_BLEND_KEY_NONE = AA_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_MASK_BLEND_KEY_NONE = AA_MASK_BLEND_KEY(GL_ONE, GL_ZERO),
};
// clang-format on
    692 
    693 const size_t MAX_TEXTURE_UNITS = 16;
    694 
    695 template <typename T>
    696 static inline bool unlink(T& binding, T n) {
    697  if (binding == n) {
    698    binding = 0;
    699    return true;
    700  }
    701  return false;
    702 }
    703 
// Generic id -> object table backing the GL object namespaces (textures,
// buffers, programs, ...). Objects are heap-allocated and looked up by id;
// id 0 is reserved as the null object.
template <typename O>
struct ObjectStore {
  O** objects = nullptr;
  size_t size = 0;  // number of slots in `objects`
  // reserve object 0 as null
  size_t first_free = 1;
  // Dummy object returned by operator[] when a slot can't be created, so
  // callers always receive a valid reference.
  O invalid;

  ~ObjectStore() {
    if (objects) {
      for (size_t i = 0; i < size; i++) delete objects[i];
      free(objects);
    }
  }

  // Grow the slot array until index i is valid (1.5x growth). Returns false
  // if reallocation failed; existing slots are left intact in that case.
  bool grow(size_t i) {
    size_t new_size = size ? size : 8;
    while (new_size <= i) new_size += new_size / 2;
    O** new_objects = (O**)realloc(objects, new_size * sizeof(O*));
    assert(new_objects);
    if (!new_objects) return false;
    while (size < new_size) new_objects[size++] = nullptr;
    objects = new_objects;
    return true;
  }

  // Place a copy of o at slot i if that slot is empty; silently a no-op when
  // growth fails or the slot is already occupied.
  void insert(size_t i, const O& o) {
    if (i >= size && !grow(i)) return;
    if (!objects[i]) objects[i] = new O(o);
  }

  // Lowest unoccupied slot at or after first_free (may equal size).
  size_t next_free() {
    size_t i = first_free;
    while (i < size && objects[i]) i++;
    first_free = i;
    return i;
  }

  // Allocate a fresh id holding a copy of o and return the id.
  size_t insert(const O& o = O()) {
    size_t i = next_free();
    insert(i, o);
    return i;
  }

  // Access slot i, default-constructing the object on demand; falls back to
  // `invalid` if the slot could not be created.
  O& operator[](size_t i) {
    insert(i, O());
    return i < size ? *objects[i] : invalid;
  }

  O* find(size_t i) const { return i < size ? objects[i] : nullptr; }

  // SFINAE dispatch: the second overload is preferred when O declares an
  // on_erase() member, giving the object a chance to release resources on
  // deletion; otherwise the variadic overload is a no-op.
  template <typename T>
  void on_erase(T*, ...) {}
  template <typename T>
  void on_erase(T* o, decltype(&T::on_erase)) {
    o->on_erase();
  }

  // Remove the object at slot i. When should_delete is false the object
  // itself is kept alive (ownership transferred elsewhere) and only the slot
  // is vacated.
  bool erase(size_t i, bool should_delete = true) {
    if (i < size && objects[i]) {
      on_erase(objects[i], nullptr);
      if (should_delete) {
        delete objects[i];
      }
      objects[i] = nullptr;
      if (i < first_free) first_free = i;
      return true;
    }
    return false;
  }

  O** begin() const { return objects; }
  O** end() const { return &objects[size]; }
};
    778 
    779 struct Context {
    780  int32_t references = 1;
    781 
    782  ObjectStore<Query> queries;
    783  ObjectStore<Buffer> buffers;
    784  ObjectStore<Texture> textures;
    785  ObjectStore<VertexArray> vertex_arrays;
    786  ObjectStore<Framebuffer> framebuffers;
    787  ObjectStore<Renderbuffer> renderbuffers;
    788  ObjectStore<Shader> shaders;
    789  ObjectStore<Program> programs;
    790 
    791  GLenum last_error = GL_NO_ERROR;
    792 
    793  IntRect viewport = {0, 0, 0, 0};
    794 
    795  bool blend = false;
    796  GLenum blendfunc_srgb = GL_ONE;
    797  GLenum blendfunc_drgb = GL_ZERO;
    798  GLenum blendfunc_sa = GL_ONE;
    799  GLenum blendfunc_da = GL_ZERO;
    800  GLenum blend_equation = GL_FUNC_ADD;
    801  V8<uint16_t> blendcolor = 0;
    802  BlendKey blend_key = BLEND_KEY_NONE;
    803 
    804  bool depthtest = false;
    805  bool depthmask = true;
    806  GLenum depthfunc = GL_LESS;
    807 
    808  bool scissortest = false;
    809  IntRect scissor = {0, 0, 0, 0};
    810 
    811  GLfloat clearcolor[4] = {0, 0, 0, 0};
    812  GLdouble cleardepth = 1;
    813 
    814  int unpack_row_length = 0;
    815 
    816  int shaded_rows = 0;
    817  int shaded_pixels = 0;
    818 
    819  struct TextureUnit {
    820    GLuint texture_2d_binding = 0;
    821    GLuint texture_rectangle_binding = 0;
    822 
    823    void unlink(GLuint n) {
    824      ::unlink(texture_2d_binding, n);
    825      ::unlink(texture_rectangle_binding, n);
    826    }
    827  };
    828  TextureUnit texture_units[MAX_TEXTURE_UNITS];
    829  int active_texture_unit = 0;
    830 
    831  GLuint current_program = 0;
    832 
    833  GLuint current_vertex_array = 0;
    834  bool validate_vertex_array = true;
    835 
    836  GLuint pixel_pack_buffer_binding = 0;
    837  GLuint pixel_unpack_buffer_binding = 0;
    838  GLuint array_buffer_binding = 0;
    839  GLuint time_elapsed_query = 0;
    840  GLuint samples_passed_query = 0;
    841  GLuint renderbuffer_binding = 0;
    842  GLuint draw_framebuffer_binding = 0;
    843  GLuint read_framebuffer_binding = 0;
    844  GLuint unknown_binding = 0;
    845 
  // Map a GL binding target to the context slot that stores the currently
  // bound object name, so generic bind/unbind code can assign through it.
  GLuint& get_binding(GLenum name) {
    switch (name) {
      case GL_PIXEL_PACK_BUFFER:
        return pixel_pack_buffer_binding;
      case GL_PIXEL_UNPACK_BUFFER:
        return pixel_unpack_buffer_binding;
      case GL_ARRAY_BUFFER:
        return array_buffer_binding;
      case GL_ELEMENT_ARRAY_BUFFER:
        // Element array binding lives on the current vertex array object.
        return vertex_arrays[current_vertex_array].element_array_buffer_binding;
      case GL_TEXTURE_2D:
        return texture_units[active_texture_unit].texture_2d_binding;
      case GL_TEXTURE_RECTANGLE:
        return texture_units[active_texture_unit].texture_rectangle_binding;
      case GL_TIME_ELAPSED:
        return time_elapsed_query;
      case GL_SAMPLES_PASSED:
        return samples_passed_query;
      case GL_RENDERBUFFER:
        return renderbuffer_binding;
      case GL_DRAW_FRAMEBUFFER:
        return draw_framebuffer_binding;
      case GL_READ_FRAMEBUFFER:
        return read_framebuffer_binding;
      default:
        debugf("unknown binding %x\n", name);
        assert(false);
        // Writable dummy slot so callers still have a valid reference.
        return unknown_binding;
    }
  }
    876 
  // Look up the texture object currently bound to the sampler's target on
  // the given texture unit. The sampler argument only selects the target.
  Texture& get_texture(sampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(isampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(sampler2DRect, int unit) {
    return textures[texture_units[unit].texture_rectangle_binding];
  }
    888 
  // Clip a bounding rect against the scissor rect (translated by origin)
  // when scissor testing is enabled; otherwise return it unchanged.
  IntRect apply_scissor(IntRect bb,
                        const IntPoint& origin = IntPoint(0, 0)) const {
    return scissortest ? bb.intersect(scissor - origin) : bb;
  }

  // Clip a texture's bounds against the scissor rect, honoring the
  // texture's offset.
  IntRect apply_scissor(const Texture& t) const {
    return apply_scissor(t.bounds(), t.offset);
  }
};
// The single active GL context; all entry points operate on this.
static Context* ctx = nullptr;
// Shader implementations for the program currently in use.
static VertexShaderImpl* vertex_shader = nullptr;
static FragmentShaderImpl* fragment_shader = nullptr;
// Cached copy of the context's blend key used during rasterization.
static BlendKey blend_key = BLEND_KEY_NONE;

static void prepare_texture(Texture& t, const IntRect* skip = nullptr);
    904 
    905 template <typename S>
    906 static inline void init_filter(S* s, Texture& t) {
    907  // If the width is not at least 2 pixels, then we can't safely sample the end
    908  // of the row with a linear filter. In that case, just punt to using nearest
    909  // filtering instead.
    910  s->filter = t.width >= 2 ? gl_filter_to_texture_filter(t.mag_filter)
    911                           : TextureFilter::NEAREST;
    912 }
    913 
    914 template <typename S>
    915 static inline void init_sampler(S* s, Texture& t) {
    916  prepare_texture(t);
    917  s->width = t.width;
    918  s->height = t.height;
    919  s->stride = t.stride();
    920  int bpp = t.bpp();
    921  if (bpp >= 4)
    922    s->stride /= 4;
    923  else if (bpp == 2)
    924    s->stride /= 2;
    925  else
    926    assert(bpp == 1);
    927  // Use uint32_t* for easier sampling, but need to cast to uint8_t* or
    928  // uint16_t* for formats with bpp < 4.
    929  s->buf = (uint32_t*)t.buf;
    930  s->format = gl_format_to_texture_format(t.internal_format);
    931 }
    932 
template <typename S>
static inline void null_sampler(S* s) {
  // For null texture data, just make the sampler provide a 1x1 buffer that is
  // transparent black. Ensure buffer holds at least a SIMD vector of zero data
  // for SIMD padding of unaligned loads.
  static const uint32_t zeroBuf[sizeof(Float) / sizeof(uint32_t)] = {0};
  s->width = 1;
  s->height = 1;
  s->stride = s->width;
  s->buf = (uint32_t*)zeroBuf;
  s->format = TextureFormat::RGBA8;
}
    945 
template <typename S>
static inline void null_filter(S* s) {
  // A null texture has no meaningful filtering; default to nearest.
  s->filter = TextureFilter::NEAREST;
}
    950 
    951 template <typename S>
    952 S* lookup_sampler(S* s, int texture) {
    953  Texture& t = ctx->get_texture(s, texture);
    954  if (!t.buf) {
    955    null_sampler(s);
    956    null_filter(s);
    957  } else {
    958    init_sampler(s, t);
    959    init_filter(s, t);
    960  }
    961  return s;
    962 }
    963 
    964 template <typename S>
    965 S* lookup_isampler(S* s, int texture) {
    966  Texture& t = ctx->get_texture(s, texture);
    967  if (!t.buf) {
    968    null_sampler(s);
    969  } else {
    970    init_sampler(s, t);
    971  }
    972  return s;
    973 }
    974 
    975 int bytes_per_type(GLenum type) {
    976  switch (type) {
    977    case GL_INT:
    978      return 4;
    979    case GL_FLOAT:
    980      return 4;
    981    case GL_UNSIGNED_SHORT:
    982      return 2;
    983    case GL_UNSIGNED_BYTE:
    984      return 1;
    985    default:
    986      assert(0);
    987      return 0;
    988  }
    989 }
    990 
    991 template <typename S, typename C>
    992 static inline S expand_attrib(const char* buf, size_t size, bool normalized) {
    993  typedef typename ElementType<S>::ty elem_type;
    994  S scalar = {0};
    995  const C* src = reinterpret_cast<const C*>(buf);
    996  if (normalized) {
    997    const float scale = 1.0f / ((1 << (8 * sizeof(C))) - 1);
    998    for (size_t i = 0; i < size / sizeof(C); i++) {
    999      put_nth_component(scalar, i, elem_type(src[i]) * scale);
   1000    }
   1001  } else {
   1002    for (size_t i = 0; i < size / sizeof(C); i++) {
   1003      put_nth_component(scalar, i, elem_type(src[i]));
   1004    }
   1005  }
   1006  return scalar;
   1007 }
   1008 
// Load a single attribute value of type S from src. When the stored
// attribute is at least as large as S, read it directly; otherwise expand
// narrow unsigned components or zero-pad a partial copy.
template <typename S>
static inline S load_attrib_scalar(VertexAttrib& va, const char* src) {
  if (sizeof(S) <= va.size) {
    return *reinterpret_cast<const S*>(src);
  }
  if (va.type == GL_UNSIGNED_SHORT) {
    return expand_attrib<S, uint16_t>(src, va.size, va.normalized);
  }
  if (va.type == GL_UNSIGNED_BYTE) {
    return expand_attrib<S, uint8_t>(src, va.size, va.normalized);
  }
  // Fallback: component types must match; copy what's present and leave
  // the remaining components zeroed.
  assert(sizeof(typename ElementType<S>::ty) == bytes_per_type(va.type));
  S scalar = {0};
  memcpy(&scalar, src, va.size);
  return scalar;
}
   1025 
// Load attribute data for a primitive into the SIMD attribute register T,
// filling the 4 vertex-shader lanes according to the primitive's vertex
// count. A non-zero divisor selects per-instance rather than per-vertex
// data, which is broadcast to all lanes.
template <typename T>
void load_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                 int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  // If no buffer is available, just use a zero default.
  if (!va.buf_size) {
    attrib = T(scalar_type{0});
  } else if (va.divisor != 0) {
    char* src = (char*)va.buf + va.stride * instance + va.offset;
    assert(src + va.size <= va.buf + va.buf_size);
    attrib = T(load_attrib_scalar<scalar_type>(va, src));
  } else {
    // Specialized for WR's primitive vertex order/winding.
    if (!count) return;
    assert(count >= 2 && count <= 4);
    char* src = (char*)va.buf + va.stride * start + va.offset;
    switch (count) {
      case 2: {
        // Lines must be indexed at offsets 0, 1.
        // Line vertexes fill vertex shader SIMD lanes as 0, 1, 1, 0.
        scalar_type lanes[2] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride)};
        attrib = (T){lanes[0], lanes[1], lanes[1], lanes[0]};
        break;
      }
      case 3: {
        // Triangles must be indexed at offsets 0, 1, 2.
        // Triangle vertexes fill vertex shader SIMD lanes as 0, 1, 2, 2.
        scalar_type lanes[3] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride),
            load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        attrib = (T){lanes[0], lanes[1], lanes[2], lanes[2]};
        break;
      }
      default:
        // Quads must be successive triangles indexed at offsets 0, 1, 2, 2,
        // 1, 3. Quad vertexes fill vertex shader SIMD lanes as 0, 1, 3, 2, so
        // that the points form a convex path that can be traversed by the
        // rasterizer.
        attrib = (T){load_attrib_scalar<scalar_type>(va, src),
                     load_attrib_scalar<scalar_type>(va, src + va.stride),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 3),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        break;
    }
  }
}
   1075 
// Load a flat (non-interpolated) attribute: a single scalar value,
// broadcast across all SIMD lanes, taken from instance data when the
// divisor is non-zero and from the first vertex otherwise.
template <typename T>
void load_flat_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                      int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  // If no buffer is available, just use a zero default.
  if (!va.buf_size) {
    attrib = T{0};
    return;
  }
  char* src = nullptr;
  if (va.divisor != 0) {
    src = (char*)va.buf + va.stride * instance + va.offset;
  } else {
    if (!count) return;
    src = (char*)va.buf + va.stride * start + va.offset;
  }
  assert(src + va.size <= va.buf + va.buf_size);
  attrib = T(load_attrib_scalar<scalar_type>(va, src));
}
   1095 
// Install the vertex and fragment shader implementations for the given
// program into the global shader pointers; program 0 clears them.
void setup_program(GLuint program) {
  if (!program) {
    vertex_shader = nullptr;
    fragment_shader = nullptr;
    return;
  }
  Program& p = ctx->programs[program];
  assert(p.impl);
  assert(p.vert_impl);
  assert(p.frag_impl);
  vertex_shader = p.vert_impl;
  fragment_shader = p.frag_impl;
}
   1109 
   1110 extern ProgramLoader load_shader(const char* name);
   1111 
   1112 extern "C" {
   1113 
void UseProgram(GLuint program) {
  // If the outgoing program was flagged for deletion while it was current,
  // actually destroy it now that it is being unbound.
  if (ctx->current_program && program != ctx->current_program) {
    auto* p = ctx->programs.find(ctx->current_program);
    if (p && p->deleted) {
      ctx->programs.erase(ctx->current_program);
    }
  }
  ctx->current_program = program;
  setup_program(program);
}
   1124 
// Store the viewport as a rect with exclusive right/bottom edges.
void SetViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
  ctx->viewport = IntRect{x, y, x + width, y + height};
}
   1128 
   1129 void Enable(GLenum cap) {
   1130  switch (cap) {
   1131    case GL_BLEND:
   1132      ctx->blend = true;
   1133      break;
   1134    case GL_DEPTH_TEST:
   1135      ctx->depthtest = true;
   1136      break;
   1137    case GL_SCISSOR_TEST:
   1138      ctx->scissortest = true;
   1139      break;
   1140  }
   1141 }
   1142 
   1143 void Disable(GLenum cap) {
   1144  switch (cap) {
   1145    case GL_BLEND:
   1146      ctx->blend = false;
   1147      break;
   1148    case GL_DEPTH_TEST:
   1149      ctx->depthtest = false;
   1150      break;
   1151    case GL_SCISSOR_TEST:
   1152      ctx->scissortest = false;
   1153      break;
   1154  }
   1155 }
   1156 
// Report the last error generated and clear the error status, matching
// GL's sticky-error semantics.
GLenum GetError() {
  GLenum error = ctx->last_error;
  ctx->last_error = GL_NO_ERROR;
  return error;
}
   1163 
// Sets the error status to out-of-memory to indicate that a buffer
// or texture re-allocation failed. Reported via the next GetError call.
static void out_of_memory() { ctx->last_error = GL_OUT_OF_MEMORY; }
   1167 
// Extension strings advertised via GL_NUM_EXTENSIONS and glGetStringi.
static const char* const extensions[] = {
    "GL_ARB_blend_func_extended",
    "GL_ARB_clear_texture",
    "GL_ARB_copy_image",
    "GL_ARB_draw_instanced",
    "GL_ARB_explicit_attrib_location",
    "GL_ARB_instanced_arrays",
    "GL_ARB_invalidate_subdata",
    "GL_ARB_texture_storage",
    "GL_EXT_timer_query",
    "GL_KHR_blend_equation_advanced",
    "GL_KHR_blend_equation_advanced_coherent",
    "GL_APPLE_rgb_422",
};
   1182 
// Answer integer state queries for the subset of pnames SWGL supports;
// unhandled pnames assert in debug builds and leave params untouched.
void GetIntegerv(GLenum pname, GLint* params) {
  assert(params);
  switch (pname) {
    case GL_MAX_TEXTURE_UNITS:
    case GL_MAX_TEXTURE_IMAGE_UNITS:
      params[0] = MAX_TEXTURE_UNITS;
      break;
    case GL_MAX_TEXTURE_SIZE:
      params[0] = 1 << 15;
      break;
    case GL_MAX_ARRAY_TEXTURE_LAYERS:
      // Array textures are not supported.
      params[0] = 0;
      break;
    case GL_READ_FRAMEBUFFER_BINDING:
      params[0] = ctx->read_framebuffer_binding;
      break;
    case GL_DRAW_FRAMEBUFFER_BINDING:
      params[0] = ctx->draw_framebuffer_binding;
      break;
    case GL_PIXEL_PACK_BUFFER_BINDING:
      params[0] = ctx->pixel_pack_buffer_binding;
      break;
    case GL_PIXEL_UNPACK_BUFFER_BINDING:
      params[0] = ctx->pixel_unpack_buffer_binding;
      break;
    case GL_NUM_EXTENSIONS:
      params[0] = sizeof(extensions) / sizeof(extensions[0]);
      break;
    case GL_MAJOR_VERSION:
      params[0] = 3;
      break;
    case GL_MINOR_VERSION:
      params[0] = 2;
      break;
    case GL_MIN_PROGRAM_TEXEL_OFFSET:
      params[0] = 0;
      break;
    case GL_MAX_PROGRAM_TEXEL_OFFSET:
      params[0] = MAX_TEXEL_OFFSET;
      break;
    default:
      debugf("unhandled glGetIntegerv parameter %x\n", pname);
      assert(false);
  }
}
   1228 
// Answer boolean state queries; only the depth write mask is supported.
void GetBooleanv(GLenum pname, GLboolean* params) {
  assert(params);
  switch (pname) {
    case GL_DEPTH_WRITEMASK:
      params[0] = ctx->depthmask;
      break;
    default:
      debugf("unhandled glGetBooleanv parameter %x\n", pname);
      assert(false);
  }
}
   1240 
// Report fixed identification strings for this software implementation.
const char* GetString(GLenum name) {
  switch (name) {
    case GL_VENDOR:
      return "Mozilla Gfx";
    case GL_RENDERER:
      return "Software WebRender";
    case GL_VERSION:
      return "3.2";
    case GL_SHADING_LANGUAGE_VERSION:
      return "1.50";
    default:
      debugf("unhandled glGetString parameter %x\n", name);
      assert(false);
      return nullptr;
  }
}
   1257 
   1258 const char* GetStringi(GLenum name, GLuint index) {
   1259  switch (name) {
   1260    case GL_EXTENSIONS:
   1261      if (index >= sizeof(extensions) / sizeof(extensions[0])) {
   1262        return nullptr;
   1263      }
   1264      return extensions[index];
   1265    default:
   1266      debugf("unhandled glGetStringi parameter %x\n", name);
   1267      assert(false);
   1268      return nullptr;
   1269  }
   1270 }
   1271 
// Canonicalize the alpha blend factor: when the RGB factor uses the
// color (or alpha) variant of the same underlying factor, rewrite the
// alpha factor to match it so fewer distinct blend keys are generated.
GLenum remap_blendfunc(GLenum rgb, GLenum a) {
  switch (a) {
    case GL_SRC_ALPHA:
      if (rgb == GL_SRC_COLOR) a = GL_SRC_COLOR;
      break;
    case GL_ONE_MINUS_SRC_ALPHA:
      if (rgb == GL_ONE_MINUS_SRC_COLOR) a = GL_ONE_MINUS_SRC_COLOR;
      break;
    case GL_DST_ALPHA:
      if (rgb == GL_DST_COLOR) a = GL_DST_COLOR;
      break;
    case GL_ONE_MINUS_DST_ALPHA:
      if (rgb == GL_ONE_MINUS_DST_COLOR) a = GL_ONE_MINUS_DST_COLOR;
      break;
    case GL_CONSTANT_ALPHA:
      if (rgb == GL_CONSTANT_COLOR) a = GL_CONSTANT_COLOR;
      break;
    case GL_ONE_MINUS_CONSTANT_ALPHA:
      if (rgb == GL_ONE_MINUS_CONSTANT_COLOR) a = GL_ONE_MINUS_CONSTANT_COLOR;
      break;
    case GL_SRC_COLOR:
      if (rgb == GL_SRC_ALPHA) a = GL_SRC_ALPHA;
      break;
    case GL_ONE_MINUS_SRC_COLOR:
      if (rgb == GL_ONE_MINUS_SRC_ALPHA) a = GL_ONE_MINUS_SRC_ALPHA;
      break;
    case GL_DST_COLOR:
      if (rgb == GL_DST_ALPHA) a = GL_DST_ALPHA;
      break;
    case GL_ONE_MINUS_DST_COLOR:
      if (rgb == GL_ONE_MINUS_DST_ALPHA) a = GL_ONE_MINUS_DST_ALPHA;
      break;
    case GL_CONSTANT_COLOR:
      if (rgb == GL_CONSTANT_ALPHA) a = GL_CONSTANT_ALPHA;
      break;
    case GL_ONE_MINUS_CONSTANT_COLOR:
      if (rgb == GL_ONE_MINUS_CONSTANT_ALPHA) a = GL_ONE_MINUS_CONSTANT_ALPHA;
      break;
    case GL_SRC1_ALPHA:
      if (rgb == GL_SRC1_COLOR) a = GL_SRC1_COLOR;
      break;
    case GL_ONE_MINUS_SRC1_ALPHA:
      if (rgb == GL_ONE_MINUS_SRC1_COLOR) a = GL_ONE_MINUS_SRC1_COLOR;
      break;
    case GL_SRC1_COLOR:
      if (rgb == GL_SRC1_ALPHA) a = GL_SRC1_ALPHA;
      break;
    case GL_ONE_MINUS_SRC1_COLOR:
      if (rgb == GL_ONE_MINUS_SRC1_ALPHA) a = GL_ONE_MINUS_SRC1_ALPHA;
      break;
  }
  return a;
}
   1325 
// Generate a hashed blend key based on blend func and equation state. This
// allows all the blend state to be processed down to a blend key that can be
// dealt with inside a single switch statement. Unrecognized combinations
// assert in debug builds and leave the key unchanged.
static void hash_blend_key() {
  GLenum srgb = ctx->blendfunc_srgb;
  GLenum drgb = ctx->blendfunc_drgb;
  GLenum sa = ctx->blendfunc_sa;
  GLenum da = ctx->blendfunc_da;
  GLenum equation = ctx->blend_equation;
#define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
  // Basic non-separate blend funcs used the two argument form
  int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0);
  // Separate alpha blend funcs use the 4 argument hash
  if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da);
  // Any other blend equation than the default func_add ignores the func and
  // instead generates a one-argument hash based on the equation
  if (equation != GL_FUNC_ADD) hash = HASH_BLEND_KEY(equation, 0, 0, 0);
  switch (hash) {
#define MAP_BLEND_KEY(...)                   \
  case HASH_BLEND_KEY(__VA_ARGS__):          \
    ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
    break;
    FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
    default:
      debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", srgb, drgb,
             sa, da, equation);
      assert(false);
      break;
  }
}
   1356 
// Set separate RGB and alpha blend factors, canonicalizing the alpha
// factors against their RGB counterparts before rehashing the blend key.
void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
  ctx->blendfunc_srgb = srgb;
  ctx->blendfunc_drgb = drgb;
  sa = remap_blendfunc(srgb, sa);
  da = remap_blendfunc(drgb, da);
  ctx->blendfunc_sa = sa;
  ctx->blendfunc_da = da;

  hash_blend_key();
}
   1367 
// Store the constant blend color, rounded to 16-bit fixed point and
// packed in b, g, r, a component order, replicated across two pixels.
void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
  I32 c = round_pixel((Float){b, g, r, a});
  ctx->blendcolor = CONVERT(c, U16).xyzwxyzw;
}
   1372 
// Set the blend equation; supports ADD/MIN/MAX and the KHR advanced
// blend equations. The blend key is only rehashed on an actual change.
void BlendEquation(GLenum mode) {
  assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX ||
         (mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR));
  if (mode != ctx->blend_equation) {
    ctx->blend_equation = mode;
    hash_blend_key();
  }
}
   1381 
// Enable or disable writes to the depth buffer.
void DepthMask(GLboolean flag) { ctx->depthmask = flag; }
   1383 
   1384 void DepthFunc(GLenum func) {
   1385  switch (func) {
   1386    case GL_LESS:
   1387    case GL_LEQUAL:
   1388      break;
   1389    default:
   1390      assert(false);
   1391  }
   1392  ctx->depthfunc = func;
   1393 }
   1394 
// Store the scissor rect with exclusive right/bottom edges.
void SetScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
  ctx->scissor = IntRect{x, y, x + width, y + height};
}
   1398 
   1399 void ClearColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
   1400  ctx->clearcolor[0] = r;
   1401  ctx->clearcolor[1] = g;
   1402  ctx->clearcolor[2] = b;
   1403  ctx->clearcolor[3] = a;
   1404 }
   1405 
// Record the depth clear value for later use by glClear.
void ClearDepth(GLdouble depth) { ctx->cleardepth = depth; }
   1407 
// Select the active texture unit, clamping into the supported range even
// in release builds where the asserts compile away.
void ActiveTexture(GLenum texture) {
  assert(texture >= GL_TEXTURE0);
  assert(texture < GL_TEXTURE0 + MAX_TEXTURE_UNITS);
  ctx->active_texture_unit =
      clamp(int(texture - GL_TEXTURE0), 0, int(MAX_TEXTURE_UNITS - 1));
}
   1414 
// Allocate n query objects and write their names into result.
void GenQueries(GLsizei n, GLuint* result) {
  for (int i = 0; i < n; i++) {
    Query q;
    result[i] = ctx->queries.insert(q);
  }
}
   1421 
// Destroy query n and clear any active query binding that referenced it.
void DeleteQuery(GLuint n) {
  if (n && ctx->queries.erase(n)) {
    unlink(ctx->time_elapsed_query, n);
    unlink(ctx->samples_passed_query, n);
  }
}
   1428 
// Allocate n buffer objects and write their names into result.
void GenBuffers(int n, GLuint* result) {
  for (int i = 0; i < n; i++) {
    Buffer b;
    result[i] = ctx->buffers.insert(b);
  }
}
   1435 
// Destroy buffer n and clear any buffer binding that referenced it.
void DeleteBuffer(GLuint n) {
  if (n && ctx->buffers.erase(n)) {
    unlink(ctx->pixel_pack_buffer_binding, n);
    unlink(ctx->pixel_unpack_buffer_binding, n);
    unlink(ctx->array_buffer_binding, n);
  }
}
   1443 
// Allocate n vertex array objects and write their names into result.
void GenVertexArrays(int n, GLuint* result) {
  for (int i = 0; i < n; i++) {
    VertexArray v;
    result[i] = ctx->vertex_arrays.insert(v);
  }
}
   1450 
// Destroy vertex array n and unbind it if it was current.
void DeleteVertexArray(GLuint n) {
  if (n && ctx->vertex_arrays.erase(n)) {
    unlink(ctx->current_vertex_array, n);
  }
}
   1456 
// Create a shader object of the given type and return its name.
GLuint CreateShader(GLenum type) {
  Shader s;
  s.type = type;
  return ctx->shaders.insert(s);
}
   1462 
// SWGL shaders are precompiled; instead of GLSL source, attach the loader
// for the named built-in shader. Unknown names leave the loader null.
void ShaderSourceByName(GLuint shader, char* name) {
  Shader& s = ctx->shaders[shader];
  s.loader = load_shader(name);
  if (!s.loader) {
    debugf("unknown shader %s\n", name);
  }
}
   1470 
   1471 void AttachShader(GLuint program, GLuint shader) {
   1472  Program& p = ctx->programs[program];
   1473  Shader& s = ctx->shaders[shader];
   1474  if (s.type == GL_VERTEX_SHADER) {
   1475    if (!p.impl && s.loader) p.impl = s.loader();
   1476  } else if (s.type == GL_FRAGMENT_SHADER) {
   1477    if (!p.impl && s.loader) p.impl = s.loader();
   1478  } else {
   1479    assert(0);
   1480  }
   1481 }
   1482 
// Destroy shader n; name 0 is ignored per GL convention.
void DeleteShader(GLuint n) {
  if (n) ctx->shaders.erase(n);
}
   1486 
// Create an empty program object and return its name.
GLuint CreateProgram() {
  Program p;
  return ctx->programs.insert(p);
}
   1491 
// Destroy program n. If it is currently in use, only flag it as deleted;
// UseProgram performs the actual destruction once it is unbound.
void DeleteProgram(GLuint n) {
  if (!n) return;
  if (ctx->current_program == n) {
    if (auto* p = ctx->programs.find(n)) {
      p->deleted = true;
    }
  } else {
    ctx->programs.erase(n);
  }
}
   1502 
// "Link" a program by resolving its vertex and fragment shader
// implementations from the attached precompiled program impl.
void LinkProgram(GLuint program) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return;
  }
  // Interpolant outputs must fit in the fixed-size Interpolants struct.
  assert(p.impl->interpolants_size() <= sizeof(Interpolants));
  if (!p.vert_impl) p.vert_impl = p.impl->get_vertex_shader();
  if (!p.frag_impl) p.frag_impl = p.impl->get_fragment_shader();
}
   1513 
   1514 GLint GetLinkStatus(GLuint program) {
   1515  if (auto* p = ctx->programs.find(program)) {
   1516    return p->impl ? 1 : 0;
   1517  }
   1518  return 0;
   1519 }
   1520 
// Bind a named vertex attribute to an explicit location on the program's
// implementation; no-op if the program has no implementation.
void BindAttribLocation(GLuint program, GLuint index, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return;
  }
  p.impl->bind_attrib(name, index);
}
   1529 
// Query the location of a named vertex attribute; -1 if the program has
// no implementation (matching GL's "not found" convention).
GLint GetAttribLocation(GLuint program, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return -1;
  }
  return p.impl->get_attrib(name);
}
   1538 
// Query the location of a named uniform; -1 if the program has no
// implementation (matching GL's "not found" convention).
GLint GetUniformLocation(GLuint program, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return -1;
  }
  GLint loc = p.impl->get_uniform(name);
  // debugf("location: %d\n", loc);
  return loc;
}
   1549 
   1550 static uint64_t get_time_value() {
   1551 #ifdef __MACH__
   1552  return mach_absolute_time();
   1553 #elif defined(_WIN32)
   1554  LARGE_INTEGER time;
   1555  static bool have_frequency = false;
   1556  static LARGE_INTEGER frequency;
   1557  if (!have_frequency) {
   1558    QueryPerformanceFrequency(&frequency);
   1559    have_frequency = true;
   1560  }
   1561  QueryPerformanceCounter(&time);
   1562  return time.QuadPart * 1000000000ULL / frequency.QuadPart;
   1563 #else
   1564  return ({
   1565    struct timespec tp;
   1566    clock_gettime(CLOCK_MONOTONIC, &tp);
   1567    tp.tv_sec * 1000000000ULL + tp.tv_nsec;
   1568  });
   1569 #endif
   1570 }
   1571 
// Begin a query: samples-passed queries start counting from zero, while
// time-elapsed queries record the start timestamp (EndQuery subtracts it).
void BeginQuery(GLenum target, GLuint id) {
  ctx->get_binding(target) = id;
  Query& q = ctx->queries[id];
  switch (target) {
    case GL_SAMPLES_PASSED:
      q.value = 0;
      break;
    case GL_TIME_ELAPSED:
      q.value = get_time_value();
      break;
    default:
      debugf("unknown query target %x for query %d\n", target, id);
      assert(false);
  }
}
   1587 
// End the active query on the target: time-elapsed queries convert the
// stored start timestamp into an elapsed duration; samples-passed queries
// already hold their accumulated count. The binding is then cleared.
void EndQuery(GLenum target) {
  Query& q = ctx->queries[ctx->get_binding(target)];
  switch (target) {
    case GL_SAMPLES_PASSED:
      break;
    case GL_TIME_ELAPSED:
      q.value = get_time_value() - q.value;
      break;
    default:
      debugf("unknown query target %x\n", target);
      assert(false);
  }
  ctx->get_binding(target) = 0;
}
   1602 
// Read back a query's result; only GL_QUERY_RESULT is supported (results
// are always immediately available in this software implementation).
void GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params) {
  Query& q = ctx->queries[id];
  switch (pname) {
    case GL_QUERY_RESULT:
      assert(params);
      params[0] = q.value;
      break;
    default:
      assert(false);
  }
}
   1614 
// Bind a vertex array object; switching VAOs forces attrib revalidation
// before the next draw.
void BindVertexArray(GLuint vertex_array) {
  if (vertex_array != ctx->current_vertex_array) {
    ctx->validate_vertex_array = true;
  }
  ctx->current_vertex_array = vertex_array;
}
   1621 
// Bind a texture to the given target on the active texture unit.
void BindTexture(GLenum target, GLuint texture) {
  ctx->get_binding(target) = texture;
}
   1625 
// Bind a buffer object to the given buffer target.
void BindBuffer(GLenum target, GLuint buffer) {
  ctx->get_binding(target) = buffer;
}
   1629 
// Bind a framebuffer; GL_FRAMEBUFFER sets both the read and draw
// bindings, otherwise only the specified one is updated.
void BindFramebuffer(GLenum target, GLuint fb) {
  if (target == GL_FRAMEBUFFER) {
    ctx->read_framebuffer_binding = fb;
    ctx->draw_framebuffer_binding = fb;
  } else {
    assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
    ctx->get_binding(target) = fb;
  }
}
   1639 
// Bind a renderbuffer object to the given target.
void BindRenderbuffer(GLenum target, GLuint rb) {
  ctx->get_binding(target) = rb;
}
   1643 
// Set pixel-store state; only unpack alignment 1 and unpack row length
// are supported. Other pnames are silently ignored.
void PixelStorei(GLenum name, GLint param) {
  if (name == GL_UNPACK_ALIGNMENT) {
    assert(param == 1);
  } else if (name == GL_UNPACK_ROW_LENGTH) {
    ctx->unpack_row_length = param;
  }
}
   1651 
// Map unsized or legacy internal formats to the sized formats this
// implementation stores internally; other formats pass through unchanged.
static GLenum remap_internal_format(GLenum format) {
  switch (format) {
    case GL_DEPTH_COMPONENT:
      return GL_DEPTH_COMPONENT24;
    case GL_RGBA:
      return GL_RGBA8;
    case GL_RED:
      return GL_R8;
    case GL_RG:
      return GL_RG8;
    case GL_RGB_422_APPLE:
      return GL_RGB_RAW_422_APPLE;
    default:
      return format;
  }
}
   1668 
   1669 }  // extern "C"
   1670 
// Decide whether uploading data in external_format to a texture with
// internal_format requires a per-pixel conversion (channel swizzle or
// expansion) rather than a straight memcpy. RGBA input to an RGBA8
// texture needs its red/blue channels swapped (see copy_bgra8_to_rgba8);
// RED/RG input needs expansion unless the internal format matches.
static bool format_requires_conversion(GLenum external_format,
                                       GLenum internal_format) {
  switch (external_format) {
    case GL_RGBA:
      return internal_format == GL_RGBA8;
    case GL_RED:
      return internal_format != GL_R8 && internal_format != GL_R16;
    case GL_RG:
      return internal_format != GL_RG8 && internal_format != GL_RG16;
    default:
      return false;
  }
}
   1684 
// Copy a row of pixels while swapping the red and blue channels (BGRA8 <->
// RGBA8). Processes 4 pixels at a time with SIMD vectors, then handles any
// remaining pixels one at a time.
static inline void copy_bgra8_to_rgba8(uint32_t* dest, const uint32_t* src,
                                       int width) {
  for (; width >= 4; width -= 4, dest += 4, src += 4) {
    U32 p = unaligned_load<U32>(src);
    // Isolate the R and B bytes and exchange them; G and A stay in place.
    U32 rb = p & 0x00FF00FF;
    unaligned_store(dest, (p & 0xFF00FF00) | (rb << 16) | (rb >> 16));
  }
  for (; width > 0; width--, dest++, src++) {
    uint32_t p = *src;
    uint32_t rb = p & 0x00FF00FF;
    *dest = (p & 0xFF00FF00) | (rb << 16) | (rb >> 16);
  }
}
   1698 
   1699 static inline void copy_red_to_rgba32f(float* dest, const float* src,
   1700                                       int width) {
   1701  for (; width > 0; width--, dest += 4, src++) {
   1702    dest[0] = *src;
   1703    dest[1] = 0.0f;
   1704    dest[2] = 0.0f;
   1705    dest[3] = 1.0f;
   1706  }
   1707 }
   1708 
   1709 static inline void copy_red_to_bgra8(uint8_t* dest, const uint8_t* src,
   1710                                     int width) {
   1711  for (; width > 0; width--, dest += 4, src++) {
   1712    dest[0] = 0;
   1713    dest[1] = 0;
   1714    dest[2] = *src;
   1715    dest[3] = 255;
   1716  }
   1717 }
   1718 
// Clip a linear copy of len source elements so both pointers stay within
// their respective [bound0, bound1) ranges. N is the number of destination
// elements written per source element. Advances dst_buf/src_buf and
// shrinks len in place. Returns -1 when the span lies entirely before a
// bound, 1 when entirely after, and 0 otherwise (len may still become 0
// on the -1 paths). A null dst_bound0/src_bound0 disables clipping for
// that side.
template <typename T, size_t N = 1>
static int clip_ptrs_against_bounds(T*& dst_buf, T* dst_bound0, T* dst_bound1,
                                    const T*& src_buf, const T* src_bound0,
                                    const T* src_bound1, size_t& len) {
  if (dst_bound0) {
    assert(dst_bound0 <= dst_bound1);
    if (dst_buf < dst_bound0) {
      // Offset is measured in source elements (N dst elements each).
      size_t offset = size_t(dst_bound0 - dst_buf) / N;
      if (len <= offset) {
        // dst entirely before bounds
        len = 0;
        return -1;
      }
      // dst overlaps bound0
      src_buf += offset;
      dst_buf += offset * N;
      len -= offset;
    }
    if (dst_buf >= dst_bound1) {
      // dst entirely after bounds
      len = 0;
      return 1;
    }
    size_t remaining = size_t(dst_bound1 - dst_buf) / N;
    if (len > remaining) {
      // dst overlaps bound1
      len = remaining;
    }
  }
  if (src_bound0) {
    assert(src_bound0 <= src_bound1);
    if (src_buf < src_bound0) {
      size_t offset = size_t(src_bound0 - src_buf);
      if (len <= offset) {
        // src entirely before bounds
        len = 0;
        return -1;
      }
      // src overlaps bound0
      src_buf += offset;
      dst_buf += offset * N;
      len -= offset;
    }
    if (src_buf >= src_bound1) {
      // src entirely after bounds
      len = 0;
      return 1;
    }
    size_t remaining = size_t(src_bound1 - src_buf);
    if (len > remaining) {
      // src overlaps bound1
      len = remaining;
    }
  }
  return 0;
}
   1775 
// Copy a width x height rect from src to dst, converting pixels from
// external_format to internal_format where the two differ. Each row is
// clipped against the optional [bound0, bound1) ranges of both buffers so
// partially out-of-bounds rows copy only their in-bounds portion. When no
// conversion path applies, falls through to a plain per-row memcpy.
static void convert_copy(GLenum external_format, GLenum internal_format,
                         uint8_t* dst_buf, size_t dst_stride,
                         uint8_t* dst_bound0, uint8_t* dst_bound1,
                         const uint8_t* src_buf, size_t src_stride,
                         const uint8_t* src_bound0, const uint8_t* src_bound1,
                         size_t width, size_t height) {
  switch (external_format) {
    case GL_RGBA:
      if (internal_format == GL_RGBA8) {
        // Swizzle between RGBA and the internal channel order (see
        // copy_bgra8_to_rgba8), one 32-bit pixel per src element.
        for (; height; height--) {
          size_t len = width;
          uint32_t* dst_ptr = (uint32_t*)dst_buf;
          const uint32_t* src_ptr = (const uint32_t*)src_buf;
          if (clip_ptrs_against_bounds(dst_ptr, (uint32_t*)dst_bound0,
                                       (uint32_t*)dst_bound1, src_ptr,
                                       (const uint32_t*)src_bound0,
                                       (const uint32_t*)src_bound1, len) > 0) {
            // Row is entirely past the bounds; later rows only go further.
            return;
          }
          if (len) {
            copy_bgra8_to_rgba8(dst_ptr, src_ptr, len);
          }
          dst_buf += dst_stride;
          src_buf += src_stride;
        }
        return;
      }
      break;
    case GL_RED:
      switch (internal_format) {
        case GL_RGBA8:
          // Expand 1-byte red samples to 4-byte pixels; N=4 accounts for dst
          // advancing 4 bytes per src byte during clipping.
          for (; height; height--) {
            size_t len = width;
            uint8_t* dst_ptr = dst_buf;
            const uint8_t* src_ptr = src_buf;
            if (clip_ptrs_against_bounds<uint8_t, 4>(
                    dst_ptr, dst_bound0, dst_bound1, src_ptr, src_bound0,
                    src_bound1, len) > 0) {
              return;
            }
            if (len) {
              copy_red_to_bgra8(dst_ptr, src_ptr, len);
            }
            dst_buf += dst_stride;
            src_buf += src_stride;
          }
          return;
        case GL_RGBA32F:
          // Expand single-float red samples to 4-float pixels.
          for (; height; height--) {
            size_t len = width;
            float* dst_ptr = (float*)dst_buf;
            const float* src_ptr = (const float*)src_buf;
            if (clip_ptrs_against_bounds<float, 4>(
                    dst_ptr, (float*)dst_bound0, (float*)dst_bound1, src_ptr,
                    (const float*)src_bound0, (const float*)src_bound1,
                    len) > 0) {
              return;
            }
            if (len) {
              copy_red_to_rgba32f(dst_ptr, src_ptr, len);
            }
            dst_buf += dst_stride;
            src_buf += src_stride;
          }
          return;
        case GL_R8:
          // Same layout on both sides; handled by the memcpy path below.
          break;
        default:
          debugf("unsupported format conversion from %x to %x\n",
                 external_format, internal_format);
          assert(false);
          return;
      }
      break;
    default:
      break;
  }
  // No conversion required: copy rows verbatim, still honoring the bounds.
  size_t row_bytes = width * bytes_for_internal_format(internal_format);
  for (; height; height--) {
    size_t len = row_bytes;
    uint8_t* dst_ptr = dst_buf;
    const uint8_t* src_ptr = src_buf;
    if (clip_ptrs_against_bounds(dst_ptr, dst_bound0, dst_bound1, src_ptr,
                                 src_bound0, src_bound1, len) > 0) {
      return;
    }
    if (len) {
      memcpy(dst_ptr, src_ptr, len);
    }
    dst_buf += dst_stride;
    src_buf += src_stride;
  }
}
   1869 
// (Re)allocate storage for a texture with the given external format and
// dimensions. If buf is supplied and its format is natively supported, the
// texture adopts it as an externally owned buffer; otherwise an internal
// buffer is allocated and, if buf was supplied, its contents are converted
// and copied in. min_width/min_height allow requesting extra slack in the
// allocation.
static void set_tex_storage(Texture& t, GLenum external_format, GLsizei width,
                            GLsizei height, void* buf = nullptr,
                            GLsizei stride = 0, GLsizei min_width = 0,
                            GLsizei min_height = 0) {
  GLenum internal_format = remap_internal_format(external_format);
  bool changed = false;
  if (t.width != width || t.height != height ||
      t.internal_format != internal_format) {
    changed = true;
    t.internal_format = internal_format;
    t.width = width;
    t.height = height;
  }
  // If we are changed from an internally managed buffer to an externally
  // supplied one or vice versa, ensure that we clean up old buffer state.
  // However, if we have to convert the data from a non-native format, then
  // always treat it as internally managed since we will need to copy to an
  // internally managed native format buffer.
  bool should_free = buf == nullptr || format_requires_conversion(
                                           external_format, internal_format);
  if (t.should_free() != should_free) {
    changed = true;
    t.cleanup();
    t.set_should_free(should_free);
  }
  // If now an external buffer, explicitly set it...
  if (!should_free) {
    t.set_buffer(buf, stride);
  }
  t.disable_delayed_clear();
  if (!t.allocate(changed, min_width, min_height)) {
    out_of_memory();
  }
  // If we have a buffer that needs format conversion, then do that now.
  if (buf && should_free) {
    convert_copy(external_format, internal_format, (uint8_t*)t.buf, t.stride(),
                 (uint8_t*)t.buf, (uint8_t*)t.end_ptr(), (const uint8_t*)buf,
                 stride, nullptr, nullptr, width, height);
  }
}
   1910 
   1911 extern "C" {
   1912 
   1913 void TexStorage2D(GLenum target, GLint levels, GLenum internal_format,
   1914                  GLsizei width, GLsizei height) {
   1915  assert(levels == 1);
   1916  Texture& t = ctx->textures[ctx->get_binding(target)];
   1917  set_tex_storage(t, internal_format, width, height);
   1918 }
   1919 
   1920 GLenum internal_format_for_data(GLenum format, GLenum ty) {
   1921  if (format == GL_RED && ty == GL_UNSIGNED_BYTE) {
   1922    return GL_R8;
   1923  } else if ((format == GL_RGBA || format == GL_BGRA) &&
   1924             (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV)) {
   1925    return GL_RGBA8;
   1926  } else if (format == GL_RGBA && ty == GL_FLOAT) {
   1927    return GL_RGBA32F;
   1928  } else if (format == GL_RGBA_INTEGER && ty == GL_INT) {
   1929    return GL_RGBA32I;
   1930  } else if (format == GL_RG && ty == GL_UNSIGNED_BYTE) {
   1931    return GL_RG8;
   1932  } else if (format == GL_RGB_422_APPLE &&
   1933             ty == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
   1934    return GL_RGB_RAW_422_APPLE;
   1935  } else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) {
   1936    return GL_R16;
   1937  } else if (format == GL_RG && ty == GL_UNSIGNED_SHORT) {
   1938    return GL_RG16;
   1939  } else {
   1940    debugf("unknown internal format for format %x, type %x\n", format, ty);
   1941    assert(false);
   1942    return 0;
   1943  }
   1944 }
   1945 
   1946 static Buffer* get_pixel_pack_buffer() {
   1947  return ctx->pixel_pack_buffer_binding
   1948             ? &ctx->buffers[ctx->pixel_pack_buffer_binding]
   1949             : nullptr;
   1950 }
   1951 
   1952 static Buffer* get_pixel_unpack_buffer() {
   1953  return ctx->pixel_unpack_buffer_binding
   1954             ? &ctx->buffers[ctx->pixel_unpack_buffer_binding]
   1955             : nullptr;
   1956 }
   1957 
// Upload a sub-rectangle of pixel data into the texture bound to target.
// Only level 0 is supported. If a pixel unpack buffer is bound, data is an
// offset into that buffer. Pixels are converted from format/ty to the
// texture's internal format as needed, with rows clipped against both the
// texture's and the PBO's buffer bounds.
void TexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset,
                   GLsizei width, GLsizei height, GLenum format, GLenum ty,
                   void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  Buffer* pbo = get_pixel_unpack_buffer();
  if (pbo) {
    // Resolve the offset into the bound pixel unpack buffer.
    data = pbo->get_data(data);
  }
  if (!data) return;
  Texture& t = ctx->textures[ctx->get_binding(target)];
  // Flush any delayed clear, but skip the region we are about to overwrite.
  IntRect skip = {xoffset, yoffset, xoffset + width, yoffset + height};
  prepare_texture(t, &skip);
  assert(xoffset + width <= t.width);
  assert(yoffset + height <= t.height);
  assert(ctx->unpack_row_length == 0 || ctx->unpack_row_length >= width);
  // GL_UNPACK_ROW_LENGTH overrides the source row pitch when nonzero.
  GLsizei row_length =
      ctx->unpack_row_length != 0 ? ctx->unpack_row_length : width;
  assert(t.internal_format == internal_format_for_data(format, ty));
  // Source rows use the external format's pixel size when a conversion is
  // required, otherwise the texture's own bytes-per-pixel.
  int src_bpp = format_requires_conversion(format, t.internal_format)
                    ? bytes_for_internal_format(format)
                    : t.bpp();
  if (!src_bpp || !t.buf) return;
  convert_copy(format, t.internal_format,
               (uint8_t*)t.sample_ptr(xoffset, yoffset), t.stride(),
               (uint8_t*)t.buf, (uint8_t*)t.end_ptr(), (const uint8_t*)data,
               row_length * src_bpp, pbo ? (const uint8_t*)pbo->buf : nullptr,
               pbo ? (const uint8_t*)pbo->end_ptr() : nullptr, width, height);
}
   1989 
// glTexImage2D: allocate storage for level 0 and upload the initial pixel
// data, implemented as TexStorage2D followed by TexSubImage2D.
void TexImage2D(GLenum target, GLint level, GLint internal_format,
                GLsizei width, GLsizei height, GLint border, GLenum format,
                GLenum ty, void* data) {
  if (level != 0) {
    // Only a single mipmap level is supported.
    assert(false);
    return;
  }
  assert(border == 0);
  TexStorage2D(target, 1, internal_format, width, height);
  TexSubImage2D(target, 0, 0, 0, width, height, format, ty, data);
}
   2001 
// Mipmap generation is currently a no-op; only the base level is used.
void GenerateMipmap(UNUSED GLenum target) {
  // TODO: support mipmaps
}
   2005 
   2006 void SetTextureParameter(GLuint texid, GLenum pname, GLint param) {
   2007  Texture& t = ctx->textures[texid];
   2008  switch (pname) {
   2009    case GL_TEXTURE_WRAP_S:
   2010      assert(param == GL_CLAMP_TO_EDGE);
   2011      break;
   2012    case GL_TEXTURE_WRAP_T:
   2013      assert(param == GL_CLAMP_TO_EDGE);
   2014      break;
   2015    case GL_TEXTURE_MIN_FILTER:
   2016      t.min_filter = param;
   2017      break;
   2018    case GL_TEXTURE_MAG_FILTER:
   2019      t.mag_filter = param;
   2020      break;
   2021    default:
   2022      break;
   2023  }
   2024 }
   2025 
   2026 void TexParameteri(GLenum target, GLenum pname, GLint param) {
   2027  SetTextureParameter(ctx->get_binding(target), pname, param);
   2028 }
   2029 
   2030 typedef Texture LockedTexture;
   2031 
   2032 // Lock the given texture to prevent modification.
   2033 LockedTexture* LockTexture(GLuint texId) {
   2034  Texture& tex = ctx->textures[texId];
   2035  if (!tex.buf) {
   2036    assert(tex.buf != nullptr);
   2037    return nullptr;
   2038  }
   2039  if (__sync_fetch_and_add(&tex.locked, 1) == 0) {
   2040    // If this is the first time locking the texture, flush any delayed clears.
   2041    prepare_texture(tex);
   2042  }
   2043  return (LockedTexture*)&tex;
   2044 }
   2045 
   2046 // Lock the given framebuffer's color attachment to prevent modification.
   2047 LockedTexture* LockFramebuffer(GLuint fboId) {
   2048  Framebuffer& fb = ctx->framebuffers[fboId];
   2049  // Only allow locking a framebuffer if it has a valid color attachment.
   2050  if (!fb.color_attachment) {
   2051    assert(fb.color_attachment != 0);
   2052    return nullptr;
   2053  }
   2054  return LockTexture(fb.color_attachment);
   2055 }
   2056 
   2057 // Reference an already locked resource
   2058 void LockResource(LockedTexture* resource) {
   2059  if (!resource) {
   2060    return;
   2061  }
   2062  __sync_fetch_and_add(&resource->locked, 1);
   2063 }
   2064 
// Remove a lock on a texture that has been previously locked. Returns the
// remaining lock count, or -1 if resource is null. If the resource was
// marked as a zombie (deleted while locked) and this releases the last
// lock, the resource is freed here.
int32_t UnlockResource(LockedTexture* resource) {
  if (!resource) {
    return -1;
  }
  // Atomic decrement; the returned value is the count before the decrement.
  int32_t locked = __sync_fetch_and_add(&resource->locked, -1);
  if (locked <= 0) {
    // The lock should always be non-zero before unlocking.
    assert(0);
  } else if (locked == 1 && resource->zombie()) {
    // If the resource is being kept alive by locks and this is the last lock,
    // then delete the resource now.
    delete resource;
  }
  return locked - 1;
}
   2081 
   2082 void GenTextures(int n, GLuint* result) {
   2083  for (int i = 0; i < n; i++) {
   2084    Texture t;
   2085    result[i] = ctx->textures.insert(t);
   2086  }
   2087 }
   2088 
// Delete a texture. The texture is unbound from every texture unit, but if
// other holders still have locks on it, destruction is deferred: the texture
// is marked as a zombie and freed when the final lock is released.
void DeleteTexture(GLuint n) {
  if (!n) {
    return;
  }
  LockedTexture* tex = (LockedTexture*)ctx->textures.find(n);
  if (!tex) {
    return;
  }
  // Lock the texture so that it can't be deleted by another thread yet.
  LockResource(tex);
  // Forget the existing binding to the texture but keep it alive in case there
  // are any other locks on it.
  if (ctx->textures.erase(n, false)) {
    for (size_t i = 0; i < MAX_TEXTURE_UNITS; i++) {
      ctx->texture_units[i].unlink(n);
    }
  }
  // Mark the texture as a zombie so that it will be freed if there are no other
  // existing locks on it.
  tex->set_zombie(true);
  if (int32_t locked = UnlockResource(tex)) {
    debugf("DeleteTexture(%u) with %d locks\n", n, locked);
  }
}
   2113 
   2114 void GenRenderbuffers(int n, GLuint* result) {
   2115  for (int i = 0; i < n; i++) {
   2116    Renderbuffer r;
   2117    result[i] = ctx->renderbuffers.insert(r);
   2118  }
   2119 }
   2120 
// When a renderbuffer is erased, detach its backing texture from any
// framebuffer attachments that still reference it, then delete the texture.
void Renderbuffer::on_erase() {
  for (auto* fb : ctx->framebuffers) {
    if (fb) {
      unlink(fb->color_attachment, texture);
      unlink(fb->depth_attachment, texture);
    }
  }
  DeleteTexture(texture);
}
   2130 
   2131 void DeleteRenderbuffer(GLuint n) {
   2132  if (n && ctx->renderbuffers.erase(n)) {
   2133    unlink(ctx->renderbuffer_binding, n);
   2134  }
   2135 }
   2136 
   2137 void GenFramebuffers(int n, GLuint* result) {
   2138  for (int i = 0; i < n; i++) {
   2139    Framebuffer f;
   2140    result[i] = ctx->framebuffers.insert(f);
   2141  }
   2142 }
   2143 
   2144 void DeleteFramebuffer(GLuint n) {
   2145  if (n && ctx->framebuffers.erase(n)) {
   2146    unlink(ctx->read_framebuffer_binding, n);
   2147    unlink(ctx->draw_framebuffer_binding, n);
   2148  }
   2149 }
   2150 
// Allocate storage for the bound renderbuffer. Renderbuffers are backed by
// ordinary textures internally, so this lazily creates a backing texture and
// allocates texture storage for it. All depth formats are normalized to 24
// bits.
void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
                         GLsizei height) {
  // Just refer a renderbuffer to a texture to simplify things for now...
  Renderbuffer& r = ctx->renderbuffers[ctx->get_binding(target)];
  if (!r.texture) {
    GenTextures(1, &r.texture);
  }
  switch (internal_format) {
    case GL_DEPTH_COMPONENT:
    case GL_DEPTH_COMPONENT16:
    case GL_DEPTH_COMPONENT24:
    case GL_DEPTH_COMPONENT32:
      // Force depth format to 24 bits...
      internal_format = GL_DEPTH_COMPONENT24;
      break;
  }
  set_tex_storage(ctx->textures[r.texture], internal_format, width, height);
}
   2169 
   2170 void VertexAttribPointer(GLuint index, GLint size, GLenum type, bool normalized,
   2171                         GLsizei stride, GLuint offset) {
   2172  // debugf("cva: %d\n", ctx->current_vertex_array);
   2173  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
   2174  if (index >= NULL_ATTRIB) {
   2175    assert(0);
   2176    return;
   2177  }
   2178  VertexAttrib& va = v.attribs[index];
   2179  va.size = size * bytes_per_type(type);
   2180  va.type = type;
   2181  va.normalized = normalized;
   2182  va.stride = stride;
   2183  va.offset = offset;
   2184  // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
   2185  va.vertex_buffer = ctx->array_buffer_binding;
   2186  va.vertex_array = ctx->current_vertex_array;
   2187  ctx->validate_vertex_array = true;
   2188 }
   2189 
   2190 void VertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride,
   2191                          GLuint offset) {
   2192  // debugf("cva: %d\n", ctx->current_vertex_array);
   2193  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
   2194  if (index >= NULL_ATTRIB) {
   2195    assert(0);
   2196    return;
   2197  }
   2198  VertexAttrib& va = v.attribs[index];
   2199  va.size = size * bytes_per_type(type);
   2200  va.type = type;
   2201  va.normalized = false;
   2202  va.stride = stride;
   2203  va.offset = offset;
   2204  // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
   2205  va.vertex_buffer = ctx->array_buffer_binding;
   2206  va.vertex_array = ctx->current_vertex_array;
   2207  ctx->validate_vertex_array = true;
   2208 }
   2209 
   2210 void EnableVertexAttribArray(GLuint index) {
   2211  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
   2212  if (index >= NULL_ATTRIB) {
   2213    assert(0);
   2214    return;
   2215  }
   2216  VertexAttrib& va = v.attribs[index];
   2217  if (!va.enabled) {
   2218    ctx->validate_vertex_array = true;
   2219  }
   2220  va.enabled = true;
   2221  v.max_attrib = max(v.max_attrib, (int)index);
   2222 }
   2223 
   2224 void DisableVertexAttribArray(GLuint index) {
   2225  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
   2226  if (index >= NULL_ATTRIB) {
   2227    assert(0);
   2228    return;
   2229  }
   2230  VertexAttrib& va = v.attribs[index];
   2231  if (va.enabled) {
   2232    ctx->validate_vertex_array = true;
   2233  }
   2234  va.disable();
   2235 }
   2236 
   2237 void VertexAttribDivisor(GLuint index, GLuint divisor) {
   2238  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
   2239  // Only support divisor being 0 (per-vertex) or 1 (per-instance).
   2240  if (index >= NULL_ATTRIB || divisor > 1) {
   2241    assert(0);
   2242    return;
   2243  }
   2244  VertexAttrib& va = v.attribs[index];
   2245  va.divisor = divisor;
   2246 }
   2247 
// Allocate (or reallocate) storage for the buffer bound to target and
// optionally copy in initial data. The usage hint is ignored.
void BufferData(GLenum target, GLsizeiptr size, void* data,
                UNUSED GLenum usage) {
  Buffer& b = ctx->buffers[ctx->get_binding(target)];
  if (size != b.size) {
    if (!b.allocate(size)) {
      out_of_memory();
    }
    // The buffer memory may have moved, so cached vertex array state must
    // be revalidated.
    ctx->validate_vertex_array = true;
  }
  if (data && b.buf && size <= b.size) {
    memcpy(b.buf, data, size);
  }
}
   2261 
   2262 void BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
   2263                   void* data) {
   2264  if (offset < 0) {
   2265    assert(0);
   2266    return;
   2267  }
   2268  GLsizeiptr uOffset = offset;
   2269  Buffer& b = ctx->buffers[ctx->get_binding(target)];
   2270  assert(uOffset < b.size && size <= b.size - uOffset);
   2271  if (data && b.buf && uOffset < b.size && size <= b.size - uOffset) {
   2272    memcpy(&b.buf[uOffset], data, size);
   2273  }
   2274 }
   2275 
   2276 void* MapBuffer(GLenum target, UNUSED GLbitfield access) {
   2277  Buffer& b = ctx->buffers[ctx->get_binding(target)];
   2278  return b.buf;
   2279 }
   2280 
   2281 void* MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
   2282                     UNUSED GLbitfield access) {
   2283  Buffer& b = ctx->buffers[ctx->get_binding(target)];
   2284  if (!b.buf || offset < 0 || length == 0) {
   2285      return nullptr;
   2286  }
   2287 
   2288  GLsizeiptr uOffset = offset;
   2289  if (uOffset >= b.size || length > b.size - uOffset) {
   2290      return nullptr;
   2291  }
   2292 
   2293  return b.buf + offset;
   2294 }
   2295 
   2296 GLboolean UnmapBuffer(GLenum target) {
   2297  Buffer& b = ctx->buffers[ctx->get_binding(target)];
   2298  return b.buf != nullptr;
   2299 }
   2300 
   2301 void Uniform1i(GLint location, GLint V0) {
   2302  // debugf("tex: %d\n", (int)ctx->textures.size);
   2303  if (vertex_shader) {
   2304    vertex_shader->set_uniform_1i(location, V0);
   2305  }
   2306 }
   2307 void Uniform4fv(GLint location, GLsizei count, const GLfloat* v) {
   2308  assert(count == 1);
   2309  if (vertex_shader) {
   2310    vertex_shader->set_uniform_4fv(location, v);
   2311  }
   2312 }
   2313 void UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
   2314                      const GLfloat* value) {
   2315  assert(count == 1);
   2316  assert(!transpose);
   2317  if (vertex_shader) {
   2318    vertex_shader->set_uniform_matrix4fv(location, value);
   2319  }
   2320 }
   2321 
   2322 void FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget,
   2323                          GLuint texture, GLint level) {
   2324  assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
   2325  assert(textarget == GL_TEXTURE_2D || textarget == GL_TEXTURE_RECTANGLE);
   2326  assert(level == 0);
   2327  Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
   2328  if (attachment == GL_COLOR_ATTACHMENT0) {
   2329    fb.color_attachment = texture;
   2330  } else if (attachment == GL_DEPTH_ATTACHMENT) {
   2331    fb.depth_attachment = texture;
   2332  } else {
   2333    assert(0);
   2334  }
   2335 }
   2336 
   2337 void FramebufferRenderbuffer(GLenum target, GLenum attachment,
   2338                             GLenum renderbuffertarget, GLuint renderbuffer) {
   2339  assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
   2340  assert(renderbuffertarget == GL_RENDERBUFFER);
   2341  Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
   2342  Renderbuffer& rb = ctx->renderbuffers[renderbuffer];
   2343  if (attachment == GL_COLOR_ATTACHMENT0) {
   2344    fb.color_attachment = rb.texture;
   2345  } else if (attachment == GL_DEPTH_ATTACHMENT) {
   2346    fb.depth_attachment = rb.texture;
   2347  } else {
   2348    assert(0);
   2349  }
   2350 }
   2351 
   2352 }  // extern "C"
   2353 
   2354 static inline Framebuffer* get_framebuffer(GLenum target,
   2355                                           bool fallback = false) {
   2356  if (target == GL_FRAMEBUFFER) {
   2357    target = GL_DRAW_FRAMEBUFFER;
   2358  }
   2359  Framebuffer* fb = ctx->framebuffers.find(ctx->get_binding(target));
   2360  if (fallback && !fb) {
   2361    // If the specified framebuffer isn't found and a fallback is requested,
   2362    // use the default framebuffer.
   2363    fb = &ctx->framebuffers[0];
   2364  }
   2365  return fb;
   2366 }
   2367 
   2368 template <typename T>
   2369 static inline void fill_n(T* dst, size_t n, T val) {
   2370  for (T* end = &dst[n]; dst < end; dst++) *dst = val;
   2371 }
   2372 
#if USE_SSE2
// Specialization for 32-bit fills using the x86 string-store instruction
// ("rep stosl"): EDI is the destination, ECX the count, EAX the value.
// The "+D"/"+c" constraints let the compiler see the updated pointer/count.
template <>
inline void fill_n<uint32_t>(uint32_t* dst, size_t n, uint32_t val) {
  __asm__ __volatile__("rep stosl\n"
                       : "+D"(dst), "+c"(n)
                       : "a"(val)
                       : "memory", "cc");
}
#endif
   2382 
   2383 static inline uint32_t clear_chunk(uint8_t value) {
   2384  return uint32_t(value) * 0x01010101U;
   2385 }
   2386 
   2387 static inline uint32_t clear_chunk(uint16_t value) {
   2388  return uint32_t(value) | (uint32_t(value) << 16);
   2389 }
   2390 
// A 32-bit clear value already occupies a full chunk; pass it through.
static inline uint32_t clear_chunk(uint32_t value) { return value; }
   2392 
// Fill a row of len values of type T with value, using aligned 32-bit chunk
// stores for the bulk of the row. chunk must be value replicated out to 32
// bits (see clear_chunk). N is the number of T elements per 32-bit chunk.
template <typename T>
static inline void clear_row(T* buf, size_t len, T value, uint32_t chunk) {
  const size_t N = sizeof(uint32_t) / sizeof(T);
  // fill any leading unaligned values
  if (N > 1) {
    size_t align = (-(intptr_t)buf & (sizeof(uint32_t) - 1)) / sizeof(T);
    if (align <= len) {
      fill_n(buf, align, value);
      len -= align;
      buf += align;
    }
  }
  // fill as many aligned chunks as possible
  fill_n((uint32_t*)buf, len / N, chunk);
  // fill any remaining values
  if (N > 1) {
    fill_n(buf + (len & ~(N - 1)), len & (N - 1), value);
  }
}
   2412 
// Clear value into the rect bb of texture t, leaving the horizontal span
// [skip_start, skip_end) of each row untouched. Full-width multi-row clears
// with no skip span are coalesced into a single long "row" when the stride
// permits, so the per-row setup runs only once.
template <typename T>
static void clear_buffer(Texture& t, T value, IntRect bb, int skip_start = 0,
                         int skip_end = 0) {
  if (!t.buf) return;
  // Normalize the skip span so it lies within the rect and is non-inverted.
  skip_start = max(skip_start, bb.x0);
  skip_end = max(skip_end, skip_start);
  assert(sizeof(T) == t.bpp());
  size_t stride = t.stride();
  // When clearing multiple full-width rows, collapse them into a single large
  // "row" to avoid redundant setup from clearing each row individually. Note
  // that we can only safely do this if the stride is tightly packed.
  if (bb.width() == t.width && bb.height() > 1 && skip_start >= skip_end &&
      (t.should_free() || stride == t.width * sizeof(T))) {
    bb.x1 += (stride / sizeof(T)) * (bb.height() - 1);
    bb.y1 = bb.y0 + 1;
  }
  T* buf = (T*)t.sample_ptr(bb.x0, bb.y0);
  uint32_t chunk = clear_chunk(value);
  for (int rows = bb.height(); rows > 0; rows--) {
    if (bb.x0 < skip_start) {
      // Clear the portion of the row left of the skip span.
      clear_row(buf, skip_start - bb.x0, value, chunk);
    }
    if (skip_end < bb.x1) {
      // Clear the portion of the row right of the skip span.
      clear_row(buf + (skip_end - bb.x0), bb.x1 - skip_end, value, chunk);
    }
    buf += stride / sizeof(T);
  }
}
   2441 
// Clear a single row y of t to its delayed clear value, except for the span
// [skip_start, skip_end) which the caller intends to overwrite.
template <typename T>
static inline void force_clear_row(Texture& t, int y, int skip_start = 0,
                                   int skip_end = 0) {
  assert(t.buf != nullptr);
  assert(sizeof(T) == t.bpp());
  assert(skip_start <= skip_end);
  T* buf = (T*)t.sample_ptr(0, y);
  uint32_t chunk = clear_chunk((T)t.clear_val);
  if (skip_start > 0) {
    clear_row<T>(buf, skip_start, t.clear_val, chunk);
  }
  if (skip_end < t.width) {
    clear_row<T>(buf + skip_end, t.width - skip_end, t.clear_val, chunk);
  }
}
   2457 
// Flush a texture's delayed clear: clear every row whose bit in
// t.cleared_rows is still 0, optionally leaving the skip rect untouched
// (the caller will overwrite it). Each uint32_t in cleared_rows tracks 32
// rows; a set bit marks a row that no longer needs the delayed clear.
template <typename T>
static void force_clear(Texture& t, const IntRect* skip = nullptr) {
  if (!t.delay_clear || !t.cleared_rows) {
    return;
  }
  int y0 = 0;
  int y1 = t.height;
  int skip_start = 0;
  int skip_end = 0;
  if (skip) {
    y0 = clamp(skip->y0, 0, t.height);
    y1 = clamp(skip->y1, y0, t.height);
    skip_start = clamp(skip->x0, 0, t.width);
    skip_end = clamp(skip->x1, skip_start, t.width);
    // If the skip rect covers the whole texture, nothing needs clearing.
    if (skip_start <= 0 && skip_end >= t.width && y0 <= 0 && y1 >= t.height) {
      t.disable_delayed_clear();
      return;
    }
  }
  int num_masks = (y1 + 31) / 32;
  uint32_t* rows = t.cleared_rows;
  for (int i = y0 / 32; i < num_masks; i++) {
    uint32_t mask = rows[i];
    if (mask != ~0U) {
      // Mark all 32 rows of this word as cleared up front.
      rows[i] = ~0U;
      int start = i * 32;
      while (mask) {
        // A run of trailing zero bits is a run of rows needing a clear.
        int count = __builtin_ctz(mask);
        if (count > 0) {
          clear_buffer<T>(t, t.clear_val,
                          IntRect{0, start, t.width, start + count}, skip_start,
                          skip_end);
          t.delay_clear -= count;
          start += count;
          mask >>= count;
        }
        // Skip the following run of one bits (rows already cleared);
        // ctz(mask + 1) counts the trailing ones of mask.
        count = __builtin_ctz(mask + 1);
        start += count;
        mask >>= count;
      }
      // Clear any remaining rows above the highest set bit of the mask.
      int count = (i + 1) * 32 - start;
      if (count > 0) {
        clear_buffer<T>(t, t.clear_val,
                        IntRect{0, start, t.width, start + count}, skip_start,
                        skip_end);
        t.delay_clear -= count;
      }
    }
  }
  if (t.delay_clear <= 0) t.disable_delayed_clear();
}
   2509 
   2510 static void prepare_texture(Texture& t, const IntRect* skip) {
   2511  if (t.delay_clear) {
   2512    switch (t.internal_format) {
   2513      case GL_RGBA8:
   2514        force_clear<uint32_t>(t, skip);
   2515        break;
   2516      case GL_R8:
   2517        force_clear<uint8_t>(t, skip);
   2518        break;
   2519      case GL_RG8:
   2520        force_clear<uint16_t>(t, skip);
   2521        break;
   2522      default:
   2523        assert(false);
   2524        break;
   2525    }
   2526  }
   2527 }
   2528 
// Setup a clear on a texture. This may either force an immediate clear or
// potentially punt to a delayed clear, if applicable.
template <typename T>
static void request_clear(Texture& t, T value, const IntRect& scissor) {
  // If the clear would require a scissor, force clear anything outside
  // the scissor, and then immediately clear anything inside the scissor.
  if (!scissor.contains(t.offset_bounds())) {
    // Translate the scissor from framebuffer space into texture space.
    IntRect skip = scissor - t.offset;
    force_clear<T>(t, &skip);
    clear_buffer<T>(t, value, skip.intersection(t.bounds()));
  } else {
    // Do delayed clear for 2D texture without scissor.
    t.enable_delayed_clear(value);
  }
}
   2544 
   2545 template <typename T>
   2546 static inline void request_clear(Texture& t, T value) {
   2547  // If scissoring is enabled, use the scissor rect. Otherwise, just scissor to
   2548  // the entire texture bounds.
   2549  request_clear(t, value, ctx->scissortest ? ctx->scissor : t.offset_bounds());
   2550 }
   2551 
   2552 extern "C" {
   2553 
// Initialize the default framebuffer (FBO 0) to target an externally owned
// pixel buffer with the given dimensions, creating the color and depth
// attachment textures on first use. (x, y) becomes the offset of both
// attachments.
void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
                            void* buf) {
  Framebuffer& fb = ctx->framebuffers[0];
  if (!fb.color_attachment) {
    GenTextures(1, &fb.color_attachment);
  }
  // If the dimensions or buffer properties changed, we need to reallocate
  // the underlying storage for the color buffer texture.
  Texture& colortex = ctx->textures[fb.color_attachment];
  set_tex_storage(colortex, GL_RGBA8, width, height, buf, stride);
  colortex.offset = IntPoint(x, y);
  if (!fb.depth_attachment) {
    GenTextures(1, &fb.depth_attachment);
  }
  // Ensure dimensions of the depth buffer match the color buffer.
  Texture& depthtex = ctx->textures[fb.depth_attachment];
  set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
  depthtex.offset = IntPoint(x, y);
}
   2573 
   2574 void* GetColorBuffer(GLuint fbo, GLboolean flush, int32_t* width,
   2575                     int32_t* height, int32_t* stride) {
   2576  Framebuffer* fb = ctx->framebuffers.find(fbo);
   2577  if (!fb || !fb->color_attachment) {
   2578    return nullptr;
   2579  }
   2580  Texture& colortex = ctx->textures[fb->color_attachment];
   2581  if (flush) {
   2582    prepare_texture(colortex);
   2583  }
   2584  assert(colortex.offset == IntPoint(0, 0));
   2585  if (width) {
   2586    *width = colortex.width;
   2587  }
   2588  if (height) {
   2589    *height = colortex.height;
   2590  }
   2591  if (stride) {
   2592    *stride = colortex.stride();
   2593  }
   2594  return colortex.buf ? colortex.sample_ptr(0, 0) : nullptr;
   2595 }
   2596 
   2597 void ResolveFramebuffer(GLuint fbo) {
   2598  Framebuffer* fb = ctx->framebuffers.find(fbo);
   2599  if (!fb || !fb->color_attachment) {
   2600    return;
   2601  }
   2602  Texture& colortex = ctx->textures[fb->color_attachment];
   2603  prepare_texture(colortex);
   2604 }
   2605 
   2606 void SetTextureBuffer(GLuint texid, GLenum internal_format, GLsizei width,
   2607                      GLsizei height, GLsizei stride, void* buf,
   2608                      GLsizei min_width, GLsizei min_height) {
   2609  Texture& t = ctx->textures[texid];
   2610  set_tex_storage(t, internal_format, width, height, buf, stride, min_width,
   2611                  min_height);
   2612 }
   2613 
   2614 GLenum CheckFramebufferStatus(GLenum target) {
   2615  Framebuffer* fb = get_framebuffer(target);
   2616  if (!fb || !fb->color_attachment) {
   2617    return GL_FRAMEBUFFER_UNSUPPORTED;
   2618  }
   2619  return GL_FRAMEBUFFER_COMPLETE;
   2620 }
   2621 
void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
                      GLint zoffset, GLsizei width, GLsizei height,
                      GLsizei depth, GLenum format, GLenum type,
                      const void* data) {
  // Clears a sub-rectangle of a texture to the single value supplied in
  // `data`. Only mip level 0 and a single 2D layer are supported, and only
  // the internal formats SWGL actually uses (RGBA8, R8, RG8 and 24-bit
  // depth).
  if (level != 0) {
    assert(false);
    return;
  }
  Texture& t = ctx->textures[texture];
  assert(!t.locked);
  // Nothing to do for an empty clear region.
  if (width <= 0 || height <= 0 || depth <= 0) {
    return;
  }
  assert(zoffset == 0 && depth == 1);
  IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
  if (t.internal_format == GL_DEPTH_COMPONENT24) {
    // Depth clear: scale the clear depth from [0,1] to the 24-bit integer
    // range used by the depth runs.
    uint32_t value = 0xFFFFFF;
    switch (format) {
      case GL_DEPTH_COMPONENT:
        switch (type) {
          case GL_DOUBLE:
            value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
            break;
          case GL_FLOAT:
            value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
            break;
          default:
            assert(false);
            break;
        }
        break;
      default:
        assert(false);
        break;
    }
    if (t.cleared() && !scissor.contains(t.offset_bounds())) {
      // If we need to scissor the clear and the depth buffer was already
      // initialized, then just fill runs for that scissor area.
      t.fill_depth_runs(value, scissor);
    } else {
      // Otherwise, the buffer is either uninitialized or the clear would
      // encompass the entire buffer. If uninitialized, we can safely fill
      // the entire buffer with any value and thus ignore any scissoring.
      t.init_depth_runs(value);
    }
    return;
  }

  // Color clear: pack the clear value into a 32-bit word with red in the
  // low byte. Channels not present in `format` keep the defaults (zero
  // color, opaque alpha).
  uint32_t color = 0xFF000000;
  switch (type) {
    case GL_FLOAT: {
      // Gather the supplied float channels; the cases deliberately fall
      // through so e.g. GL_RGB also picks up blue, green and red.
      const GLfloat* f = (const GLfloat*)data;
      Float v = {0.0f, 0.0f, 0.0f, 1.0f};
      switch (format) {
        case GL_RGBA:
          v.w = f[3];  // alpha
          FALLTHROUGH;
        case GL_RGB:
          v.z = f[2];  // blue
          FALLTHROUGH;
        case GL_RG:
          v.y = f[1];  // green
          FALLTHROUGH;
        case GL_RED:
          v.x = f[0];  // red
          break;
        default:
          assert(false);
          break;
      }
      // Round the float vector to 8-bit channels and reinterpret as a
      // packed word.
      color = bit_cast<uint32_t>(CONVERT(round_pixel(v), U8));
      break;
    }
    case GL_UNSIGNED_BYTE: {
      // Byte channels can be merged into the packed word directly; again
      // the cases fall through intentionally.
      const GLubyte* b = (const GLubyte*)data;
      switch (format) {
        case GL_RGBA:
          color = (color & ~0xFF000000) | (uint32_t(b[3]) << 24);  // alpha
          FALLTHROUGH;
        case GL_RGB:
          color = (color & ~0x00FF0000) | (uint32_t(b[2]) << 16);  // blue
          FALLTHROUGH;
        case GL_RG:
          color = (color & ~0x0000FF00) | (uint32_t(b[1]) << 8);  // green
          FALLTHROUGH;
        case GL_RED:
          color = (color & ~0x000000FF) | uint32_t(b[0]);  // red
          break;
        default:
          assert(false);
          break;
      }
      break;
    }
    default:
      assert(false);
      break;
  }

  // Queue a (possibly delayed) clear of the scissored region, repacking the
  // color into the texture's native channel layout.
  switch (t.internal_format) {
    case GL_RGBA8:
      // Clear color needs to swizzle to BGRA.
      request_clear<uint32_t>(t,
                              (color & 0xFF00FF00) |
                                  ((color << 16) & 0xFF0000) |
                                  ((color >> 16) & 0xFF),
                              scissor);
      break;
    case GL_R8:
      request_clear<uint8_t>(t, uint8_t(color & 0xFF), scissor);
      break;
    case GL_RG8:
      request_clear<uint16_t>(t, uint16_t(color & 0xFFFF), scissor);
      break;
    default:
      assert(false);
      break;
  }
}
   2741 
   2742 void ClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type,
   2743                   const void* data) {
   2744  Texture& t = ctx->textures[texture];
   2745  IntRect scissor = t.offset_bounds();
   2746  ClearTexSubImage(texture, level, scissor.x0, scissor.y0, 0, scissor.width(),
   2747                   scissor.height(), 1, format, type, data);
   2748 }
   2749 
   2750 void Clear(GLbitfield mask) {
   2751  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
   2752  if ((mask & GL_COLOR_BUFFER_BIT) && fb.color_attachment) {
   2753    Texture& t = ctx->textures[fb.color_attachment];
   2754    IntRect scissor = ctx->scissortest
   2755                          ? ctx->scissor.intersection(t.offset_bounds())
   2756                          : t.offset_bounds();
   2757    ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
   2758                     scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
   2759                     ctx->clearcolor);
   2760  }
   2761  if ((mask & GL_DEPTH_BUFFER_BIT) && fb.depth_attachment) {
   2762    Texture& t = ctx->textures[fb.depth_attachment];
   2763    IntRect scissor = ctx->scissortest
   2764                          ? ctx->scissor.intersection(t.offset_bounds())
   2765                          : t.offset_bounds();
   2766    ClearTexSubImage(fb.depth_attachment, 0, scissor.x0, scissor.y0, 0,
   2767                     scissor.width(), scissor.height(), 1, GL_DEPTH_COMPONENT,
   2768                     GL_DOUBLE, &ctx->cleardepth);
   2769  }
   2770 }
   2771 
   2772 void ClearColorRect(GLuint fbo, GLint xoffset, GLint yoffset, GLsizei width,
   2773                    GLsizei height, GLfloat r, GLfloat g, GLfloat b,
   2774                    GLfloat a) {
   2775  GLfloat color[] = {r, g, b, a};
   2776  Framebuffer& fb = ctx->framebuffers[fbo];
   2777  Texture& t = ctx->textures[fb.color_attachment];
   2778  IntRect scissor =
   2779      IntRect{xoffset, yoffset, xoffset + width, yoffset + height}.intersection(
   2780          t.offset_bounds());
   2781  ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
   2782                   scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
   2783                   color);
   2784 }
   2785 
   2786 void InvalidateFramebuffer(GLenum target, GLsizei num_attachments,
   2787                           const GLenum* attachments) {
   2788  Framebuffer* fb = get_framebuffer(target);
   2789  if (!fb || num_attachments <= 0 || !attachments) {
   2790    return;
   2791  }
   2792  for (GLsizei i = 0; i < num_attachments; i++) {
   2793    switch (attachments[i]) {
   2794      case GL_DEPTH_ATTACHMENT: {
   2795        Texture& t = ctx->textures[fb->depth_attachment];
   2796        t.set_cleared(false);
   2797        break;
   2798      }
   2799      case GL_COLOR_ATTACHMENT0: {
   2800        Texture& t = ctx->textures[fb->color_attachment];
   2801        t.disable_delayed_clear();
   2802        break;
   2803      }
   2804    }
   2805  }
   2806 }
   2807 
void ReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format,
                GLenum type, void* data) {
  // Reads back a rect of the read framebuffer's color attachment. If a
  // pixel-pack buffer is bound, `data` is resolved through it (treated as an
  // offset/pointer into the PBO); otherwise it is a client pointer. The
  // requested rect is clipped to the texture's bounds.
  Buffer* pbo = get_pixel_pack_buffer();
  if (pbo) {
    data = pbo->get_data(data);
  }
  if (!data) return;
  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
  if (!fb) return;
  assert(format == GL_RED || format == GL_RGBA || format == GL_RGBA_INTEGER ||
         format == GL_BGRA || format == GL_RG);
  Texture& t = ctx->textures[fb->color_attachment];
  if (!t.buf) return;
  // Resolve any pending delayed clears before sampling the pixels.
  prepare_texture(t);
  // Translate from framebuffer coordinates to texture-local coordinates.
  x -= t.offset.x;
  y -= t.offset.y;
  assert(x >= 0 && y >= 0);
  assert(x + width <= t.width);
  assert(y + height <= t.height);
  if (internal_format_for_data(format, type) != t.internal_format) {
    debugf("mismatched format for read pixels: %x vs %x\n", t.internal_format,
           internal_format_for_data(format, type));
    assert(false);
    return;
  }
  // Only support readback conversions that are reversible
  assert(!format_requires_conversion(format, t.internal_format) ||
         bytes_for_internal_format(format) == t.bpp());
  uint8_t* dest = (uint8_t*)data;
  size_t destStride = width * t.bpp();
  // Clip the requested rect to the texture, advancing the destination
  // pointer past clipped rows/columns so output rows stay aligned with the
  // caller's original rect.
  if (y < 0) {
    dest += -y * destStride;
    height += y;
    y = 0;
  }
  if (y + height > t.height) {
    height = t.height - y;
  }
  if (x < 0) {
    dest += -x * t.bpp();
    width += x;
    x = 0;
  }
  if (x + width > t.width) {
    width = t.width - x;
  }
  // Fully clipped away: nothing to copy.
  if (width <= 0 || height <= 0) {
    return;
  }
  // Copy (and convert, if format differs) row by row, bounded by both the
  // destination buffer (PBO, if any) and the source texture allocation.
  convert_copy(format, t.internal_format, dest, destStride,
               pbo ? (uint8_t*)pbo->buf : nullptr,
               pbo ? (uint8_t*)pbo->end_ptr() : nullptr,
               (const uint8_t*)t.sample_ptr(x, y), t.stride(),
               (const uint8_t*)t.buf, (const uint8_t*)t.end_ptr(), width,
               height);
}
   2866 
void CopyImageSubData(GLuint srcName, GLenum srcTarget, UNUSED GLint srcLevel,
                      GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
                      GLenum dstTarget, UNUSED GLint dstLevel, GLint dstX,
                      GLint dstY, GLint dstZ, GLsizei srcWidth,
                      GLsizei srcHeight, GLsizei srcDepth) {
  // Row-by-row copy between two textures of the same internal format. Only
  // mip level 0 of a single 2D layer is supported.
  assert(srcLevel == 0 && dstLevel == 0);
  assert(srcZ == 0 && srcDepth == 1 && dstZ == 0);
  // Renderbuffer names are resolved to their backing textures.
  if (srcTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[srcName];
    srcName = rb.texture;
  }
  if (dstTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[dstName];
    dstName = rb.texture;
  }
  Texture& srctex = ctx->textures[srcName];
  if (!srctex.buf) return;
  // Resolve any pending delayed clear on the source before reading it.
  prepare_texture(srctex);
  Texture& dsttex = ctx->textures[dstName];
  if (!dsttex.buf) return;
  assert(!dsttex.locked);
  // The destination region is fully overwritten here, so a pending delayed
  // clear on the destination may skip that rect.
  IntRect skip = {dstX, dstY, dstX + srcWidth, dstY + srcHeight};
  prepare_texture(dsttex, &skip);
  assert(srctex.internal_format == dsttex.internal_format);
  assert(srcWidth >= 0);
  assert(srcHeight >= 0);
  assert(srcX + srcWidth <= srctex.width);
  assert(srcY + srcHeight <= srctex.height);
  assert(dstX + srcWidth <= dsttex.width);
  assert(dstY + srcHeight <= dsttex.height);
  int bpp = srctex.bpp();
  int src_stride = srctex.stride();
  int dest_stride = dsttex.stride();
  char* dest = dsttex.sample_ptr(dstX, dstY);
  const char* src = srctex.sample_ptr(srcX, srcY);
  for (int y = 0; y < srcHeight; y++) {
    char* dst_ptr = dest;
    const char* src_ptr = src;
    size_t len = size_t(srcWidth) * bpp;
    // Clamp the row copy to both buffers' allocations; a positive return
    // apparently means the row lies past the end of a buffer, so stop
    // copying entirely — NOTE(review): confirm against the helper's
    // definition.
    if (clip_ptrs_against_bounds(dst_ptr, dsttex.buf, dsttex.end_ptr(), src_ptr,
                                 srctex.buf, srctex.end_ptr(), len) > 0) {
      break;
    }
    if (len) {
      memcpy(dst_ptr, src_ptr, len);
    }
    dest += dest_stride;
    src += src_stride;
  }
}
   2917 
   2918 void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
   2919                       GLint yoffset, GLint x, GLint y, GLsizei width,
   2920                       GLsizei height) {
   2921  assert(level == 0);
   2922  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
   2923  if (!fb) return;
   2924  CopyImageSubData(fb->color_attachment, GL_TEXTURE_2D, 0, x, y, 0,
   2925                   ctx->get_binding(target), GL_TEXTURE_2D, 0, xoffset, yoffset,
   2926                   0, width, height, 1);
   2927 }
   2928 
   2929 }  // extern "C"
   2930 
   2931 #include "blend.h"
   2932 #include "composite.h"
   2933 #include "swgl_ext.h"
   2934 
   2935 #pragma GCC diagnostic push
   2936 #pragma GCC diagnostic ignored "-Wuninitialized"
   2937 #pragma GCC diagnostic ignored "-Wunused-function"
   2938 #pragma GCC diagnostic ignored "-Wunused-parameter"
   2939 #pragma GCC diagnostic ignored "-Wunused-variable"
   2940 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
   2941 #ifdef __clang__
   2942 #  pragma GCC diagnostic ignored "-Wunused-private-field"
   2943 #else
   2944 #  pragma GCC diagnostic ignored "-Wunused-but-set-variable"
   2945 #endif
   2946 #include "load_shader.h"
   2947 #pragma GCC diagnostic pop
   2948 
   2949 #include "rasterize.h"
   2950 
   2951 void VertexArray::validate() {
   2952  int last_enabled = -1;
   2953  for (int i = 0; i <= max_attrib; i++) {
   2954    VertexAttrib& attr = attribs[i];
   2955    if (attr.enabled) {
   2956      // VertexArray &v = ctx->vertex_arrays[attr.vertex_array];
   2957      Buffer& vertex_buf = ctx->buffers[attr.vertex_buffer];
   2958      attr.buf = vertex_buf.buf;
   2959      attr.buf_size = vertex_buf.size;
   2960      // debugf("%d %x %d %d %d %d\n", i, attr.type, attr.size, attr.stride,
   2961      // attr.offset, attr.divisor);
   2962      last_enabled = i;
   2963    }
   2964  }
   2965  max_attrib = last_enabled;
   2966 }
   2967 
   2968 extern "C" {
   2969 
void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
                           GLintptr offset, GLsizei instancecount) {
  // Rasterizes `instancecount` instances of the current vertex/fragment
  // shader pair over the given element range into the draw framebuffer.
  // Indexed paths support GL_TRIANGLES only; the non-standard GL_NONE path
  // also supports GL_LINES.
  if (offset < 0 || count <= 0 || instancecount <= 0 || !vertex_shader ||
      !fragment_shader) {
    return;
  }

  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if (!fb.color_attachment) {
    return;
  }
  Texture& colortex = ctx->textures[fb.color_attachment];
  if (!colortex.buf) {
    return;
  }
  assert(!colortex.locked);
  assert(colortex.internal_format == GL_RGBA8 ||
         colortex.internal_format == GL_R8);
  // Texture 0 serves as a null texture when depth testing is disabled.
  Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
  if (depthtex.buf) {
    assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
    assert(colortex.width == depthtex.width &&
           colortex.height == depthtex.height);
    assert(colortex.offset == depthtex.offset);
  }

  // Refresh cached attribute buffer pointers if bindings changed since the
  // last draw.
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (ctx->validate_vertex_array) {
    ctx->validate_vertex_array = false;
    v.validate();
  }

#ifdef PRINT_TIMINGS
  uint64_t start = get_time_value();
#endif

  // Reset per-draw statistics used for queries and timing output.
  ctx->shaded_rows = 0;
  ctx->shaded_pixels = 0;

  vertex_shader->init_batch();

  switch (type) {
    case GL_UNSIGNED_SHORT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint16_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_UNSIGNED_INT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint32_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_NONE:
      // Non-standard GL extension - if element type is GL_NONE, then we don't
      // use any element buffer and behave as if DrawArrays was called instead.
      for (GLsizei instance = 0; instance < instancecount; instance++) {
        switch (mode) {
          case GL_LINES:
            // Consume vertices two at a time; a trailing odd vertex is
            // ignored.
            for (GLsizei i = 0; i + 2 <= count; i += 2) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 2);
              draw_quad(2, colortex, depthtex);
            }
            break;
          case GL_TRIANGLES:
            // Consume vertices three at a time; trailing partial triangles
            // are ignored.
            for (GLsizei i = 0; i + 3 <= count; i += 3) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 3);
              draw_quad(3, colortex, depthtex);
            }
            break;
          default:
            assert(false);
            break;
        }
      }
      break;
    default:
      assert(false);
      break;
  }

  // Fold this draw's shaded pixel count into any active samples-passed
  // (occlusion) query.
  if (ctx->samples_passed_query) {
    Query& q = ctx->queries[ctx->samples_passed_query];
    q.value += ctx->shaded_pixels;
  }

#ifdef PRINT_TIMINGS
  uint64_t end = get_time_value();
  printf(
      "%7.3fms draw(%s, %d): %d pixels in %d rows (avg %f pixels/row, "
      "%fns/pixel)\n",
      double(end - start) / (1000. * 1000.),
      ctx->programs[ctx->current_program].impl->get_name(), instancecount,
      ctx->shaded_pixels, ctx->shaded_rows,
      double(ctx->shaded_pixels) / ctx->shaded_rows,
      double(end - start) / max(ctx->shaded_pixels, 1));
#endif
}
   3069 
void Finish() {
  // SWGL executes draws synchronously on the calling thread, so by the time
  // any call returns there is no outstanding work to wait for; this only
  // emits a timing marker when enabled.
#ifdef PRINT_TIMINGS
  printf("Finish\n");
#endif
}
   3075 
   3076 void MakeCurrent(Context* c) {
   3077  if (ctx == c) {
   3078    return;
   3079  }
   3080  ctx = c;
   3081  setup_program(ctx ? ctx->current_program : 0);
   3082 }
   3083 
   3084 Context* CreateContext() { return new Context; }
   3085 
   3086 void ReferenceContext(Context* c) {
   3087  if (!c) {
   3088    return;
   3089  }
   3090  ++c->references;
   3091 }
   3092 
   3093 void DestroyContext(Context* c) {
   3094  if (!c) {
   3095    return;
   3096  }
   3097  assert(c->references > 0);
   3098  --c->references;
   3099  if (c->references > 0) {
   3100    return;
   3101  }
   3102  if (ctx == c) {
   3103    MakeCurrent(nullptr);
   3104  }
   3105  delete c;
   3106 }
   3107 
   3108 size_t ReportMemory(Context* ctx, size_t (*size_of_op)(const void*)) {
   3109  size_t size = 0;
   3110  if (ctx) {
   3111    for (auto& t : ctx->textures) {
   3112      if (t && t->should_free()) {
   3113        size += size_of_op(t->buf);
   3114      }
   3115    }
   3116  }
   3117  return size;
   3118 }
   3119 }  // extern "C"