tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

glsl.h (79873B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #define SI ALWAYS_INLINE static
      6 
      7 #include "vector_type.h"
      8 
      9 namespace glsl {
     10 
     11 enum TextureFormat { RGBA32F, RGBA32I, RGBA8, R8, RG8, R16, RG16, YUY2 };
     12 
     13 enum TextureFilter { NEAREST, LINEAR };
     14 
     15 struct samplerCommon {
     16  uint32_t* buf = nullptr;
     17  uint32_t stride = 0;  // in units of BPP if < 4, or dwords if BPP >= 4
     18  uint32_t height = 0;
     19  uint32_t width = 0;
     20  TextureFormat format = TextureFormat::RGBA8;
     21 };
     22 
     23 struct samplerFilter {
     24  TextureFilter filter = TextureFilter::NEAREST;
     25 };
     26 
     27 struct sampler2D_impl : samplerCommon, samplerFilter {};
     28 typedef sampler2D_impl* sampler2D;
     29 
     30 typedef struct sampler2DR8_impl : sampler2D_impl{} * sampler2DR8;
     31 typedef struct sampler2DRG8_impl : sampler2D_impl{} * sampler2DRG8;
     32 typedef struct sampler2DRGBA8_impl : sampler2D_impl{} * sampler2DRGBA8;
     33 typedef struct sampler2DRGBA32F_impl : sampler2D_impl{} * sampler2DRGBA32F;
     34 
     35 struct isampler2D_impl : samplerCommon {};
     36 typedef isampler2D_impl* isampler2D;
     37 
     38 struct isampler2DRGBA32I_impl : isampler2D_impl {};
     39 typedef isampler2DRGBA32I_impl* isampler2DRGBA32I;
     40 
     41 struct sampler2DRect_impl : samplerCommon, samplerFilter {};
     42 typedef sampler2DRect_impl* sampler2DRect;
     43 
     44 #if USE_SSE2
     45 SI bool test_all(Bool cond) { return _mm_movemask_ps(cond) == 0xF; }
     46 SI bool test_any(Bool cond) { return _mm_movemask_ps(cond) != 0; }
     47 SI bool test_none(Bool cond) { return _mm_movemask_ps(cond) == 0; }
     48 #else
     49 SI bool test_all(Bool cond) {
     50  return bit_cast<uint32_t>(CONVERT(cond, U8)) == 0xFFFFFFFFU;
     51 }
     52 SI bool test_any(Bool cond) {
     53  return bit_cast<uint32_t>(CONVERT(cond, U8)) != 0;
     54 }
     55 SI bool test_none(Bool cond) {
     56  return bit_cast<uint32_t>(CONVERT(cond, U8)) == 0;
     57 }
     58 #endif
     59 SI bool test_equal(Bool cond) { return test_none(cond != cond.x); }
     60 
     61 float make_float(float n) { return n; }
     62 
     63 float make_float(int32_t n) { return float(n); }
     64 
     65 float make_float(uint32_t n) { return float(n); }
     66 
     67 float make_float(bool n) { return float(n); }
     68 
     69 template <typename T>
     70 Float make_float(T v) {
     71  return CONVERT(v, Float);
     72 }
     73 
     74 int32_t make_int(uint32_t n) { return n; }
     75 
     76 int32_t make_int(int32_t n) { return n; }
     77 
     78 int32_t make_int(float n) { return int32_t(n); }
     79 
     80 int32_t make_int(bool n) { return int32_t(n); }
     81 
     82 template <typename T>
     83 I32 make_int(T v) {
     84  return CONVERT(v, I32);
     85 }
     86 
     87 uint32_t make_uint(uint32_t n) { return n; }
     88 
     89 uint32_t make_uint(int32_t n) { return n; }
     90 
     91 uint32_t make_uint(float n) { return uint32_t(n); }
     92 
     93 uint32_t make_uint(bool n) { return uint32_t(n); }
     94 
     95 template <typename T>
     96 U32 make_uint(T v) {
     97  return CONVERT(v, U32);
     98 }
     99 
    100 template <typename T>
    101 T force_scalar(T n) {
    102  return n;
    103 }
    104 
    105 float force_scalar(Float f) { return f[0]; }
    106 
    107 int32_t force_scalar(I32 i) { return i[0]; }
    108 
    109 struct vec3;
    110 struct vec4;
    111 struct ivec2;
    112 
    113 SI int32_t if_then_else(int32_t c, int32_t t, int32_t e) { return c ? t : e; }
    114 SI int32_t if_then_else(bool c, int32_t t, int32_t e) { return c ? t : e; }
    115 
    116 SI float if_then_else(int32_t c, float t, float e) { return c ? t : e; }
    117 
    118 SI Float if_then_else(I32 c, float t, float e) {
    119  return bit_cast<Float>((c & bit_cast<I32>(Float(t))) |
    120                         (~c & bit_cast<I32>(Float(e))));
    121 }
    122 
    123 SI I32 if_then_else(I32 c, int32_t t, int32_t e) {
    124  return (c & I32(t)) | (~c & I32(e));
    125 }
    126 
    127 SI U32 if_then_else(I32 c, U32 t, U32 e) {
    128  return bit_cast<U32>((c & bit_cast<I32>(t)) | (~c & bit_cast<I32>(e)));
    129 }
    130 
    131 // Cheaper version of if_then_else that returns Float(0) if condition is false.
    132 SI Float if_then(I32 c, Float t) {
    133  return bit_cast<Float>(c & bit_cast<I32>(t));
    134 }
    135 
    136 SI Float if_then_else(I32 c, Float t, Float e) {
    137  return bit_cast<Float>((c & bit_cast<I32>(t)) | (~c & bit_cast<I32>(e)));
    138 }
    139 
    140 SI Float if_then_else(int32_t c, Float t, Float e) { return c ? t : e; }
    141 
    142 SI Bool if_then_else(I32 c, Bool t, Bool e) { return (c & t) | (~c & e); }
    143 
    144 SI Bool if_then_else(int32_t c, Bool t, Bool e) { return c ? t : e; }
    145 
    146 SI I16 if_then_else(I16 c, I16 t, I16 e) { return (c & t) | (~c & e); }
    147 
    148 template <typename T>
    149 SI void swap(T& a, T& b) {
    150  T t(a);
    151  a = b;
    152  b = t;
    153 }
    154 
    155 SI int32_t min(int32_t a, int32_t b) { return a < b ? a : b; }
    156 SI int32_t max(int32_t a, int32_t b) { return a > b ? a : b; }
    157 
    158 SI int32_t clamp(int32_t a, int32_t minVal, int32_t maxVal) {
    159  return min(max(a, minVal), maxVal);
    160 }
    161 
    162 SI float min(float a, float b) { return a < b ? a : b; }
    163 SI float max(float a, float b) { return a > b ? a : b; }
    164 
    165 SI float clamp(float a, float minVal, float maxVal) {
    166  return min(max(a, minVal), maxVal);
    167 }
    168 
    169 SI Float min(Float a, Float b) {
    170 #if USE_SSE2
    171  return _mm_min_ps(a, b);
    172 #elif USE_NEON
    173  return vminq_f32(a, b);
    174 #else
    175  return if_then_else(a < b, a, b);
    176 #endif
    177 }
    178 
    179 SI Float max(Float a, Float b) {
    180 #if USE_SSE2
    181  return _mm_max_ps(a, b);
    182 #elif USE_NEON
    183  return vmaxq_f32(a, b);
    184 #else
    185  return if_then_else(a > b, a, b);
    186 #endif
    187 }
    188 
    189 SI Float clamp(Float a, Float minVal, Float maxVal) {
    190  return min(max(a, minVal), maxVal);
    191 }
    192 
    193 #define sqrt __glsl_sqrt
    194 
    195 SI float sqrt(float x) { return sqrtf(x); }
    196 
    197 SI Float sqrt(Float v) {
    198 #if USE_SSE2
    199  return _mm_sqrt_ps(v);
    200 #elif USE_NEON
    201  Float e = vrsqrteq_f32(v);
    202  e *= vrsqrtsq_f32(v, e * e);
    203  e *= vrsqrtsq_f32(v, e * e);
    204  return if_then(v != Float(0.0f), v * e);
    205 #else
    206  return (Float){sqrtf(v.x), sqrtf(v.y), sqrtf(v.z), sqrtf(v.w)};
    207 #endif
    208 }
    209 
    210 SI float recip(float x) {
    211 #if USE_SSE2
    212  return _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(x)));
    213 #else
    214  return 1.0f / x;
    215 #endif
    216 }
    217 
    218 // Use a fast vector reciprocal approximation when available. This should only
    219 // be used in cases where it is okay that the approximation is imprecise -
    220 // essentially visually correct but numerically wrong. Otherwise just rely on
    221 // however the compiler would implement slower division if the platform doesn't
    222 // provide a convenient intrinsic.
    223 SI Float recip(Float v) {
    224 #if USE_SSE2
    225  return _mm_rcp_ps(v);
    226 #elif USE_NEON
    227  Float e = vrecpeq_f32(v);
    228  return vrecpsq_f32(v, e) * e;
    229 #else
    230  return 1.0f / v;
    231 #endif
    232 }
    233 
    234 SI float inversesqrt(float x) {
    235 #if USE_SSE2
    236  return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x)));
    237 #else
    238  return 1.0f / sqrtf(x);
    239 #endif
    240 }
    241 
    242 SI Float inversesqrt(Float v) {
    243 #if USE_SSE2
    244  return _mm_rsqrt_ps(v);
    245 #elif USE_NEON
    246  Float e = vrsqrteq_f32(v);
    247  return vrsqrtsq_f32(v, e * e) * e;
    248 #else
    249  return 1.0f / sqrt(v);
    250 #endif
    251 }
    252 
    253 SI float step(float edge, float x) { return float(x >= edge); }
    254 
    255 SI Float step(Float edge, Float x) { return if_then(x >= edge, Float(1)); }
    256 
    257 /*
    258 enum RGBA {
    259        R,
    260        G,
    261        B,
    262        A
    263 };*/
    264 
    265 enum XYZW {
    266  X = 0,
    267  Y = 1,
    268  Z = 2,
    269  W = 3,
    270  R = 0,
    271  G = 1,
    272  B = 2,
    273  A = 3,
    274 };
    275 
    276 struct bvec4_scalar;
    277 
    278 struct bvec2_scalar {
    279  bool x;
    280  bool y;
    281 
    282  bvec2_scalar() : bvec2_scalar(false) {}
    283  IMPLICIT constexpr bvec2_scalar(bool a) : x(a), y(a) {}
    284  constexpr bvec2_scalar(bool x, bool y) : x(x), y(y) {}
    285 
    286  bool& select(XYZW c) {
    287    switch (c) {
    288      case X:
    289        return x;
    290      case Y:
    291        return y;
    292      default:
    293        UNREACHABLE;
    294    }
    295  }
    296  bool sel(XYZW c1) { return select(c1); }
    297 
    298  bvec2_scalar sel(XYZW c1, XYZW c2) {
    299    return bvec2_scalar(select(c1), select(c2));
    300  }
    301  bvec4_scalar sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4);
    302 };
    303 
    304 struct bvec2_scalar1 {
    305  bool x;
    306 
    307  IMPLICIT constexpr bvec2_scalar1(bool a) : x(a) {}
    308 
    309  operator bvec2_scalar() const { return bvec2_scalar(x); }
    310 };
    311 
    312 struct bvec2 {
    313  bvec2() : bvec2(0) {}
    314  IMPLICIT bvec2(Bool a) : x(a), y(a) {}
    315  bvec2(Bool x, Bool y) : x(x), y(y) {}
    316  Bool& select(XYZW c) {
    317    switch (c) {
    318      case X:
    319        return x;
    320      case Y:
    321        return y;
    322      default:
    323        UNREACHABLE;
    324    }
    325  }
    326  Bool sel(XYZW c1) { return select(c1); }
    327 
    328  bvec2 sel(XYZW c1, XYZW c2) { return bvec2(select(c1), select(c2)); }
    329 
    330  bvec2 operator~() { return bvec2(~x, ~y); }
    331 
    332  Bool x;
    333  Bool y;
    334 };
    335 
    336 bvec2_scalar1 make_bvec2(bool n) { return bvec2_scalar1(n); }
    337 
    338 bvec2_scalar make_bvec2(bool x, bool y) { return bvec2_scalar{x, y}; }
    339 
    340 template <typename N>
    341 bvec2 make_bvec2(const N& n) {
    342  return bvec2(n);
    343 }
    344 
    345 template <typename X, typename Y>
    346 bvec2 make_bvec2(const X& x, const Y& y) {
    347  return bvec2(x, y);
    348 }
    349 
    350 struct vec3_scalar;
    351 struct vec4_scalar;
    352 
    353 struct vec2_scalar {
    354  typedef struct vec2 vector_type;
    355  typedef float element_type;
    356 
    357  float x;
    358  float y;
    359 
    360  constexpr vec2_scalar() : vec2_scalar(0.0f) {}
    361  IMPLICIT constexpr vec2_scalar(float a) : x(a), y(a) {}
    362  IMPLICIT constexpr vec2_scalar(int a) : x(a), y(a) {}
    363  constexpr vec2_scalar(float x, float y) : x(x), y(y) {}
    364 
    365  float& select(XYZW c) {
    366    switch (c) {
    367      case X:
    368        return x;
    369      case Y:
    370        return y;
    371      default:
    372        UNREACHABLE;
    373    }
    374  }
    375  float& sel(XYZW c1) { return select(c1); }
    376  vec2_scalar sel(XYZW c1, XYZW c2) {
    377    return vec2_scalar(select(c1), select(c2));
    378  }
    379  vec3_scalar sel(XYZW c1, XYZW c2, XYZW c3);
    380  vec4_scalar sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4);
    381 
    382  friend bool operator==(const vec2_scalar& l, const vec2_scalar& r) {
    383    return l.x == r.x && l.y == r.y;
    384  }
    385 
    386  friend bool operator!=(const vec2_scalar& l, const vec2_scalar& r) {
    387    return l.x != r.x || l.y != r.y;
    388  }
    389 
    390  friend vec2_scalar operator*(float a, vec2_scalar b) {
    391    return vec2_scalar(a * b.x, a * b.y);
    392  }
    393  friend vec2_scalar operator*(vec2_scalar a, float b) {
    394    return vec2_scalar(a.x * b, a.y * b);
    395  }
    396  friend vec2_scalar operator*(vec2_scalar a, vec2_scalar b) {
    397    return vec2_scalar(a.x * b.x, a.y * b.y);
    398  }
    399  friend vec2_scalar operator/(vec2_scalar a, float b) {
    400    return vec2_scalar(a.x / b, a.y / b);
    401  }
    402  friend vec2_scalar operator/(vec2_scalar a, vec2_scalar b) {
    403    return vec2_scalar(a.x / b.x, a.y / b.y);
    404  }
    405 
    406  friend vec2_scalar operator-(vec2_scalar a, vec2_scalar b) {
    407    return vec2_scalar(a.x - b.x, a.y - b.y);
    408  }
    409  friend vec2_scalar operator-(vec2_scalar a, float b) {
    410    return vec2_scalar(a.x - b, a.y - b);
    411  }
    412  friend vec2_scalar operator-(float a, vec2_scalar b) {
    413    return vec2_scalar(a - b.x, a - b.y);
    414  }
    415  friend vec2_scalar operator+(vec2_scalar a, vec2_scalar b) {
    416    return vec2_scalar(a.x + b.x, a.y + b.y);
    417  }
    418  friend vec2_scalar operator+(vec2_scalar a, float b) {
    419    return vec2_scalar(a.x + b, a.y + b);
    420  }
    421 
    422  vec2_scalar operator-() { return vec2_scalar(-x, -y); }
    423 
    424  vec2_scalar operator*=(vec2_scalar a) {
    425    x *= a.x;
    426    y *= a.y;
    427    return *this;
    428  }
    429 
    430  vec2_scalar operator/=(vec2_scalar a) {
    431    x /= a.x;
    432    y /= a.y;
    433    return *this;
    434  }
    435 
    436  vec2_scalar operator+=(vec2_scalar a) {
    437    x += a.x;
    438    y += a.y;
    439    return *this;
    440  }
    441 
    442  vec2_scalar operator-=(vec2_scalar a) {
    443    x -= a.x;
    444    y -= a.y;
    445    return *this;
    446  }
    447 };
    448 
    449 struct vec2_scalar_ref {
    450  vec2_scalar_ref(float& x, float& y) : x(x), y(y) {}
    451  float& x;
    452  float& y;
    453 
    454  float& select(XYZW c) {
    455    switch (c) {
    456      case X:
    457        return x;
    458      case Y:
    459        return y;
    460      default:
    461        UNREACHABLE;
    462    }
    463  }
    464  float& sel(XYZW c1) { return select(c1); }
    465 
    466  vec2_scalar_ref& operator=(const vec2_scalar& a) {
    467    x = a.x;
    468    y = a.y;
    469    return *this;
    470  }
    471  vec2_scalar_ref& operator*=(vec2_scalar a) {
    472    x *= a.x;
    473    y *= a.y;
    474    return *this;
    475  }
    476  operator vec2_scalar() const { return vec2_scalar{x, y}; }
    477 };
    478 
    479 struct vec2 {
    480  typedef struct vec2 vector_type;
    481  typedef float element_type;
    482 
    483  constexpr vec2() : vec2(Float(0.0f)) {}
    484  IMPLICIT constexpr vec2(Float a) : x(a), y(a) {}
    485  vec2(Float x, Float y) : x(x), y(y) {}
    486  IMPLICIT constexpr vec2(vec2_scalar s) : x(s.x), y(s.y) {}
    487  constexpr vec2(vec2_scalar s0, vec2_scalar s1, vec2_scalar s2, vec2_scalar s3)
    488      : x(Float{s0.x, s1.x, s2.x, s3.x}), y(Float{s0.y, s1.y, s2.y, s3.y}) {}
    489  explicit vec2(ivec2 a);
    490  Float x;
    491  Float y;
    492 
    493  Float& select(XYZW c) {
    494    switch (c) {
    495      case X:
    496        return x;
    497      case Y:
    498        return y;
    499      default:
    500        UNREACHABLE;
    501    }
    502  }
    503  Float& sel(XYZW c1) { return select(c1); }
    504  vec2 sel(XYZW c1, XYZW c2) { return vec2(select(c1), select(c2)); }
    505 
    506  vec4 sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4);
    507 
    508  vec2 operator*=(Float a) {
    509    x *= a;
    510    y *= a;
    511    return *this;
    512  }
    513  vec2 operator*=(vec2 a) {
    514    x *= a.x;
    515    y *= a.y;
    516    return *this;
    517  }
    518 
    519  vec2 operator/=(Float a) {
    520    x /= a;
    521    y /= a;
    522    return *this;
    523  }
    524  vec2 operator/=(vec2 a) {
    525    x /= a.x;
    526    y /= a.y;
    527    return *this;
    528  }
    529 
    530  vec2 operator+=(vec2 a) {
    531    x += a.x;
    532    y += a.y;
    533    return *this;
    534  }
    535  vec2 operator-=(vec2 a) {
    536    x -= a.x;
    537    y -= a.y;
    538    return *this;
    539  }
    540  vec2 operator-=(Float a) {
    541    x -= a;
    542    y -= a;
    543    return *this;
    544  }
    545 
    546  vec2 operator-() { return vec2(-x, -y); }
    547 
    548  friend I32 operator==(const vec2& l, const vec2& r) {
    549    return l.x == r.x && l.y == r.y;
    550  }
    551 
    552  friend I32 operator!=(const vec2& l, const vec2& r) {
    553    return l.x != r.x || l.y != r.y;
    554  }
    555 
    556  friend vec2 operator*(vec2 a, Float b) { return vec2(a.x * b, a.y * b); }
    557  friend vec2 operator*(vec2 a, vec2 b) { return vec2(a.x * b.x, a.y * b.y); }
    558  friend vec2 operator*(Float a, vec2 b) { return vec2(a * b.x, a * b.y); }
    559 
    560  friend vec2 operator/(vec2 a, vec2 b) { return vec2(a.x / b.x, a.y / b.y); }
    561  friend vec2 operator/(vec2 a, Float b) { return vec2(a.x / b, a.y / b); }
    562 
    563  friend vec2 operator-(vec2 a, vec2 b) { return vec2(a.x - b.x, a.y - b.y); }
    564  friend vec2 operator-(vec2 a, Float b) { return vec2(a.x - b, a.y - b); }
    565  friend vec2 operator-(Float a, vec2 b) { return vec2(a - b.x, a - b.y); }
    566  friend vec2 operator+(vec2 a, vec2 b) { return vec2(a.x + b.x, a.y + b.y); }
    567  friend vec2 operator+(vec2 a, Float b) { return vec2(a.x + b, a.y + b); }
    568  friend vec2 operator+(Float a, vec2 b) { return vec2(a + b.x, a + b.y); }
    569 };
    570 
    571 vec2_scalar force_scalar(const vec2& v) {
    572  return vec2_scalar{force_scalar(v.x), force_scalar(v.y)};
    573 }
    574 
    575 vec2_scalar make_vec2(float n) { return vec2_scalar{n, n}; }
    576 
    577 vec2_scalar make_vec2(float x, float y) { return vec2_scalar{x, y}; }
    578 
    579 vec2_scalar make_vec2(int32_t x, int32_t y) {
    580  return vec2_scalar{float(x), float(y)};
    581 }
    582 
    583 template <typename N>
    584 vec2 make_vec2(const N& n) {
    585  return vec2(n);
    586 }
    587 
    588 template <typename X, typename Y>
    589 vec2 make_vec2(const X& x, const Y& y) {
    590  return vec2(x, y);
    591 }
    592 
    593 vec2 operator*(vec2_scalar a, Float b) { return vec2(a.x * b, a.y * b); }
    594 
    595 vec2 operator*(Float a, vec2_scalar b) { return vec2(a * b.x, a * b.y); }
    596 
    597 SI vec2 min(vec2 a, vec2 b) { return vec2(min(a.x, b.x), min(a.y, b.y)); }
    598 SI vec2 min(vec2 a, Float b) { return vec2(min(a.x, b), min(a.y, b)); }
    599 
    600 SI vec2_scalar min(vec2_scalar a, vec2_scalar b) {
    601  return vec2_scalar{min(a.x, b.x), min(a.y, b.y)};
    602 }
    603 
    604 SI vec2 if_then_else(I32 c, vec2 t, vec2 e) {
    605  return vec2(if_then_else(c, t.x, e.x), if_then_else(c, t.y, e.y));
    606 }
    607 
    608 SI vec2 if_then_else(int32_t c, vec2 t, vec2 e) { return c ? t : e; }
    609 
    610 vec2 step(vec2 edge, vec2 x) {
    611  return vec2(step(edge.x, x.x), step(edge.y, x.y));
    612 }
    613 
    614 vec2_scalar step(vec2_scalar edge, vec2_scalar x) {
    615  return vec2_scalar(step(edge.x, x.x), step(edge.y, x.y));
    616 }
    617 
    618 SI vec2 max(vec2 a, vec2 b) { return vec2(max(a.x, b.x), max(a.y, b.y)); }
    619 SI vec2 max(vec2 a, Float b) { return vec2(max(a.x, b), max(a.y, b)); }
    620 
    621 SI vec2_scalar max(vec2_scalar a, vec2_scalar b) {
    622  return vec2_scalar{max(a.x, b.x), max(a.y, b.y)};
    623 }
    624 SI vec2_scalar max(vec2_scalar a, float b) {
    625  return vec2_scalar{max(a.x, b), max(a.y, b)};
    626 }
    627 
    628 Float length(vec2 a) { return sqrt(a.x * a.x + a.y * a.y); }
    629 
    630 float length(vec2_scalar a) { return hypotf(a.x, a.y); }
    631 
    632 template <typename A, typename B>
    633 SI auto distance(A a, B b) {
    634  return length(a - b);
    635 }
    636 
    637 template <typename T>
    638 SI T normalize(T a) {
    639  return a / length(a);
    640 }
    641 
    642 SI vec2 sqrt(vec2 a) { return vec2(sqrt(a.x), sqrt(a.y)); }
    643 
    644 SI vec2_scalar sqrt(vec2_scalar a) { return vec2_scalar(sqrt(a.x), sqrt(a.y)); }
    645 
    646 SI vec2 recip(vec2 a) { return vec2(recip(a.x), recip(a.y)); }
    647 
    648 SI vec2_scalar recip(vec2_scalar a) {
    649  return vec2_scalar(recip(a.x), recip(a.y));
    650 }
    651 
    652 SI vec2 inversesqrt(vec2 a) { return vec2(inversesqrt(a.x), inversesqrt(a.y)); }
    653 
    654 SI vec2_scalar inversesqrt(vec2_scalar a) {
    655  return vec2_scalar(inversesqrt(a.x), inversesqrt(a.y));
    656 }
    657 
    658 #define abs __glsl_abs
    659 
    660 int32_t abs(int32_t a) { return a < 0 ? -a : a; }
    661 
    662 float abs(float a) { return fabsf(a); }
    663 
    664 Float abs(Float v) {
    665 #if USE_NEON
    666  return vabsq_f32(v);
    667 #else
    668  return bit_cast<Float>(bit_cast<I32>(v) & bit_cast<I32>(0.0f - v));
    669 #endif
    670 }
    671 
    672 float sign(float a) { return copysignf(1.0f, a); }
    673 
    674 Float sign(Float v) {
    675  return bit_cast<Float>((bit_cast<I32>(v) & 0x80000000) |
    676                         bit_cast<I32>(Float(1.0f)));
    677 }
    678 
    679 Float cast(U32 v) { return CONVERT((I32)v, Float); }
    680 Float cast(I32 v) { return CONVERT((I32)v, Float); }
    681 I32 cast(Float v) { return CONVERT(v, I32); }
    682 
    683 #define floor __glsl_floor
    684 
    685 float floor(float a) { return floorf(a); }
    686 
    687 Float floor(Float v) {
    688  Float roundtrip = cast(cast(v));
    689  return roundtrip - if_then(roundtrip > v, Float(1));
    690 }
    691 
    692 vec2 floor(vec2 v) { return vec2(floor(v.x), floor(v.y)); }
    693 
    694 vec2_scalar floor(vec2_scalar v) {
    695  return vec2_scalar{floorf(v.x), floorf(v.y)};
    696 }
    697 
    698 #define ceil __glsl_ceil
    699 
    700 float ceil(float a) { return ceilf(a); }
    701 
    702 Float ceil(Float v) {
    703  Float roundtrip = cast(cast(v));
    704  return roundtrip + if_then(roundtrip < v, Float(1));
    705 }
    706 
    707 // Round to nearest even
    708 SI int32_t roundeven(float v, float scale) {
    709 #if USE_SSE2
    710  return _mm_cvtss_si32(_mm_set_ss(v * scale));
    711 #else
    712  return bit_cast<int32_t>(v * scale + float(0xC00000)) - 0x4B400000;
    713 #endif
    714 }
    715 
    716 SI I32 roundeven(Float v, Float scale) {
    717 #if USE_SSE2
    718  return _mm_cvtps_epi32(v * scale);
    719 #else
    720  // Magic number implementation of round-to-nearest-even
    721  // see http://stereopsis.com/sree/fpu2006.html
    722  return bit_cast<I32>(v * scale + Float(0xC00000)) - 0x4B400000;
    723 #endif
    724 }
    725 
    726 // Round towards zero
    727 SI int32_t roundzero(float v, float scale) { return int32_t(v * scale); }
    728 
    729 SI I32 roundzero(Float v, Float scale) { return cast(v * scale); }
    730 
    731 // Round whichever direction is fastest for positive numbers
    732 SI I32 roundfast(Float v, Float scale) {
    733 #if USE_SSE2
    734  return _mm_cvtps_epi32(v * scale);
    735 #else
    736  return cast(v * scale + 0.5f);
    737 #endif
    738 }
    739 
    740 template <typename T>
    741 SI auto round_pixel(T v, float scale = 255.0f) {
    742  return roundfast(v, scale);
    743 }
    744 
    745 #define round __glsl_round
    746 
    747 float round(float a) { return roundf(a); }
    748 
    749 Float round(Float v) { return floor(v + 0.5f); }
    750 
    751 float fract(float a) { return a - floor(a); }
    752 
    753 Float fract(Float v) { return v - floor(v); }
    754 
    755 vec2 fract(vec2 v) { return vec2(fract(v.x), fract(v.y)); }
    756 
    757 vec2_scalar fract(vec2_scalar v) { return vec2_scalar(fract(v.x), fract(v.y)); }
    758 
    759 // X derivatives can be approximated by dFdx(x) = x[1] - x[0].
    760 // Y derivatives are not easily available since we operate in terms of X spans
    761 // only. To work around, assume dFdy(p.x) = dFdx(p.y), which only holds for
    762 // uniform scaling, and thus abs(dFdx(p.x)) + abs(dFdy(p.x)) = abs(dFdx(p.x)) +
    763 // abs(dFdx(p.y)) which mirrors abs(dFdx(p.y)) + abs(dFdy(p.y)) = abs(dFdx(p.y))
    764 // + abs(dFdx(p.x)).
    765 vec2_scalar fwidth(vec2 p) {
    766  Float d = abs(SHUFFLE(p.x, p.y, 1, 1, 5, 5) - SHUFFLE(p.x, p.y, 0, 0, 4, 4));
    767  return vec2_scalar(d.x + d.z);
    768 }
    769 
    770 float dFdx(Float x) { return x.y - x.x; }
    771 
    772 vec2_scalar dFdx(vec2 p) { return vec2_scalar(dFdx(p.x), dFdx(p.y)); }
    773 
    774 // See
    775 // http://www.machinedlearnings.com/2011/06/fast-approximate-logarithm-exponential.html.
    776 Float approx_log2(Float x) {
    777  // e - 127 is a fair approximation of log2(x) in its own right...
    778  Float e = cast(bit_cast<U32>(x)) * (1.0f / (1 << 23));
    779 
    780  // ... but using the mantissa to refine its error is _much_ better.
    781  Float m = bit_cast<Float>((bit_cast<U32>(x) & 0x007fffff) | 0x3f000000);
    782  return e - 124.225514990f - 1.498030302f * m -
    783         1.725879990f / (0.3520887068f + m);
    784 }
    785 
    786 Float approx_pow2(Float x) {
    787  Float f = fract(x);
    788  return bit_cast<Float>(
    789      roundfast(1.0f * (1 << 23), x + 121.274057500f - 1.490129070f * f +
    790                                      27.728023300f / (4.84252568f - f)));
    791 }
    792 
    793 #define pow __glsl_pow
    794 
    795 SI float pow(float x, float y) { return powf(x, y); }
    796 
    797 Float pow(Float x, Float y) {
    798  return if_then_else((x == 0) | (x == 1), x, approx_pow2(approx_log2(x) * y));
    799 }
    800 
    801 #define exp __glsl_exp
    802 
    803 SI float exp(float x) { return expf(x); }
    804 
    805 Float exp(Float y) {
    806  float l2e = 1.4426950408889634074f;
    807  return approx_pow2(l2e * y);
    808 }
    809 
    810 #define exp2 __glsl_exp2
    811 
    812 SI float exp2(float x) { return exp2f(x); }
    813 
    814 Float exp2(Float x) { return approx_pow2(x); }
    815 
    816 #define log __glsl_log
    817 
    818 SI float log(float x) { return logf(x); }
    819 
    820 Float log(Float x) { return approx_log2(x) * 0.69314718f; }
    821 
    822 #define log2 __glsl_log2
    823 
    824 SI float log2(float x) { return log2f(x); }
    825 
    826 Float log2(Float x) { return approx_log2(x); }
    827 
    828 struct ivec4;
    829 
    830 struct ivec2_scalar {
    831  typedef int32_t element_type;
    832 
    833  int32_t x;
    834  int32_t y;
    835 
    836  ivec2_scalar() : ivec2_scalar(0) {}
    837  IMPLICIT constexpr ivec2_scalar(int32_t a) : x(a), y(a) {}
    838  constexpr ivec2_scalar(int32_t x, int32_t y) : x(x), y(y) {}
    839 
    840  int32_t& select(XYZW c) {
    841    switch (c) {
    842      case X:
    843        return x;
    844      case Y:
    845        return y;
    846      default:
    847        UNREACHABLE;
    848    }
    849  }
    850  int32_t& sel(XYZW c1) { return select(c1); }
    851  ivec2_scalar sel(XYZW c1, XYZW c2) {
    852    return ivec2_scalar{select(c1), select(c2)};
    853  }
    854 
    855  ivec2_scalar operator-() const { return ivec2_scalar{-x, -y}; }
    856 
    857  ivec2_scalar& operator+=(ivec2_scalar a) {
    858    x += a.x;
    859    y += a.y;
    860    return *this;
    861  }
    862  ivec2_scalar& operator+=(int n) {
    863    x += n;
    864    y += n;
    865    return *this;
    866  }
    867 
    868  ivec2_scalar& operator>>=(int shift) {
    869    x >>= shift;
    870    y >>= shift;
    871    return *this;
    872  }
    873 
    874  friend ivec2_scalar operator&(ivec2_scalar a, int b) {
    875    return ivec2_scalar{a.x & b, a.y & b};
    876  }
    877 
    878  friend ivec2_scalar operator+(ivec2_scalar a, ivec2_scalar b) {
    879    return ivec2_scalar{a.x + b.x, a.y + b.y};
    880  }
    881  friend ivec2_scalar operator+(ivec2_scalar a, int b) {
    882    return ivec2_scalar{a.x + b, a.y + b};
    883  }
    884 
    885  friend ivec2_scalar operator-(ivec2_scalar a, ivec2_scalar b) {
    886    return ivec2_scalar{a.x - b.x, a.y - b.y};
    887  }
    888  friend ivec2_scalar operator-(ivec2_scalar a, int b) {
    889    return ivec2_scalar{a.x - b, a.y - b};
    890  }
    891 
    892  friend bool operator==(const ivec2_scalar& l, const ivec2_scalar& r) {
    893    return l.x == r.x && l.y == r.y;
    894  }
    895 };
    896 
    897 struct ivec2 {
    898  typedef int32_t element_type;
    899 
    900  ivec2() : ivec2(I32(0)) {}
    901  IMPLICIT ivec2(I32 a) : x(a), y(a) {}
    902  ivec2(I32 x, I32 y) : x(x), y(y) {}
    903  IMPLICIT ivec2(vec2 a) : x(cast(a.x)), y(cast(a.y)) {}
    904  ivec2(U32 x, U32 y) : x(CONVERT(x, I32)), y(CONVERT(y, I32)) {}
    905  IMPLICIT constexpr ivec2(ivec2_scalar s) : x(s.x), y(s.y) {}
    906  constexpr ivec2(ivec2_scalar s0, ivec2_scalar s1, ivec2_scalar s2,
    907                  ivec2_scalar s3)
    908      : x(I32{s0.x, s1.x, s2.x, s3.x}), y(I32{s0.y, s1.y, s2.y, s3.y}) {}
    909  I32 x;
    910  I32 y;
    911 
    912  I32& select(XYZW c) {
    913    switch (c) {
    914      case X:
    915        return x;
    916      case Y:
    917        return y;
    918      default:
    919        UNREACHABLE;
    920    }
    921  }
    922  I32& sel(XYZW c1) { return select(c1); }
    923 
    924  ivec2 sel(XYZW c1, XYZW c2) { return ivec2(select(c1), select(c2)); }
    925 
    926  ivec4 sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4);
    927 
    928  ivec2& operator*=(I32 a) {
    929    x *= a;
    930    y *= a;
    931    return *this;
    932  }
    933  ivec2& operator+=(ivec2 a) {
    934    x += a.x;
    935    y += a.y;
    936    return *this;
    937  }
    938  ivec2& operator>>=(int shift) {
    939    x >>= shift;
    940    y >>= shift;
    941    return *this;
    942  }
    943 
    944  friend ivec2 operator*(ivec2 a, I32 b) { return ivec2(a.x * b, a.y * b); }
    945  friend ivec2 operator&(ivec2 a, ivec2 b) {
    946    return ivec2(a.x & b.x, a.y & b.y);
    947  }
    948  friend ivec2 operator&(ivec2 a, I32 b) { return ivec2(a.x & b, a.y & b); }
    949  friend ivec2 operator+(ivec2 a, ivec2 b) {
    950    return ivec2(a.x + b.x, a.y + b.y);
    951  }
    952 };
    953 
    954 vec2::vec2(ivec2 a) : x(cast(a.x)), y(cast(a.y)) {}
    955 
    956 ivec2_scalar make_ivec2(int32_t n) { return ivec2_scalar{n, n}; }
    957 
    958 ivec2_scalar make_ivec2(uint32_t n) {
    959  return ivec2_scalar{int32_t(n), int32_t(n)};
    960 }
    961 
    962 ivec2_scalar make_ivec2(int32_t x, int32_t y) { return ivec2_scalar{x, y}; }
    963 
    964 ivec2_scalar make_ivec2(uint32_t x, uint32_t y) {
    965  return ivec2_scalar{int32_t(x), int32_t(y)};
    966 }
    967 
    968 vec2_scalar make_vec2(const ivec2_scalar& v) {
    969  return vec2_scalar{float(v.x), float(v.y)};
    970 }
    971 
    972 ivec2_scalar make_ivec2(const vec2_scalar& v) {
    973  return ivec2_scalar{int32_t(v.x), int32_t(v.y)};
    974 }
    975 
    976 template <typename N>
    977 ivec2 make_ivec2(const N& n) {
    978  return ivec2(n);
    979 }
    980 
    981 template <typename X, typename Y>
    982 ivec2 make_ivec2(const X& x, const Y& y) {
    983  return ivec2(x, y);
    984 }
    985 
    986 ivec2_scalar force_scalar(const ivec2& v) {
    987  return ivec2_scalar{force_scalar(v.x), force_scalar(v.y)};
    988 }
    989 
    990 struct ivec3_scalar {
    991  int32_t x;
    992  int32_t y;
    993  int32_t z;
    994 
    995  ivec3_scalar() : ivec3_scalar(0) {}
    996  IMPLICIT constexpr ivec3_scalar(int32_t a) : x(a), y(a), z(a) {}
    997  constexpr ivec3_scalar(int32_t x, int32_t y, int32_t z) : x(x), y(y), z(z) {}
    998 
    999  int32_t& select(XYZW c) {
   1000    switch (c) {
   1001      case X:
   1002        return x;
   1003      case Y:
   1004        return y;
   1005      case Z:
   1006        return z;
   1007      default:
   1008        UNREACHABLE;
   1009    }
   1010  }
   1011  int32_t& sel(XYZW c1) { return select(c1); }
   1012  ivec2_scalar sel(XYZW c1, XYZW c2) {
   1013    return ivec2_scalar{select(c1), select(c2)};
   1014  }
   1015 };
   1016 
   1017 struct ivec3 {
   1018  ivec3() : ivec3(0) {}
   1019  IMPLICIT ivec3(I32 a) : x(a), y(a), z(a) {}
   1020  ivec3(I32 x, I32 y, I32 z) : x(x), y(y), z(z) {}
   1021  ivec3(ivec2 a, I32 b) : x(a.x), y(a.y), z(b) {}
   1022  ivec3(vec2 a, Float b) : x(cast(a.x)), y(cast(a.y)), z(cast(b)) {}
   1023  I32 x;
   1024  I32 y;
   1025  I32 z;
   1026 
   1027  friend ivec3 operator+(ivec3 a, ivec3 b) {
   1028    return ivec3(a.x + b.x, a.y + b.y, a.z + b.z);
   1029  }
   1030 };
   1031 
   1032 vec2_scalar make_vec2(ivec3_scalar s) {
   1033  return vec2_scalar{float(s.x), float(s.y)};
   1034 }
   1035 
   1036 ivec3_scalar make_ivec3(int32_t n) { return ivec3_scalar{n, n, n}; }
   1037 
   1038 ivec3_scalar make_ivec3(const ivec2_scalar& v, int32_t z) {
   1039  return ivec3_scalar{v.x, v.y, z};
   1040 }
   1041 
   1042 ivec3_scalar make_ivec3(int32_t x, int32_t y, int32_t z) {
   1043  return ivec3_scalar{x, y, z};
   1044 }
   1045 
   1046 template <typename N>
   1047 ivec3 make_ivec3(const N& n) {
   1048  return ivec3(n);
   1049 }
   1050 
   1051 template <typename X, typename Y>
   1052 ivec3 make_ivec3(const X& x, const Y& y) {
   1053  return ivec3(x, y);
   1054 }
   1055 
   1056 template <typename X, typename Y, typename Z>
   1057 ivec3 make_ivec3(const X& x, const Y& y, const Z& z) {
   1058  return ivec3(x, y, z);
   1059 }
   1060 
   1061 struct ivec4_scalar {
   1062  typedef int32_t element_type;
   1063 
   1064  int32_t x;
   1065  int32_t y;
   1066  int32_t z;
   1067  int32_t w;
   1068 
   1069  ivec4_scalar() : ivec4_scalar(0) {}
   1070  IMPLICIT constexpr ivec4_scalar(int32_t a) : x(a), y(a), z(a), w(a) {}
   1071  constexpr ivec4_scalar(int32_t x, int32_t y, int32_t z, int32_t w)
   1072      : x(x), y(y), z(z), w(w) {}
   1073 
   1074  int32_t& select(XYZW c) {
   1075    switch (c) {
   1076      case X:
   1077        return x;
   1078      case Y:
   1079        return y;
   1080      case Z:
   1081        return z;
   1082      case W:
   1083        return w;
   1084      default:
   1085        UNREACHABLE;
   1086    }
   1087  }
   1088  int32_t& sel(XYZW c1) { return select(c1); }
   1089  ivec2_scalar sel(XYZW c1, XYZW c2) {
   1090    return ivec2_scalar{select(c1), select(c2)};
   1091  }
   1092 
   1093  friend ivec4_scalar operator&(int32_t a, ivec4_scalar b) {
   1094    return ivec4_scalar{a & b.x, a & b.y, a & b.z, a & b.w};
   1095  }
   1096  friend ivec4_scalar operator<<(ivec4_scalar a, int32_t b) {
   1097    return ivec4_scalar{a.x << b, a.y << b, a.z << b, a.w << b};
   1098  }
   1099 
   1100  int32_t& operator[](int index) {
   1101    switch (index) {
   1102      case 0:
   1103        return x;
   1104      case 1:
   1105        return y;
   1106      case 2:
   1107        return z;
   1108      case 3:
   1109        return w;
   1110      default:
   1111        UNREACHABLE;
   1112    }
   1113  }
   1114 };
   1115 
   1116 struct ivec4 {
   1117  typedef int32_t element_type;
   1118 
   1119  ivec4() : ivec4(I32(0)) {}
   1120  IMPLICIT ivec4(I32 a) : x(a), y(a), z(a), w(a) {}
   1121  ivec4(I32 x, I32 y, I32 z, I32 w) : x(x), y(y), z(z), w(w) {}
   1122  ivec4(ivec2 a, I32 b, I32 c) : x(a.x), y(a.y), z(b), w(c) {}
   1123  IMPLICIT constexpr ivec4(ivec4_scalar s) : x(s.x), y(s.y), z(s.z), w(s.w) {}
   1124  constexpr ivec4(ivec4_scalar s0, ivec4_scalar s1, ivec4_scalar s2,
   1125                  ivec4_scalar s3)
   1126      : x(I32{s0.x, s1.x, s2.x, s3.x}),
   1127        y(I32{s0.y, s1.y, s2.y, s3.y}),
   1128        z(I32{s0.z, s1.z, s2.z, s3.z}),
   1129        w(I32{s0.w, s1.w, s2.w, s3.w}) {}
   1130 
   1131  I32& select(XYZW c) {
   1132    switch (c) {
   1133      case X:
   1134        return x;
   1135      case Y:
   1136        return y;
   1137      case Z:
   1138        return z;
   1139      case W:
   1140        return w;
   1141      default:
   1142        UNREACHABLE;
   1143    }
   1144  }
   1145  I32 sel(XYZW c1) { return select(c1); }
   1146 
   1147  ivec2 sel(XYZW c1, XYZW c2) { return ivec2(select(c1), select(c2)); }
   1148 
   1149  ivec3 sel(XYZW c1, XYZW c2, XYZW c3) {
   1150    return ivec3(select(c1), select(c2), select(c3));
   1151  }
   1152 
   1153  friend ivec4 operator&(I32 a, ivec4 b) {
   1154    return ivec4(a & b.x, a & b.y, a & b.z, a & b.w);
   1155  }
   1156 
   1157  I32 x;
   1158  I32 y;
   1159  I32 z;
   1160  I32 w;
   1161 };
   1162 
   1163 ivec4_scalar force_scalar(const ivec4& v) {
   1164  return ivec4_scalar{force_scalar(v.x), force_scalar(v.y), force_scalar(v.z),
   1165                      force_scalar(v.w)};
   1166 }
   1167 
   1168 ivec4_scalar make_ivec4(int32_t n) { return ivec4_scalar{n, n, n, n}; }
   1169 
   1170 ivec4_scalar make_ivec4(const ivec2_scalar& xy, int32_t z, int32_t w) {
   1171  return ivec4_scalar{xy.x, xy.y, z, w};
   1172 }
   1173 
   1174 ivec4_scalar make_ivec4(int32_t x, int32_t y, int32_t z, int32_t w) {
   1175  return ivec4_scalar{x, y, z, w};
   1176 }
   1177 
   1178 template <typename N>
   1179 ivec4 make_ivec4(const N& n) {
   1180  return ivec4(n);
   1181 }
   1182 
   1183 template <typename X, typename Y, typename Z>
   1184 ivec4 make_ivec4(const X& x, const Y& y, const Z& z) {
   1185  return ivec4(x, y, z);
   1186 }
   1187 
   1188 template <typename X, typename Y, typename Z, typename W>
   1189 ivec4 make_ivec4(const X& x, const Y& y, const Z& z, const W& w) {
   1190  return ivec4(x, y, z, w);
   1191 }
   1192 
   1193 SI ivec2 if_then_else(I32 c, ivec2 t, ivec2 e) {
   1194  return ivec2(if_then_else(c, t.x, e.x), if_then_else(c, t.y, e.y));
   1195 }
   1196 
   1197 SI ivec2 if_then_else(int32_t c, ivec2 t, ivec2 e) { return c ? t : e; }
   1198 
   1199 SI ivec4 if_then_else(I32 c, ivec4 t, ivec4 e) {
   1200  return ivec4(if_then_else(c, t.x, e.x), if_then_else(c, t.y, e.y),
   1201               if_then_else(c, t.z, e.z), if_then_else(c, t.w, e.w));
   1202 }
   1203 
   1204 SI ivec4 if_then_else(int32_t c, ivec4 t, ivec4 e) { return c ? t : e; }
   1205 
   1206 ivec4 operator&(I32 a, ivec4_scalar b) {
   1207  return ivec4(a & b.x, a & b.y, a & b.z, a & b.w);
   1208 }
   1209 
   1210 struct bvec3_scalar {
   1211  bool x;
   1212  bool y;
   1213  bool z;
   1214 
   1215  bvec3_scalar() : bvec3_scalar(false) {}
   1216  IMPLICIT constexpr bvec3_scalar(bool a) : x(a), y(a), z(a) {}
   1217  constexpr bvec3_scalar(bool x, bool y, bool z) : x(x), y(y), z(z) {}
   1218 };
   1219 
   1220 struct bvec3_scalar1 {
   1221  bool x;
   1222 
   1223  IMPLICIT constexpr bvec3_scalar1(bool a) : x(a) {}
   1224 
   1225  operator bvec3_scalar() const { return bvec3_scalar(x); }
   1226 };
   1227 
   1228 struct bvec3 {
   1229  bvec3() : bvec3(0) {}
   1230  IMPLICIT bvec3(Bool a) : x(a), y(a), z(a) {}
   1231  bvec3(Bool x, Bool y, Bool z) : x(x), y(y), z(z) {}
   1232  Bool& select(XYZW c) {
   1233    switch (c) {
   1234      case X:
   1235        return x;
   1236      case Y:
   1237        return y;
   1238      case Z:
   1239        return z;
   1240      default:
   1241        UNREACHABLE;
   1242    }
   1243  }
   1244  Bool sel(XYZW c1) { return select(c1); }
   1245 
   1246  Bool x;
   1247  Bool y;
   1248  Bool z;
   1249 };
   1250 
   1251 bvec3_scalar1 make_bvec3(bool n) { return bvec3_scalar1(n); }
   1252 
   1253 struct bvec4_scalar {
   1254  bool x;
   1255  bool y;
   1256  bool z;
   1257  bool w;
   1258 
   1259  bvec4_scalar() : bvec4_scalar(false) {}
   1260  IMPLICIT constexpr bvec4_scalar(bool a) : x(a), y(a), z(a), w(a) {}
   1261  constexpr bvec4_scalar(bool x, bool y, bool z, bool w)
   1262      : x(x), y(y), z(z), w(w) {}
   1263 
   1264  bool& select(XYZW c) {
   1265    switch (c) {
   1266      case X:
   1267        return x;
   1268      case Y:
   1269        return y;
   1270      case Z:
   1271        return z;
   1272      case W:
   1273        return w;
   1274      default:
   1275        UNREACHABLE;
   1276    }
   1277  }
   1278  bool sel(XYZW c1) { return select(c1); }
   1279  bvec2_scalar sel(XYZW c1, XYZW c2) {
   1280    return bvec2_scalar(select(c1), select(c2));
   1281  }
   1282 };
   1283 
   1284 bvec4_scalar bvec2_scalar::sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4) {
   1285  return bvec4_scalar{select(c1), select(c2), select(c3), select(c4)};
   1286 }
   1287 
   1288 struct bvec4_scalar1 {
   1289  bool x;
   1290 
   1291  IMPLICIT constexpr bvec4_scalar1(bool a) : x(a) {}
   1292 
   1293  operator bvec4_scalar() const { return bvec4_scalar(x); }
   1294 };
   1295 
   1296 struct bvec4 {
   1297  bvec4() : bvec4(0) {}
   1298  IMPLICIT bvec4(Bool a) : x(a), y(a), z(a), w(a) {}
   1299  bvec4(Bool x, Bool y, Bool z, Bool w) : x(x), y(y), z(z), w(w) {}
   1300  bvec4(bvec2 x, bvec2 y) : x(x.x), y(x.y), z(y.x), w(y.y) {}
   1301  Bool& select(XYZW c) {
   1302    switch (c) {
   1303      case X:
   1304        return x;
   1305      case Y:
   1306        return y;
   1307      case Z:
   1308        return z;
   1309      case W:
   1310        return w;
   1311      default:
   1312        UNREACHABLE;
   1313    }
   1314  }
   1315  Bool sel(XYZW c1) { return select(c1); }
   1316 
   1317  Bool x;
   1318  Bool y;
   1319  Bool z;
   1320  Bool w;
   1321 };
   1322 
   1323 bvec4_scalar1 make_bvec4(bool n) { return bvec4_scalar1(n); }
   1324 
   1325 bvec4_scalar make_bvec4(bool x, bool y, bool z, bool w) {
   1326  return bvec4_scalar{x, y, z, w};
   1327 }
   1328 
   1329 bvec4_scalar make_bvec4(bvec2_scalar a, bvec2_scalar b) {
   1330  return bvec4_scalar{a.x, a.y, b.x, b.y};
   1331 }
   1332 
   1333 template <typename N>
   1334 bvec4 make_bvec4(const N& n) {
   1335  return bvec4(n);
   1336 }
   1337 
   1338 template <typename X, typename Y>
   1339 bvec4 make_bvec4(const X& x, const Y& y) {
   1340  return bvec4(x, y);
   1341 }
   1342 
   1343 template <typename X, typename Y, typename Z, typename W>
   1344 bvec4 make_bvec4(const X& x, const Y& y, const Z& z, const W& w) {
   1345  return bvec4(x, y, z, w);
   1346 }
   1347 
   1348 struct vec2_ref {
   1349  vec2_ref(Float& x, Float& y) : x(x), y(y) {}
   1350  Float& x;
   1351  Float& y;
   1352 
   1353  Float& select(XYZW c) {
   1354    switch (c) {
   1355      case X:
   1356        return x;
   1357      case Y:
   1358        return y;
   1359      default:
   1360        UNREACHABLE;
   1361    }
   1362  }
   1363  Float& sel(XYZW c1) { return select(c1); }
   1364 
   1365  vec2_ref& operator=(const vec2& a) {
   1366    x = a.x;
   1367    y = a.y;
   1368    return *this;
   1369  }
   1370 
   1371  vec2_ref& operator/=(Float a) {
   1372    x /= a;
   1373    y /= a;
   1374    return *this;
   1375  }
   1376 
   1377  vec2_ref& operator/=(vec2 a) {
   1378    x /= a.x;
   1379    y /= a.y;
   1380    return *this;
   1381  }
   1382 
   1383  vec2_ref& operator+=(vec2 a) {
   1384    x += a.x;
   1385    y += a.y;
   1386    return *this;
   1387  }
   1388  vec2_ref& operator-=(vec2 a) {
   1389    x -= a.x;
   1390    y -= a.y;
   1391    return *this;
   1392  }
   1393  vec2_ref& operator*=(vec2 a) {
   1394    x *= a.x;
   1395    y *= a.y;
   1396    return *this;
   1397  }
   1398 };
   1399 
   1400 struct vec3_scalar {
   1401  typedef struct vec3 vector_type;
   1402  typedef float element_type;
   1403 
   1404  float x;
   1405  float y;
   1406  float z;
   1407 
   1408  constexpr vec3_scalar() : vec3_scalar(0.0f) {}
   1409  IMPLICIT constexpr vec3_scalar(float a) : x(a), y(a), z(a) {}
   1410  constexpr vec3_scalar(float x, float y, float z) : x(x), y(y), z(z) {}
   1411 
   1412  float& select(XYZW c) {
   1413    switch (c) {
   1414      case X:
   1415        return x;
   1416      case Y:
   1417        return y;
   1418      case Z:
   1419        return z;
   1420      default:
   1421        UNREACHABLE;
   1422    }
   1423  }
   1424  float& sel(XYZW c1) { return select(c1); }
   1425  vec2_scalar sel(XYZW c1, XYZW c2) {
   1426    return vec2_scalar(select(c1), select(c2));
   1427  }
   1428  vec3_scalar sel(XYZW c1, XYZW c2, XYZW c3) {
   1429    return vec3_scalar(select(c1), select(c2), select(c3));
   1430  }
   1431  vec2_scalar_ref lsel(XYZW c1, XYZW c2) {
   1432    return vec2_scalar_ref(select(c1), select(c2));
   1433  }
   1434 
   1435  friend vec3_scalar operator*(vec3_scalar a, vec3_scalar b) {
   1436    return vec3_scalar{a.x * b.x, a.y * b.y, a.z * b.z};
   1437  }
   1438  friend vec3_scalar operator*(vec3_scalar a, float b) {
   1439    return vec3_scalar{a.x * b, a.y * b, a.z * b};
   1440  }
   1441 
   1442  friend vec3_scalar operator-(vec3_scalar a, vec3_scalar b) {
   1443    return vec3_scalar{a.x - b.x, a.y - b.y, a.z - b.z};
   1444  }
   1445  friend vec3_scalar operator-(vec3_scalar a, float b) {
   1446    return vec3_scalar{a.x - b, a.y - b, a.z - b};
   1447  }
   1448  friend vec3_scalar operator+(vec3_scalar a, vec3_scalar b) {
   1449    return vec3_scalar{a.x + b.x, a.y + b.y, a.z + b.z};
   1450  }
   1451  friend vec3_scalar operator+(vec3_scalar a, float b) {
   1452    return vec3_scalar{a.x + b, a.y + b, a.z + b};
   1453  }
   1454 
   1455  friend vec3_scalar operator/(vec3_scalar a, vec3_scalar b) {
   1456    return vec3_scalar{a.x / b.x, a.y / b.y, a.z / b.z};
   1457  }
   1458  friend vec3_scalar operator/(vec3_scalar a, float b) {
   1459    return vec3_scalar{a.x / b, a.y / b, a.z / b};
   1460  }
   1461 
   1462  vec3_scalar operator+=(vec3_scalar a) {
   1463    x += a.x;
   1464    y += a.y;
   1465    z += a.z;
   1466    return *this;
   1467  }
   1468 
   1469  friend bool operator==(const vec3_scalar& l, const vec3_scalar& r) {
   1470    return l.x == r.x && l.y == r.y && l.z == r.z;
   1471  }
   1472 };
   1473 
   1474 struct vec3_scalar_ref {
   1475  vec3_scalar_ref(float& x, float& y, float& z) : x(x), y(y), z(z) {}
   1476  float& x;
   1477  float& y;
   1478  float& z;
   1479 
   1480  float& select(XYZW c) {
   1481    switch (c) {
   1482      case X:
   1483        return x;
   1484      case Y:
   1485        return y;
   1486      case Z:
   1487        return z;
   1488      default:
   1489        UNREACHABLE;
   1490    }
   1491  }
   1492  float& sel(XYZW c1) { return select(c1); }
   1493 
   1494  vec3_scalar_ref& operator=(const vec3_scalar& a) {
   1495    x = a.x;
   1496    y = a.y;
   1497    z = a.z;
   1498    return *this;
   1499  }
   1500 
   1501  operator vec3_scalar() const { return vec3_scalar{x, y, z}; }
   1502 };
   1503 
   1504 struct vec3 {
   1505  typedef struct vec3 vector_type;
   1506  typedef float element_type;
   1507 
   1508  constexpr vec3() : vec3(Float(0.0f)) {}
   1509  IMPLICIT constexpr vec3(Float a) : x(a), y(a), z(a) {}
   1510  constexpr vec3(Float x, Float y, Float z) : x(x), y(y), z(z) {}
   1511  vec3(vec2 a, Float z) : x(a.x), y(a.y), z(z) {}
   1512  explicit vec3(vec4);
   1513  IMPLICIT constexpr vec3(vec3_scalar s) : x(s.x), y(s.y), z(s.z) {}
   1514  constexpr vec3(vec3_scalar s0, vec3_scalar s1, vec3_scalar s2, vec3_scalar s3)
   1515      : x(Float{s0.x, s1.x, s2.x, s3.x}),
   1516        y(Float{s0.y, s1.y, s2.y, s3.y}),
   1517        z(Float{s0.z, s1.z, s2.z, s3.z}) {}
   1518  Float x;
   1519  Float y;
   1520  Float z;
   1521 
   1522  Float& select(XYZW c) {
   1523    switch (c) {
   1524      case X:
   1525        return x;
   1526      case Y:
   1527        return y;
   1528      case Z:
   1529        return z;
   1530      default:
   1531        UNREACHABLE;
   1532    }
   1533  }
   1534  Float& sel(XYZW c1) { return select(c1); }
   1535 
   1536  vec2 sel(XYZW c1, XYZW c2) { return vec2(select(c1), select(c2)); }
   1537 
   1538  vec3 sel(XYZW c1, XYZW c2, XYZW c3) {
   1539    return vec3(select(c1), select(c2), select(c3));
   1540  }
   1541 
   1542  vec4 sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4);
   1543 
   1544  vec2_ref lsel(XYZW c1, XYZW c2) { return vec2_ref(select(c1), select(c2)); }
   1545 
   1546  friend vec3 operator*(vec3 a, Float b) {
   1547    return vec3(a.x * b, a.y * b, a.z * b);
   1548  }
   1549  friend vec3 operator*(vec3 a, vec3 b) {
   1550    return vec3(a.x * b.x, a.y * b.y, a.z * b.z);
   1551  }
   1552  friend vec3 operator*(Float a, vec3 b) {
   1553    return vec3(a * b.x, a * b.y, a * b.z);
   1554  }
   1555 
   1556  friend vec3 operator/(vec3 a, Float b) {
   1557    return vec3(a.x / b, a.y / b, a.z / b);
   1558  }
   1559  friend vec3 operator/(vec3 a, vec3 b) {
   1560    return vec3(a.x / b.x, a.y / b.y, a.z / b.z);
   1561  }
   1562 
   1563  friend I32 operator==(const vec3& l, const vec3& r) {
   1564    return l.x == r.x && l.y == r.y && l.z == r.z;
   1565  }
   1566 
   1567  friend vec3 operator-(vec3 a, Float b) {
   1568    return vec3(a.x - b, a.y - b, a.z - b);
   1569  }
   1570  friend vec3 operator-(vec3 a, vec3 b) {
   1571    return vec3(a.x - b.x, a.y - b.y, a.z - b.z);
   1572  }
   1573  friend vec3 operator+(vec3 a, Float b) {
   1574    return vec3(a.x + b, a.y + b, a.z + b);
   1575  }
   1576  friend vec3 operator+(vec3 a, vec3 b) {
   1577    return vec3(a.x + b.x, a.y + b.y, a.z + b.z);
   1578  }
   1579 
   1580  vec3 operator+=(vec3_scalar a) {
   1581    x += a.x;
   1582    y += a.y;
   1583    z += a.z;
   1584    return *this;
   1585  }
   1586  vec3& operator+=(vec3 a) {
   1587    x += a.x;
   1588    y += a.y;
   1589    z += a.z;
   1590    return *this;
   1591  }
   1592 };
   1593 
   1594 vec3_scalar force_scalar(const vec3& v) {
   1595  return vec3_scalar{force_scalar(v.x), force_scalar(v.y), force_scalar(v.z)};
   1596 }
   1597 
   1598 vec3_scalar make_vec3(float n) { return vec3_scalar{n, n, n}; }
   1599 
   1600 vec3_scalar make_vec3(const vec2_scalar& v, float z) {
   1601  return vec3_scalar{v.x, v.y, z};
   1602 }
   1603 
   1604 vec3_scalar make_vec3(float x, float y, float z) {
   1605  return vec3_scalar{x, y, z};
   1606 }
   1607 
   1608 vec3_scalar make_vec3(int32_t x, int32_t y, float z) {
   1609  return vec3_scalar{float(x), float(y), z};
   1610 }
   1611 
   1612 template <typename N>
   1613 vec3 make_vec3(const N& n) {
   1614  return vec3(n);
   1615 }
   1616 
   1617 template <typename X, typename Y>
   1618 vec3 make_vec3(const X& x, const Y& y) {
   1619  return vec3(x, y);
   1620 }
   1621 
   1622 template <typename X, typename Y, typename Z>
   1623 vec3 make_vec3(const X& x, const Y& y, const Z& z) {
   1624  return vec3(x, y, z);
   1625 }
   1626 
   1627 SI vec3 if_then_else(I32 c, vec3 t, vec3 e) {
   1628  return vec3(if_then_else(c, t.x, e.x), if_then_else(c, t.y, e.y),
   1629              if_then_else(c, t.z, e.z));
   1630 }
   1631 
   1632 SI vec3 if_then_else(int32_t c, vec3 t, vec3 e) { return c ? t : e; }
   1633 
   1634 SI vec3 if_then_else(ivec3 c, vec3 t, vec3 e) {
   1635  return vec3(if_then_else(c.x, t.x, e.x), if_then_else(c.y, t.y, e.y),
   1636              if_then_else(c.z, t.z, e.z));
   1637 }
   1638 
   1639 vec3 step(vec3 edge, vec3 x) {
   1640  return vec3(step(edge.x, x.x), step(edge.y, x.y), step(edge.z, x.z));
   1641 }
   1642 
   1643 vec3_scalar step(vec3_scalar edge, vec3_scalar x) {
   1644  return vec3_scalar(step(edge.x, x.x), step(edge.y, x.y), step(edge.z, x.z));
   1645 }
   1646 
   1647 SI vec3 min(vec3 a, vec3 b) {
   1648  return vec3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
   1649 }
   1650 SI vec3 min(vec3 a, Float b) {
   1651  return vec3(min(a.x, b), min(a.y, b), min(a.z, b));
   1652 }
   1653 SI vec3_scalar min(vec3_scalar a, vec3_scalar b) {
   1654  return vec3_scalar{min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)};
   1655 }
   1656 
   1657 SI vec3 max(vec3 a, vec3 b) {
   1658  return vec3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
   1659 }
   1660 SI vec3 max(vec3 a, Float b) {
   1661  return vec3(max(a.x, b), max(a.y, b), max(a.z, b));
   1662 }
   1663 SI vec3_scalar max(vec3_scalar a, vec3_scalar b) {
   1664  return vec3_scalar{max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)};
   1665 }
   1666 
   1667 vec3 pow(vec3 x, vec3 y) {
   1668  return vec3(pow(x.x, y.x), pow(x.y, y.y), pow(x.z, y.z));
   1669 }
   1670 
   1671 struct vec3_ref {
   1672  vec3_ref(Float& x, Float& y, Float& z) : x(x), y(y), z(z) {}
   1673  Float& x;
   1674  Float& y;
   1675  Float& z;
   1676  vec3_ref& operator=(const vec3& a) {
   1677    x = a.x;
   1678    y = a.y;
   1679    z = a.z;
   1680    return *this;
   1681  }
   1682 
   1683  vec3_ref& operator/=(Float a) {
   1684    x /= a;
   1685    y /= a;
   1686    z /= a;
   1687    return *this;
   1688  }
   1689 
   1690  vec3_ref& operator*=(Float a) {
   1691    x *= a;
   1692    y *= a;
   1693    z *= a;
   1694    return *this;
   1695  }
   1696 };
   1697 
   1698 struct vec4_scalar {
   1699  typedef struct vec4 vector_type;
   1700  typedef float element_type;
   1701 
   1702  float x;
   1703  float y;
   1704  float z;
   1705  float w;
   1706 
   1707  constexpr vec4_scalar() : vec4_scalar(0.0f) {}
   1708  IMPLICIT constexpr vec4_scalar(float a) : x(a), y(a), z(a), w(a) {}
   1709  constexpr vec4_scalar(float x, float y, float z, float w)
   1710      : x(x), y(y), z(z), w(w) {}
   1711  vec4_scalar(vec3_scalar xyz, float w) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {}
   1712 
   1713  static vec4_scalar load_from_ptr(const float* f) {
   1714    return vec4_scalar(f[0], f[1], f[2], f[3]);
   1715  }
   1716 
   1717  ALWAYS_INLINE float& select(XYZW c) {
   1718    switch (c) {
   1719      case X:
   1720        return x;
   1721      case Y:
   1722        return y;
   1723      case Z:
   1724        return z;
   1725      case W:
   1726        return w;
   1727      default:
   1728        UNREACHABLE;
   1729    }
   1730  }
   1731  float& sel(XYZW c1) { return select(c1); }
   1732  vec2_scalar sel(XYZW c1, XYZW c2) {
   1733    return vec2_scalar{select(c1), select(c2)};
   1734  }
   1735  vec3_scalar sel(XYZW c1, XYZW c2, XYZW c3) {
   1736    return vec3_scalar{select(c1), select(c2), select(c3)};
   1737  }
   1738  vec4_scalar sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4) {
   1739    return vec4_scalar{select(c1), select(c2), select(c3), select(c4)};
   1740  }
   1741  vec2_scalar_ref lsel(XYZW c1, XYZW c2) {
   1742    return vec2_scalar_ref(select(c1), select(c2));
   1743  }
   1744  vec3_scalar_ref lsel(XYZW c1, XYZW c2, XYZW c3) {
   1745    return vec3_scalar_ref(select(c1), select(c2), select(c3));
   1746  }
   1747 
   1748  friend vec4_scalar operator*(vec4_scalar a, vec4_scalar b) {
   1749    return vec4_scalar{a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w};
   1750  }
   1751  friend vec4_scalar operator*(vec4_scalar a, float b) {
   1752    return vec4_scalar{a.x * b, a.y * b, a.z * b, a.w * b};
   1753  }
   1754  friend vec4_scalar operator*(float a, vec4_scalar b) {
   1755    return vec4_scalar{a * b.x, a * b.y, a * b.z, a * b.w};
   1756  }
   1757  vec4_scalar& operator*=(float a) {
   1758    x *= a;
   1759    y *= a;
   1760    z *= a;
   1761    w *= a;
   1762    return *this;
   1763  }
   1764 
   1765  friend vec4_scalar operator-(vec4_scalar a, vec4_scalar b) {
   1766    return vec4_scalar{a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w};
   1767  }
   1768  friend vec4_scalar operator-(vec4_scalar a, float b) {
   1769    return vec4_scalar{a.x - b, a.y - b, a.z - b, a.w - b};
   1770  }
   1771  friend vec4_scalar operator+(vec4_scalar a, vec4_scalar b) {
   1772    return vec4_scalar{a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w};
   1773  }
   1774  friend vec4_scalar operator+(vec4_scalar a, float b) {
   1775    return vec4_scalar{a.x + b, a.y + b, a.z + b, a.w + b};
   1776  }
   1777 
   1778  friend vec4_scalar operator/(vec4_scalar a, vec4_scalar b) {
   1779    return vec4_scalar{a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w};
   1780  }
   1781  friend vec4_scalar operator/(vec4_scalar a, float b) {
   1782    return vec4_scalar{a.x / b, a.y / b, a.z / b, a.w / b};
   1783  }
   1784 
   1785  vec4_scalar& operator+=(vec4_scalar a) {
   1786    x += a.x;
   1787    y += a.y;
   1788    z += a.z;
   1789    w += a.w;
   1790    return *this;
   1791  }
   1792 
   1793  vec4_scalar& operator/=(vec4_scalar a) {
   1794    x /= a.x;
   1795    y /= a.y;
   1796    z /= a.z;
   1797    w /= a.w;
   1798    return *this;
   1799  }
   1800 
   1801  vec4_scalar& operator*=(vec4_scalar a) {
   1802    x *= a.x;
   1803    y *= a.y;
   1804    z *= a.z;
   1805    w *= a.w;
   1806    return *this;
   1807  }
   1808 
   1809  friend bool operator==(const vec4_scalar& l, const vec4_scalar& r) {
   1810    return l.x == r.x && l.y == r.y && l.z == r.z && l.w == r.w;
   1811  }
   1812 
   1813  friend bool operator!=(const vec4_scalar& l, const vec4_scalar& r) {
   1814    return l.x != r.x || l.y != r.y || l.z != r.z || l.w != r.w;
   1815  }
   1816 };
   1817 
   1818 vec3_scalar vec2_scalar::sel(XYZW c1, XYZW c2, XYZW c3) {
   1819  return {select(c1), select(c2), select(c3)};
   1820 }
   1821 vec4_scalar vec2_scalar::sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4) {
   1822  return vec4_scalar{select(c1), select(c2), select(c3), select(c4)};
   1823 }
   1824 
   1825 struct vec4_ref {
   1826  vec4_ref(Float& x, Float& y, Float& z, Float& w) : x(x), y(y), z(z), w(w) {}
   1827  Float& x;
   1828  Float& y;
   1829  Float& z;
   1830  Float& w;
   1831 
   1832  vec4_ref& operator=(const vec4& a);
   1833 };
   1834 
   1835 struct vec4 {
   1836  typedef struct vec4 vector_type;
   1837  typedef float element_type;
   1838 
   1839  constexpr vec4() : vec4(Float(0.0f)) {}
   1840  IMPLICIT constexpr vec4(Float a) : x(a), y(a), z(a), w(a) {}
   1841  vec4(Float x, Float y, Float z, Float w) : x(x), y(y), z(z), w(w) {}
   1842  vec4(vec3 xyz, Float w) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {}
   1843  vec4(vec2 xy, vec2 zw) : x(xy.x), y(xy.y), z(zw.x), w(zw.y) {}
   1844  vec4(vec2 xy, Float z, Float w) : x(xy.x), y(xy.y), z(z), w(w) {}
   1845  vec4(Float x, Float y, vec2 zw) : x(x), y(y), z(zw.x), w(zw.y) {}
   1846  IMPLICIT constexpr vec4(vec4_scalar s) : x(s.x), y(s.y), z(s.z), w(s.w) {}
   1847  constexpr vec4(vec4_scalar s0, vec4_scalar s1, vec4_scalar s2, vec4_scalar s3)
   1848      : x(Float{s0.x, s1.x, s2.x, s3.x}),
   1849        y(Float{s0.y, s1.y, s2.y, s3.y}),
   1850        z(Float{s0.z, s1.z, s2.z, s3.z}),
   1851        w(Float{s0.w, s1.w, s2.w, s3.w}) {}
   1852  ALWAYS_INLINE Float& select(XYZW c) {
   1853    switch (c) {
   1854      case X:
   1855        return x;
   1856      case Y:
   1857        return y;
   1858      case Z:
   1859        return z;
   1860      case W:
   1861        return w;
   1862      default:
   1863        UNREACHABLE;
   1864    }
   1865  }
   1866  ALWAYS_INLINE Float& sel(XYZW c1) { return select(c1); }
   1867 
   1868  ALWAYS_INLINE vec2 sel(XYZW c1, XYZW c2) {
   1869    return vec2(select(c1), select(c2));
   1870  }
   1871 
   1872  ALWAYS_INLINE vec3 sel(XYZW c1, XYZW c2, XYZW c3) {
   1873    return vec3(select(c1), select(c2), select(c3));
   1874  }
   1875  ALWAYS_INLINE vec3_ref lsel(XYZW c1, XYZW c2, XYZW c3) {
   1876    return vec3_ref(select(c1), select(c2), select(c3));
   1877  }
   1878 
   1879  ALWAYS_INLINE vec2_ref lsel(XYZW c1, XYZW c2) {
   1880    return vec2_ref(select(c1), select(c2));
   1881  }
   1882 
   1883  ALWAYS_INLINE vec4 sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4) {
   1884    return vec4(select(c1), select(c2), select(c3), select(c4));
   1885  }
   1886  ALWAYS_INLINE vec4_ref lsel(XYZW c1, XYZW c2, XYZW c3, XYZW c4) {
   1887    return vec4_ref(select(c1), select(c2), select(c3), select(c4));
   1888  }
   1889 
   1890  Float& operator[](int index) {
   1891    switch (index) {
   1892      case 0:
   1893        return x;
   1894      case 1:
   1895        return y;
   1896      case 2:
   1897        return z;
   1898      case 3:
   1899        return w;
   1900      default:
   1901        UNREACHABLE;
   1902    }
   1903  }
   1904 
   1905  // glsl supports non-const indexing of vecs.
   1906  // hlsl doesn't. The code it generates is probably not wonderful.
   1907  Float operator[](I32 index) {
   1908    float sel_x = 0;
   1909    switch (index.x) {
   1910      case 0:
   1911        sel_x = x.x;
   1912        break;
   1913      case 1:
   1914        sel_x = y.x;
   1915        break;
   1916      case 2:
   1917        sel_x = z.x;
   1918        break;
   1919      case 3:
   1920        sel_x = w.x;
   1921        break;
   1922    }
   1923    float sel_y = 0;
   1924    switch (index.y) {
   1925      case 0:
   1926        sel_y = x.y;
   1927        break;
   1928      case 1:
   1929        sel_y = y.y;
   1930        break;
   1931      case 2:
   1932        sel_y = z.y;
   1933        break;
   1934      case 3:
   1935        sel_y = w.y;
   1936        break;
   1937    }
   1938    float sel_z = 0;
   1939    switch (index.z) {
   1940      case 0:
   1941        sel_z = x.z;
   1942        break;
   1943      case 1:
   1944        sel_z = y.z;
   1945        break;
   1946      case 2:
   1947        sel_z = z.z;
   1948        break;
   1949      case 3:
   1950        sel_z = w.z;
   1951        break;
   1952    }
   1953    float sel_w = 0;
   1954    switch (index.w) {
   1955      case 0:
   1956        sel_w = x.w;
   1957        break;
   1958      case 1:
   1959        sel_w = y.w;
   1960        break;
   1961      case 2:
   1962        sel_w = z.w;
   1963        break;
   1964      case 3:
   1965        sel_w = w.w;
   1966        break;
   1967    }
   1968    Float ret = {sel_x, sel_y, sel_z, sel_w};
   1969    return ret;
   1970  }
   1971 
   1972  friend vec4 operator/(vec4 a, Float b) {
   1973    return vec4(a.x / b, a.y / b, a.z / b, a.w / b);
   1974  }
   1975  friend vec4 operator/(vec4 a, vec4 b) {
   1976    return vec4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
   1977  }
   1978 
   1979  friend vec4 operator*(vec4 a, Float b) {
   1980    return vec4(a.x * b, a.y * b, a.z * b, a.w * b);
   1981  }
   1982 
   1983  friend vec4 operator*(Float b, vec4 a) {
   1984    return vec4(a.x * b, a.y * b, a.z * b, a.w * b);
   1985  }
   1986  friend vec4 operator*(vec4 a, vec4 b) {
   1987    return vec4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
   1988  }
   1989 
   1990  friend vec4 operator-(vec4 a, vec4 b) {
   1991    return vec4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
   1992  }
   1993  friend vec4 operator+(vec4 a, vec4 b) {
   1994    return vec4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
   1995  }
   1996  vec4& operator+=(vec4 a) {
   1997    x += a.x;
   1998    y += a.y;
   1999    z += a.z;
   2000    w += a.w;
   2001    return *this;
   2002  }
   2003  vec4& operator/=(vec4 a) {
   2004    x /= a.x;
   2005    y /= a.y;
   2006    z /= a.z;
   2007    w /= a.w;
   2008    return *this;
   2009  }
   2010  vec4& operator*=(vec4 a) {
   2011    x *= a.x;
   2012    y *= a.y;
   2013    z *= a.z;
   2014    w *= a.w;
   2015    return *this;
   2016  }
   2017  vec4& operator*=(Float a) {
   2018    x *= a;
   2019    y *= a;
   2020    z *= a;
   2021    w *= a;
   2022    return *this;
   2023  }
   2024 
   2025  friend I32 operator==(const vec4& l, const vec4& r) {
   2026    return l.x == r.x && l.y == r.y && l.z == r.z && l.w == r.w;
   2027  }
   2028 
   2029  friend I32 operator!=(const vec4& l, const vec4& r) {
   2030    return l.x != r.x || l.y != r.y || l.z != r.z || l.w != r.w;
   2031  }
   2032 
   2033  Float x;
   2034  Float y;
   2035  Float z;
   2036  Float w;
   2037 };
   2038 
   2039 inline vec4_ref& vec4_ref::operator=(const vec4& a) {
   2040  x = a.x;
   2041  y = a.y;
   2042  z = a.z;
   2043  w = a.w;
   2044  return *this;
   2045 }
   2046 
   2047 inline vec4 vec3::sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4) {
   2048  return vec4(select(c1), select(c2), select(c3), select(c4));
   2049 }
   2050 
   2051 vec4_scalar force_scalar(const vec4& v) {
   2052  return vec4_scalar{force_scalar(v.x), force_scalar(v.y), force_scalar(v.z),
   2053                     force_scalar(v.w)};
   2054 }
   2055 
   2056 vec4_scalar make_vec4(float n) { return vec4_scalar{n, n, n, n}; }
   2057 
   2058 vec4_scalar make_vec4(const vec2_scalar& v, float z, float w) {
   2059  return vec4_scalar{v.x, v.y, z, w};
   2060 }
   2061 
   2062 vec4_scalar make_vec4(const vec2_scalar& a, const vec2_scalar& b) {
   2063  return vec4_scalar{a.x, a.y, b.x, b.y};
   2064 }
   2065 
   2066 vec4_scalar make_vec4(const vec3_scalar& v, float w) {
   2067  return vec4_scalar{v.x, v.y, v.z, w};
   2068 }
   2069 
   2070 vec4_scalar make_vec4(float x, float y, float z, float w) {
   2071  return vec4_scalar{x, y, z, w};
   2072 }
   2073 
   2074 vec4_scalar make_vec4(float x, float y, const vec2_scalar& v) {
   2075  return vec4_scalar{x, y, v.x, v.y};
   2076 }
   2077 
   2078 ivec4_scalar make_ivec4(const vec4_scalar& v) {
   2079  return ivec4_scalar{int32_t(v.x), int32_t(v.y), int32_t(v.z), int32_t(v.w)};
   2080 }
   2081 
   2082 template <typename N>
   2083 vec4 make_vec4(const N& n) {
   2084  return vec4(n);
   2085 }
   2086 
   2087 template <typename X, typename Y>
   2088 vec4 make_vec4(const X& x, const Y& y) {
   2089  return vec4(x, y);
   2090 }
   2091 
   2092 template <typename X, typename Y, typename Z>
   2093 vec4 make_vec4(const X& x, const Y& y, const Z& z) {
   2094  return vec4(x, y, z);
   2095 }
   2096 
   2097 template <typename X, typename Y, typename Z, typename W>
   2098 vec4 make_vec4(const X& x, const Y& y, const Z& z, const W& w) {
   2099  return vec4(x, y, z, w);
   2100 }
   2101 
   2102 vec4_scalar make_vec4(const ivec4_scalar& v) {
   2103  return vec4_scalar{float(v.x), float(v.y), float(v.z), float(v.w)};
   2104 }
   2105 
   2106 ALWAYS_INLINE vec3::vec3(vec4 v) : x(v.x), y(v.y), z(v.z) {}
   2107 
   2108 SI ivec4 roundfast(vec4 v, Float scale) {
   2109  return ivec4(roundfast(v.x, scale), roundfast(v.y, scale),
   2110               roundfast(v.z, scale), roundfast(v.w, scale));
   2111 }
   2112 
   2113 vec4 operator*(vec4_scalar a, Float b) {
   2114  return vec4(a.x * b, a.y * b, a.z * b, a.w * b);
   2115 }
   2116 
   2117 SI vec4 if_then_else(I32 c, vec4 t, vec4 e) {
   2118  return vec4(if_then_else(c, t.x, e.x), if_then_else(c, t.y, e.y),
   2119              if_then_else(c, t.z, e.z), if_then_else(c, t.w, e.w));
   2120 }
   2121 
   2122 SI vec4 if_then_else(int32_t c, vec4 t, vec4 e) { return c ? t : e; }
   2123 
   2124 SI vec4_scalar if_then_else(int32_t c, vec4_scalar t, vec4_scalar e) {
   2125  return c ? t : e;
   2126 }
   2127 
   2128 SI vec2 clamp(vec2 a, Float minVal, Float maxVal) {
   2129  return vec2(clamp(a.x, minVal, maxVal), clamp(a.y, minVal, maxVal));
   2130 }
   2131 
   2132 SI vec2 clamp(vec2 a, vec2 minVal, vec2 maxVal) {
   2133  return vec2(clamp(a.x, minVal.x, maxVal.x), clamp(a.y, minVal.y, maxVal.y));
   2134 }
   2135 
   2136 SI vec2_scalar clamp(vec2_scalar a, vec2_scalar minVal, vec2_scalar maxVal) {
   2137  return vec2_scalar{clamp(a.x, minVal.x, maxVal.x),
   2138                     clamp(a.y, minVal.y, maxVal.y)};
   2139 }
   2140 
   2141 SI vec2_scalar clamp(vec2_scalar a, float minVal, float maxVal) {
   2142  return vec2_scalar{clamp(a.x, minVal, maxVal), clamp(a.y, minVal, maxVal)};
   2143 }
   2144 
   2145 SI I32 clamp(I32 a, I32 minVal, I32 maxVal) {
   2146  a = if_then_else(a < minVal, minVal, a);
   2147  return if_then_else(a > maxVal, maxVal, a);
   2148 }
   2149 
   2150 SI vec3 clamp(vec3 a, Float minVal, Float maxVal) {
   2151  return vec3(clamp(a.x, minVal, maxVal), clamp(a.y, minVal, maxVal),
   2152              clamp(a.z, minVal, maxVal));
   2153 }
   2154 
   2155 SI vec3 clamp(vec3 a, vec3 minVal, vec3 maxVal) {
   2156  return vec3(clamp(a.x, minVal.x, maxVal.x), clamp(a.y, minVal.y, maxVal.y),
   2157              clamp(a.z, minVal.z, maxVal.z));
   2158 }
   2159 
   2160 SI vec4 clamp(vec4 a, Float minVal, Float maxVal) {
   2161  return vec4(clamp(a.x, minVal, maxVal), clamp(a.y, minVal, maxVal),
   2162              clamp(a.z, minVal, maxVal), clamp(a.w, minVal, maxVal));
   2163 }
   2164 
   2165 SI vec4 clamp(vec4 a, vec4 minVal, vec4 maxVal) {
   2166  return vec4(clamp(a.x, minVal.x, maxVal.x), clamp(a.y, minVal.y, maxVal.y),
   2167              clamp(a.z, minVal.z, maxVal.z), clamp(a.w, minVal.w, maxVal.w));
   2168 }
   2169 
   2170 SI vec4_scalar clamp(vec4_scalar a, vec4_scalar minVal, vec4_scalar maxVal) {
   2171  return vec4_scalar{
   2172      clamp(a.x, minVal.x, maxVal.x), clamp(a.y, minVal.y, maxVal.y),
   2173      clamp(a.z, minVal.z, maxVal.z), clamp(a.w, minVal.w, maxVal.w)};
   2174 }
   2175 
   2176 SI vec4_scalar clamp(vec4_scalar a, float minVal, float maxVal) {
   2177  return vec4_scalar{clamp(a.x, minVal, maxVal), clamp(a.y, minVal, maxVal),
   2178                     clamp(a.z, minVal, maxVal), clamp(a.w, minVal, maxVal)};
   2179 }
   2180 
   2181 vec4 step(vec4 edge, vec4 x) {
   2182  return vec4(step(edge.x, x.x), step(edge.y, x.y), step(edge.z, x.z),
   2183              step(edge.w, x.w));
   2184 }
   2185 
   2186 vec4_scalar step(vec4_scalar edge, vec4_scalar x) {
   2187  return vec4_scalar(step(edge.x, x.x), step(edge.y, x.y), step(edge.z, x.z),
   2188                     step(edge.w, x.w));
   2189 }
   2190 
   2191 template <typename T>
   2192 auto lessThanEqual(T x, T y) -> decltype(x <= y) {
   2193  return x <= y;
   2194 }
   2195 
   2196 template <typename T>
   2197 auto lessThan(T x, T y) -> decltype(x < y) {
   2198  return x < y;
   2199 }
   2200 
   2201 SI bvec3 lessThanEqual(vec3 x, vec3 y) {
   2202  return bvec3(lessThanEqual(x.x, y.x), lessThanEqual(x.y, y.y),
   2203               lessThanEqual(x.z, y.z));
   2204 }
   2205 
   2206 SI bvec2 lessThanEqual(vec2 x, vec2 y) {
   2207  return bvec2(lessThanEqual(x.x, y.x), lessThanEqual(x.y, y.y));
   2208 }
   2209 
   2210 SI bvec2_scalar lessThanEqual(vec2_scalar x, vec2_scalar y) {
   2211  return bvec2_scalar{lessThanEqual(x.x, y.x), lessThanEqual(x.y, y.y)};
   2212 }
   2213 
   2214 SI bvec4 lessThanEqual(vec4 x, vec4 y) {
   2215  return bvec4(lessThanEqual(x.x, y.x), lessThanEqual(x.y, y.y),
   2216               lessThanEqual(x.z, y.z), lessThanEqual(x.w, y.w));
   2217 }
   2218 
   2219 SI bvec4_scalar lessThanEqual(vec4_scalar x, vec4_scalar y) {
   2220  return bvec4_scalar{lessThanEqual(x.x, y.x), lessThanEqual(x.y, y.y),
   2221                      lessThanEqual(x.z, y.z), lessThanEqual(x.w, y.w)};
   2222 }
   2223 
   2224 SI bvec2 lessThan(vec2 x, vec2 y) {
   2225  return bvec2(lessThan(x.x, y.x), lessThan(x.y, y.y));
   2226 }
   2227 
   2228 SI bvec2_scalar lessThan(vec2_scalar x, vec2_scalar y) {
   2229  return bvec2_scalar(lessThan(x.x, y.x), lessThan(x.y, y.y));
   2230 }
   2231 
   2232 SI bvec4 lessThan(vec4 x, vec4 y) {
   2233  return bvec4(lessThan(x.x, y.x), lessThan(x.y, y.y), lessThan(x.z, y.z),
   2234               lessThan(x.w, y.w));
   2235 }
   2236 
   2237 SI bvec4_scalar lessThan(vec4_scalar x, vec4_scalar y) {
   2238  return bvec4_scalar{lessThan(x.x, y.x), lessThan(x.y, y.y),
   2239                      lessThan(x.z, y.z), lessThan(x.w, y.w)};
   2240 }
   2241 
   2242 template <typename T>
   2243 auto greaterThan(T x, T y) -> decltype(x > y) {
   2244  return x > y;
   2245 }
   2246 
   2247 bvec2 greaterThan(vec2 x, vec2 y) {
   2248  return bvec2(greaterThan(x.x, y.x), greaterThan(x.y, y.y));
   2249 }
   2250 
   2251 bvec2_scalar greaterThan(vec2_scalar x, vec2_scalar y) {
   2252  return bvec2_scalar(greaterThan(x.x, y.x), greaterThan(x.y, y.y));
   2253 }
   2254 
   2255 SI bvec4 greaterThan(vec4 x, vec4 y) {
   2256  return bvec4(greaterThan(x.x, y.x), greaterThan(x.y, y.y),
   2257               greaterThan(x.z, y.z), greaterThan(x.w, y.w));
   2258 }
   2259 
   2260 SI bvec4_scalar greaterThan(vec4_scalar x, vec4_scalar y) {
   2261  return bvec4_scalar{greaterThan(x.x, y.x), greaterThan(x.y, y.y),
   2262                      greaterThan(x.z, y.z), greaterThan(x.w, y.w)};
   2263 }
   2264 
   2265 template <typename T>
   2266 auto greaterThanEqual(T x, T y) -> decltype(x >= y) {
   2267  return x >= y;
   2268 }
   2269 
   2270 bvec4 greaterThanEqual(vec4 x, vec4 y) {
   2271  return bvec4(greaterThanEqual(x.x, y.x), greaterThanEqual(x.y, y.y),
   2272               greaterThanEqual(x.z, y.z), greaterThanEqual(x.w, y.w));
   2273 }
   2274 
   2275 template <typename T>
   2276 auto equal(T x, T y) -> decltype(x > y) {
   2277  return x == y;
   2278 }
   2279 
   2280 bvec2 equal(vec2 x, vec2 y) { return bvec2(equal(x.x, y.x), equal(x.y, y.y)); }
   2281 
   2282 bvec2_scalar equal(vec2_scalar x, vec2_scalar y) {
   2283  return bvec2_scalar(equal(x.x, y.x), equal(x.y, y.y));
   2284 }
   2285 
   2286 template <typename T>
   2287 auto notEqual(T x, T y) -> decltype(x > y) {
   2288  return x != y;
   2289 }
   2290 
   2291 bvec2 notEqual(vec2 x, vec2 y) {
   2292  return bvec2(notEqual(x.x, y.x), notEqual(x.y, y.y));
   2293 }
   2294 
   2295 bvec2_scalar notEqual(vec2_scalar x, vec2_scalar y) {
   2296  return bvec2_scalar(notEqual(x.x, y.x), notEqual(x.y, y.y));
   2297 }
   2298 
   2299 vec3 floor(vec3 v) { return vec3(floor(v.x), floor(v.y), floor(v.z)); }
   2300 
   2301 vec4 floor(vec4 v) {
   2302  return vec4(floor(v.x), floor(v.y), floor(v.z), floor(v.w));
   2303 }
   2304 
   2305 struct mat4_scalar;
   2306 
   2307 struct mat2_scalar {
   2308  vec2_scalar data[2];
   2309 
   2310  mat2_scalar() = default;
   2311  IMPLICIT constexpr mat2_scalar(float a)
   2312      : data{vec2_scalar(a), vec2_scalar(a)} {}
   2313  constexpr mat2_scalar(vec2_scalar a, vec2_scalar b) : data{a, b} {}
   2314  IMPLICIT mat2_scalar(const mat4_scalar& mat);
   2315 
   2316  vec2_scalar& operator[](int index) { return data[index]; }
   2317  const vec2_scalar& operator[](int index) const { return data[index]; }
   2318 
   2319  friend vec2_scalar operator*(mat2_scalar m, vec2_scalar v) {
   2320    vec2_scalar u;
   2321    u.x = m[0].x * v.x + m[1].x * v.y;
   2322    u.y = m[0].y * v.x + m[1].y * v.y;
   2323    return u;
   2324  }
   2325 
   2326  friend vec2 operator*(mat2_scalar m, vec2 v) {
   2327    vec2 u;
   2328    u.x = m[0].x * v.x + m[1].x * v.y;
   2329    u.y = m[0].y * v.x + m[1].y * v.y;
   2330    return u;
   2331  }
   2332 
   2333  friend mat2_scalar operator*(mat2_scalar m, float f) {
   2334    mat2_scalar u = m;
   2335    u[0].x *= f;
   2336    u[0].y *= f;
   2337    u[1].x *= f;
   2338    u[1].y *= f;
   2339    return u;
   2340  }
   2341 };
   2342 
   2343 struct mat4;
   2344 
   2345 struct mat2 {
   2346  vec2 data[2];
   2347 
   2348  vec2& operator[](int index) { return data[index]; }
   2349  const vec2& operator[](int index) const { return data[index]; }
   2350  mat2() = default;
   2351 
   2352  IMPLICIT constexpr mat2(Float a) : data{vec2(a), vec2(a)} {}
   2353 
   2354  constexpr mat2(vec2 a, vec2 b) : data{a, b} {}
   2355  IMPLICIT mat2(const mat4& mat);
   2356  IMPLICIT constexpr mat2(mat2_scalar s)
   2357      : data{vec2(s.data[0]), vec2(s.data[1])} {}
   2358 
   2359  friend vec2 operator*(mat2 m, vec2 v) {
   2360    vec2 u;
   2361    u.x = m[0].x * v.x + m[1].x * v.y;
   2362    u.y = m[0].y * v.x + m[1].y * v.y;
   2363    return u;
   2364  }
   2365  friend mat2 operator*(mat2 m, Float f) {
   2366    mat2 u = m;
   2367    u[0].x *= f;
   2368    u[0].y *= f;
   2369    u[1].x *= f;
   2370    u[1].y *= f;
   2371    return u;
   2372  }
   2373 };
   2374 
   2375 mat2_scalar make_mat2(float n) { return mat2_scalar{{n, n}, {n, n}}; }
   2376 
   2377 mat2_scalar make_mat2(const mat2_scalar& m) { return m; }
   2378 
   2379 mat2_scalar make_mat2(const vec2_scalar& x, const vec2_scalar& y) {
   2380  return mat2_scalar{x, y};
   2381 }
   2382 
   2383 template <typename N>
   2384 mat2 make_mat2(const N& n) {
   2385  return mat2(n);
   2386 }
   2387 
   2388 template <typename X, typename Y>
   2389 mat2 make_mat2(const X& x, const Y& y) {
   2390  return mat2(x, y);
   2391 }
   2392 
   2393 SI mat2 if_then_else(I32 c, mat2 t, mat2 e) {
   2394  return mat2(if_then_else(c, t[0], e[0]), if_then_else(c, t[0], e[1]));
   2395 }
   2396 
   2397 SI mat2 if_then_else(int32_t c, mat2 t, mat2 e) { return c ? t : e; }
   2398 
   2399 struct mat3_scalar {
   2400  vec3_scalar data[3];
   2401 
   2402  mat3_scalar() = default;
   2403  constexpr mat3_scalar(vec3_scalar a, vec3_scalar b, vec3_scalar c)
   2404      : data{a, b, c} {}
   2405  IMPLICIT mat3_scalar(const mat4_scalar& mat);
   2406 
   2407  vec3_scalar& operator[](int index) { return data[index]; }
   2408  const vec3_scalar& operator[](int index) const { return data[index]; }
   2409 
   2410  friend vec3_scalar operator*(mat3_scalar m, vec3_scalar v) {
   2411    vec3_scalar u;
   2412    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z;
   2413    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z;
   2414    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z;
   2415    return u;
   2416  }
   2417 
   2418  friend vec3 operator*(mat3_scalar m, vec3 v) {
   2419    vec3 u;
   2420    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z;
   2421    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z;
   2422    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z;
   2423    return u;
   2424  }
   2425 
   2426  friend auto operator*(mat3_scalar a, mat3_scalar b) {
   2427    mat3_scalar r;
   2428    for (int c = 0; c < 3; c++) {
   2429      const auto& v = b[c];
   2430      r[c].x = a[0].x * v.x + a[1].x * v.y + a[2].x * v.z;
   2431      r[c].y = a[0].y * v.x + a[1].y * v.y + a[2].y * v.z;
   2432      r[c].z = a[0].z * v.x + a[1].z * v.y + a[2].z * v.z;
   2433    }
   2434    return r;
   2435  }
   2436 };
   2437 
   2438 struct mat3 {
   2439  vec3 data[3];
   2440 
   2441  vec3& operator[](int index) { return data[index]; }
   2442  const vec3& operator[](int index) const { return data[index]; }
   2443  mat3() = default;
   2444  constexpr mat3(vec3 a, vec3 b, vec3 c) : data{a, b, c} {}
   2445 
   2446  IMPLICIT constexpr mat3(mat3_scalar s)
   2447      : data{vec3(s.data[0]), vec3(s.data[1]), vec3(s.data[2])} {}
   2448 
   2449  constexpr mat3(mat3_scalar s0, mat3_scalar s1, mat3_scalar s2, mat3_scalar s3)
   2450      : data{vec3(s0.data[0], s1.data[0], s2.data[0], s3.data[0]),
   2451             vec3(s0.data[1], s1.data[1], s2.data[1], s3.data[1]),
   2452             vec3(s0.data[2], s1.data[2], s2.data[2], s3.data[2])} {}
   2453 
   2454  constexpr mat3(Float d1, Float d2, Float d3, Float d4, Float d5, Float d6,
   2455                 Float d7, Float d8, Float d9)
   2456      : data{vec3(d1, d2, d3), vec3(d4, d5, d6), vec3(d7, d8, d9)} {}
   2457 
   2458  IMPLICIT mat3(const mat4& mat);
   2459 
   2460  friend vec3 operator*(mat3 m, vec3 v) {
   2461    vec3 u;
   2462    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z;
   2463    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z;
   2464    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z;
   2465    return u;
   2466  }
   2467 };
   2468 
   2469 mat3_scalar force_scalar(const mat3& v) {
   2470  return mat3_scalar{force_scalar(v[0]), force_scalar(v[1]),
   2471                     force_scalar(v[2])};
   2472 }
   2473 
   2474 mat3_scalar make_mat3(const mat3_scalar& m) { return m; }
   2475 
   2476 mat3_scalar make_mat3(const vec3_scalar& x, const vec3_scalar& y,
   2477                      const vec3_scalar& z) {
   2478  return mat3_scalar{x, y, z};
   2479 }
   2480 
   2481 constexpr mat3_scalar make_mat3(float m0, float m1, float m2, float m3,
   2482                                float m4, float m5, float m6, float m7,
   2483                                float m8) {
   2484  return mat3_scalar{{m0, m1, m2}, {m3, m4, m5}, {m6, m7, m8}};
   2485 }
   2486 
   2487 template <typename N>
   2488 mat3 make_mat3(const N& n) {
   2489  return mat3(n);
   2490 }
   2491 
   2492 template <typename X, typename Y, typename Z>
   2493 mat3 make_mat3(const X& x, const Y& y, const Z& z) {
   2494  return mat3(x, y, z);
   2495 }
   2496 
   2497 struct mat3x4_scalar {
   2498  vec4_scalar data[3];
   2499 
   2500  mat3x4_scalar() = default;
   2501  constexpr mat3x4_scalar(vec4_scalar a, vec4_scalar b, vec4_scalar c)
   2502      : data{a, b, c} {}
   2503 
   2504  auto& operator[](int index) { return data[index]; }
   2505  constexpr auto operator[](int index) const { return data[index]; }
   2506 
   2507  friend auto operator*(mat3x4_scalar m, vec3_scalar v) {
   2508    vec4_scalar u;
   2509    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z;
   2510    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z;
   2511    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z;
   2512    u.w = m[0].w * v.x + m[1].w * v.y + m[2].w * v.z;
   2513    return u;
   2514  }
   2515 
   2516  friend auto operator*(mat3x4_scalar m, vec3 v) {
   2517    vec4 u;
   2518    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z;
   2519    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z;
   2520    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z;
   2521    u.w = m[0].w * v.x + m[1].w * v.y + m[2].w * v.z;
   2522    return u;
   2523  }
   2524 };
   2525 
   2526 constexpr mat3x4_scalar make_mat3x4(float m0, float m1, float m2, float m3,
   2527                                    float m4, float m5, float m6, float m7,
   2528                                    float m8, float m9, float m10, float m11) {
   2529  return mat3x4_scalar{
   2530      {m0, m1, m2, m3},
   2531      {m4, m5, m6, m7},
   2532      {m8, m9, m10, m11},
   2533  };
   2534 }
   2535 
   2536 struct mat4x3_scalar {
   2537  vec3_scalar data[4];
   2538 
   2539  mat4x3_scalar() = default;
   2540  constexpr mat4x3_scalar(vec3_scalar a, vec3_scalar b, vec3_scalar c,
   2541                          vec3_scalar d)
   2542      : data{a, b, c, d} {}
   2543 
   2544  auto& operator[](int index) { return data[index]; }
   2545  constexpr auto operator[](int index) const { return data[index]; }
   2546 
   2547  friend auto operator*(mat4x3_scalar m, vec4_scalar v) {
   2548    vec3_scalar u;
   2549    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z + m[3].x * v.w;
   2550    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z + m[3].y * v.w;
   2551    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z + m[3].z * v.w;
   2552    return u;
   2553  }
   2554 
   2555  friend auto operator*(mat4x3_scalar m, vec4 v) {
   2556    vec3 u;
   2557    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z + m[3].x * v.w;
   2558    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z + m[3].y * v.w;
   2559    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z + m[3].z * v.w;
   2560    return u;
   2561  }
   2562 };
   2563 
   2564 constexpr mat4x3_scalar transpose(const mat3x4_scalar m) {
   2565  return {{m[0].x, m[1].x, m[2].x},
   2566          {m[0].y, m[1].y, m[2].y},
   2567          {m[0].z, m[1].z, m[2].z},
   2568          {m[0].w, m[1].w, m[2].w}};
   2569 }
   2570 
   2571 struct mat4_scalar {
   2572  vec4_scalar data[4];
   2573 
   2574  mat4_scalar() = default;
   2575  constexpr mat4_scalar(vec4_scalar a, vec4_scalar b, vec4_scalar c,
   2576                        vec4_scalar d)
   2577      : data{a, b, c, d} {}
   2578 
   2579  vec4_scalar& operator[](int index) { return data[index]; }
   2580  const vec4_scalar& operator[](int index) const { return data[index]; }
   2581 
   2582  static mat4_scalar load_from_ptr(const float* f) {
   2583    return mat4_scalar(
   2584        vec4_scalar::load_from_ptr(&f[0]), vec4_scalar::load_from_ptr(&f[4]),
   2585        vec4_scalar::load_from_ptr(&f[8]), vec4_scalar::load_from_ptr(&f[12]));
   2586  }
   2587 
   2588  friend vec4_scalar operator*(mat4_scalar m, vec4_scalar v) {
   2589    vec4_scalar u;
   2590    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z + m[3].x * v.w;
   2591    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z + m[3].y * v.w;
   2592    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z + m[3].z * v.w;
   2593    u.w = m[0].w * v.x + m[1].w * v.y + m[2].w * v.z + m[3].w * v.w;
   2594    return u;
   2595  }
   2596 
   2597  friend vec4 operator*(mat4_scalar m, vec4 v) {
   2598    vec4 u;
   2599    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z + m[3].x * v.w;
   2600    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z + m[3].y * v.w;
   2601    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z + m[3].z * v.w;
   2602    u.w = m[0].w * v.x + m[1].w * v.y + m[2].w * v.z + m[3].w * v.w;
   2603    return u;
   2604  }
   2605 };
   2606 
   2607 struct mat4 {
   2608  vec4 data[4];
   2609 
   2610  mat4() = default;
   2611  IMPLICIT constexpr mat4(mat4_scalar s)
   2612      : data{vec4(s.data[0]), vec4(s.data[1]), vec4(s.data[2]),
   2613             vec4(s.data[3])} {}
   2614 
   2615  constexpr mat4(vec4 a, vec4 b, vec4 c, vec4 d) : data{a, b, c, d} {}
   2616 
   2617  vec4& operator[](int index) { return data[index]; }
   2618  const vec4& operator[](int index) const { return data[index]; }
   2619 
   2620  friend vec4 operator*(mat4 m, vec4 v) {
   2621    vec4 u;
   2622    u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z + m[3].x * v.w;
   2623    u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z + m[3].y * v.w;
   2624    u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z + m[3].z * v.w;
   2625    u.w = m[0].w * v.x + m[1].w * v.y + m[2].w * v.z + m[3].w * v.w;
   2626    return u;
   2627  }
   2628 };
   2629 
   2630 mat3::mat3(const mat4& mat)
   2631    : mat3(vec3(mat[0].x, mat[0].y, mat[0].z),
   2632           vec3(mat[1].x, mat[1].y, mat[1].z),
   2633           vec3(mat[2].x, mat[2].y, mat[2].z)) {}
   2634 
   2635 IMPLICIT mat3_scalar::mat3_scalar(const mat4_scalar& mat)
   2636    : mat3_scalar(vec3_scalar(mat[0].x, mat[0].y, mat[0].z),
   2637                  vec3_scalar(mat[1].x, mat[1].y, mat[1].z),
   2638                  vec3_scalar(mat[2].x, mat[2].y, mat[2].z)) {}
   2639 
   2640 IMPLICIT mat2::mat2(const mat4& mat)
   2641    : mat2(vec2(mat[0].x, mat[0].y), vec2(mat[1].x, mat[1].y)) {}
   2642 
   2643 IMPLICIT mat2_scalar::mat2_scalar(const mat4_scalar& mat)
   2644    : mat2_scalar(vec2_scalar(mat[0].x, mat[0].y),
   2645                  vec2_scalar(mat[1].x, mat[1].y)) {}
   2646 
   2647 mat2_scalar make_mat2(const mat4_scalar& m) { return mat2_scalar(m); }
   2648 
   2649 mat3_scalar make_mat3(const mat4_scalar& m) { return mat3_scalar(m); }
   2650 
   2651 mat4_scalar force_scalar(const mat4& v) {
   2652  return mat4_scalar(force_scalar(v[0]), force_scalar(v[1]), force_scalar(v[2]),
   2653                     force_scalar(v[3]));
   2654 }
   2655 
   2656 mat4_scalar make_mat4(const mat4_scalar& m) { return m; }
   2657 
   2658 mat4_scalar make_mat4(const vec4_scalar& x, const vec4_scalar& y,
   2659                      const vec4_scalar& z, const vec4_scalar& w) {
   2660  return mat4_scalar{x, y, z, w};
   2661 }
   2662 
   2663 constexpr mat4_scalar make_mat4(float m0, float m1, float m2, float m3,
   2664                                float m4, float m5, float m6, float m7,
   2665                                float m8, float m9, float m10, float m11,
   2666                                float m12, float m13, float m14, float m15) {
   2667  return mat4_scalar{{m0, m1, m2, m3},
   2668                     {m4, m5, m6, m7},
   2669                     {m8, m9, m10, m11},
   2670                     {m12, m13, m14, m15}};
   2671 }
   2672 
   2673 template <typename N>
   2674 mat4 make_mat4(const N& n) {
   2675  return mat4(n);
   2676 }
   2677 
   2678 template <typename X, typename Y, typename Z, typename W>
   2679 mat4 make_mat4(const X& x, const Y& y, const Z& z, const W& w) {
   2680  return mat4(x, y, z, w);
   2681 }
   2682 
   2683 SI mat3 if_then_else(I32 c, mat3 t, mat3 e) {
   2684  return mat3{if_then_else(c, t[0], e[0]), if_then_else(c, t[1], e[1]),
   2685              if_then_else(c, t[2], e[2])};
   2686 }
   2687 
   2688 SI mat3 if_then_else(int32_t c, mat3 t, mat3 e) { return c ? t : e; }
   2689 
   2690 SI mat4 if_then_else(I32 c, mat4 t, mat4 e) {
   2691  return mat4{if_then_else(c, t[0], e[0]), if_then_else(c, t[1], e[1]),
   2692              if_then_else(c, t[2], e[2]), if_then_else(c, t[3], e[3])};
   2693 }
   2694 
   2695 SI mat4 if_then_else(int32_t c, mat4 t, mat4 e) { return c ? t : e; }
   2696 
   2697 template <typename T, typename U, typename A,
   2698          typename R = typename T::vector_type>
   2699 SI R mix(T x, U y, A a) {
   2700  return (y - x) * a + x;
   2701 }
   2702 
   2703 SI Float mix(Float x, Float y, Float a) { return (y - x) * a + x; }
   2704 
   2705 template <typename T>
   2706 SI T mix(T x, T y, float a) {
   2707  return (y - x) * a + x;
   2708 }
   2709 
   2710 template <typename T>
   2711 SI T mix(T x, T y, vec2_scalar a) {
   2712  return T{mix(x.x, y.x, a.x), mix(x.y, y.y, a.y)};
   2713 }
   2714 
   2715 template <typename T>
   2716 SI T mix(T x, T y, vec3_scalar a) {
   2717  return T{mix(x.x, y.x, a.x), mix(x.y, y.y, a.y), mix(x.z, y.z, a.z)};
   2718 }
   2719 
   2720 template <typename T>
   2721 SI T mix(T x, T y, vec4_scalar a) {
   2722  return T{mix(x.x, y.x, a.x), mix(x.y, y.y, a.y), mix(x.z, y.z, a.z),
   2723           mix(x.w, y.w, a.w)};
   2724 }
   2725 
   2726 ivec4 ivec2::sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4) {
   2727  return ivec4(select(c1), select(c2), select(c3), select(c4));
   2728 }
   2729 
   2730 vec4 vec2::sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4) {
   2731  return vec4(select(c1), select(c2), select(c3), select(c4));
   2732 }
   2733 
   2734 bool any(bool x) { return x; }
   2735 
   2736 Bool any(bvec4 x) { return x.x | x.y | x.z | x.w; }
   2737 
   2738 bool any(bvec4_scalar x) { return x.x | x.y | x.z | x.w; }
   2739 
   2740 Bool any(bvec2 x) { return x.x | x.y; }
   2741 
   2742 bool any(bvec2_scalar x) { return x.x | x.y; }
   2743 
   2744 bool all(bool x) { return x; }
   2745 
   2746 Bool all(bvec2 x) { return x.x & x.y; }
   2747 
   2748 bool all(bvec2_scalar x) { return x.x & x.y; }
   2749 
   2750 Bool all(bvec4 x) { return x.x & x.y & x.z & x.w; }
   2751 
   2752 bool all(bvec4_scalar x) { return x.x & x.y & x.z & x.w; }
   2753 
   2754 SI vec4 if_then_else(bvec4 c, vec4 t, vec4 e) {
   2755  return vec4(if_then_else(c.x, t.x, e.x), if_then_else(c.y, t.y, e.y),
   2756              if_then_else(c.z, t.z, e.z), if_then_else(c.w, t.w, e.w));
   2757 }
   2758 SI vec3 if_then_else(bvec3 c, vec3 t, vec3 e) {
   2759  return vec3(if_then_else(c.x, t.x, e.x), if_then_else(c.y, t.y, e.y),
   2760              if_then_else(c.z, t.z, e.z));
   2761 }
   2762 
   2763 SI vec2 if_then_else(bvec2 c, vec2 t, vec2 e) {
   2764  return vec2(if_then_else(c.x, t.x, e.x), if_then_else(c.y, t.y, e.y));
   2765 }
   2766 
   2767 template <typename T, typename R = typename T::vector_type>
   2768 SI R mix(T x, T y, bvec4 a) {
   2769  return if_then_else(a, y, x);
   2770 }
   2771 
   2772 template <typename T, typename R = typename T::vector_type>
   2773 SI R mix(T x, T y, bvec3 a) {
   2774  return if_then_else(a, y, x);
   2775 }
   2776 
   2777 template <typename T, typename R = typename T::vector_type>
   2778 SI R mix(T x, T y, bvec2 a) {
   2779  return if_then_else(a, y, x);
   2780 }
   2781 
   2782 template <typename T>
   2783 SI T mix(T x, T y, bvec4_scalar a) {
   2784  return T{a.x ? y.x : x.x, a.y ? y.y : x.y, a.z ? y.z : x.z, a.w ? y.w : x.w};
   2785 }
   2786 
   2787 template <typename T>
   2788 SI T mix(T x, T y, bvec4_scalar1 a) {
   2789  return a.x ? y : x;
   2790 }
   2791 
   2792 template <typename T>
   2793 SI T mix(T x, T y, bvec3_scalar a) {
   2794  return T{a.x ? y.x : x.x, a.y ? y.y : x.y, a.z ? y.z : x.z};
   2795 }
   2796 
   2797 template <typename T>
   2798 SI T mix(T x, T y, bvec3_scalar1 a) {
   2799  return a.x ? y : x;
   2800 }
   2801 
   2802 template <typename T>
   2803 SI T mix(T x, T y, bvec2_scalar a) {
   2804  return T{a.x ? y.x : x.x, a.y ? y.y : x.y};
   2805 }
   2806 
   2807 template <typename T>
   2808 SI T mix(T x, T y, bvec2_scalar1 a) {
   2809  return a.x ? y : x;
   2810 }
   2811 
   2812 float dot(vec3_scalar a, vec3_scalar b) {
   2813  return a.x * b.x + a.y * b.y + a.z * b.z;
   2814 }
   2815 
   2816 Float dot(vec3 a, vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
   2817 
   2818 float dot(vec2_scalar a, vec2_scalar b) { return a.x * b.x + a.y * b.y; }
   2819 
   2820 Float dot(vec2 a, vec2 b) { return a.x * b.x + a.y * b.y; }
   2821 
   2822 #define sin __glsl_sin
   2823 
   2824 float sin(float x) { return sinf(x); }
   2825 
   2826 Float sin(Float v) { return {sinf(v.x), sinf(v.y), sinf(v.z), sinf(v.w)}; }
   2827 
   2828 #define cos __glsl_cos
   2829 
   2830 float cos(float x) { return cosf(x); }
   2831 
   2832 Float cos(Float v) { return {cosf(v.x), cosf(v.y), cosf(v.z), cosf(v.w)}; }
   2833 
   2834 #define tan __glsl_tan
   2835 
   2836 float tan(float x) { return tanf(x); }
   2837 
   2838 Float tan(Float v) { return {tanf(v.x), tanf(v.y), tanf(v.z), tanf(v.w)}; }
   2839 
   2840 #define atan __glsl_atan
   2841 
   2842 float atan(float x) { return atanf(x); }
   2843 
   2844 Float atan(Float v) { return {atanf(v.x), atanf(v.y), atanf(v.z), atanf(v.w)}; }
   2845 
   2846 float atan(float a, float b) { return atan2f(a, b); }
   2847 
   2848 Float atan(Float a, Float b) {
   2849  return {atan2f(a.x, b.x), atan2f(a.y, b.y), atan2f(a.z, b.z),
   2850          atan2f(a.w, b.w)};
   2851 }
   2852 
   2853 bvec4 equal(vec4 x, vec4 y) {
   2854  return bvec4(equal(x.x, y.x), equal(x.y, y.y), equal(x.z, y.z),
   2855               equal(x.w, y.w));
   2856 }
   2857 
   2858 bvec4_scalar equal(vec4_scalar x, vec4_scalar y) {
   2859  return bvec4_scalar(equal(x.x, y.x), equal(x.y, y.y), equal(x.z, y.z),
   2860                      equal(x.w, y.w));
   2861 }
   2862 
   2863 bvec4 notEqual(vec4 x, vec4 y) {
   2864  return bvec4(notEqual(x.x, y.x), notEqual(x.y, y.y), notEqual(x.z, y.z),
   2865               notEqual(x.w, y.w));
   2866 }
   2867 
   2868 bvec4_scalar notEqual(vec4_scalar x, vec4_scalar y) {
   2869  return bvec4_scalar(notEqual(x.x, y.x), notEqual(x.y, y.y),
   2870                      notEqual(x.z, y.z), notEqual(x.w, y.w));
   2871 }
   2872 
   2873 bvec4 notEqual(ivec4 a, ivec4 b) {
   2874  return bvec4(a.x != b.x, a.y != b.y, a.z != b.z, a.w != b.w);
   2875 }
   2876 
   2877 bvec4_scalar notEqual(ivec4_scalar a, ivec4_scalar b) {
   2878  return bvec4_scalar{a.x != b.x, a.y != b.y, a.z != b.z, a.w != b.w};
   2879 }
   2880 
   2881 mat3 transpose(mat3 m) {
   2882  return mat3(vec3(m[0].x, m[1].x, m[2].x), vec3(m[0].y, m[1].y, m[2].y),
   2883              vec3(m[0].z, m[1].z, m[2].z));
   2884 }
   2885 
   2886 mat3_scalar transpose(mat3_scalar m) {
   2887  return mat3_scalar{vec3_scalar(m[0].x, m[1].x, m[2].x),
   2888                     vec3_scalar(m[0].y, m[1].y, m[2].y),
   2889                     vec3_scalar(m[0].z, m[1].z, m[2].z)};
   2890 }
   2891 
   2892 vec2 abs(vec2 v) { return vec2(abs(v.x), abs(v.y)); }
   2893 
   2894 vec2_scalar abs(vec2_scalar v) { return vec2_scalar{fabsf(v.x), fabsf(v.y)}; }
   2895 
   2896 vec2 sign(vec2 v) { return vec2(sign(v.x), sign(v.y)); }
   2897 
   2898 vec2_scalar sign(vec2_scalar v) { return vec2_scalar{sign(v.x), sign(v.y)}; }
   2899 
   2900 Float mod(Float a, Float b) { return a - b * floor(a / b); }
   2901 
   2902 vec2 mod(vec2 a, vec2 b) { return vec2(mod(a.x, b.x), mod(a.y, b.y)); }
   2903 
   2904 vec3 abs(vec3 v) { return vec3(abs(v.x), abs(v.y), abs(v.z)); }
   2905 
   2906 vec3 sign(vec3 v) { return vec3(sign(v.x), sign(v.y), sign(v.z)); }
   2907 
   2908 mat2 inverse(mat2 v) {
   2909  Float det = v[0].x * v[1].y - v[0].y * v[1].x;
   2910  return mat2(vec2(v[1].y, -v[0].y), vec2(-v[1].x, v[0].x)) * (1. / det);
   2911 }
   2912 
   2913 mat2_scalar inverse(mat2_scalar v) {
   2914  float det = v[0].x * v[1].y - v[0].y * v[1].x;
   2915  return mat2_scalar{{v[1].y, -v[0].y}, {-v[1].x, v[0].x}} * (1. / det);
   2916 }
   2917 
   2918 int32_t get_nth(I32 a, int n) { return a[n]; }
   2919 
   2920 float get_nth(Float a, int n) { return a[n]; }
   2921 
   2922 float get_nth(float a, int) { return a; }
   2923 
   2924 ivec2_scalar get_nth(ivec2 a, int n) { return ivec2_scalar{a.x[n], a.y[n]}; }
   2925 
   2926 vec2_scalar get_nth(vec2 a, int n) { return vec2_scalar{a.x[n], a.y[n]}; }
   2927 
   2928 vec3_scalar get_nth(vec3 a, int n) {
   2929  return vec3_scalar{a.x[n], a.y[n], a.z[n]};
   2930 }
   2931 
   2932 vec4_scalar get_nth(vec4 a, int n) {
   2933  return vec4_scalar{a.x[n], a.y[n], a.z[n], a.w[n]};
   2934 }
   2935 
   2936 ivec4_scalar get_nth(ivec4 a, int n) {
   2937  return ivec4_scalar{a.x[n], a.y[n], a.z[n], a.w[n]};
   2938 }
   2939 
   2940 mat3_scalar get_nth(mat3 a, int n) {
   2941  return make_mat3(get_nth(a[0], n), get_nth(a[1], n), get_nth(a[2], n));
   2942 }
   2943 
   2944 void put_nth(Float& dst, int n, float src) { dst[n] = src; }
   2945 
   2946 void put_nth(I32& dst, int n, int32_t src) { dst[n] = src; }
   2947 
   2948 void put_nth(ivec2& dst, int n, ivec2_scalar src) {
   2949  dst.x[n] = src.x;
   2950  dst.y[n] = src.y;
   2951 }
   2952 
   2953 void put_nth(vec2& dst, int n, vec2_scalar src) {
   2954  dst.x[n] = src.x;
   2955  dst.y[n] = src.y;
   2956 }
   2957 
   2958 void put_nth(vec3& dst, int n, vec3_scalar src) {
   2959  dst.x[n] = src.x;
   2960  dst.y[n] = src.y;
   2961  dst.z[n] = src.z;
   2962 }
   2963 
   2964 void put_nth(ivec4& dst, int n, ivec4_scalar src) {
   2965  dst.x[n] = src.x;
   2966  dst.y[n] = src.y;
   2967  dst.z[n] = src.z;
   2968  dst.w[n] = src.w;
   2969 }
   2970 
   2971 void put_nth(vec4& dst, int n, vec4_scalar src) {
   2972  dst.x[n] = src.x;
   2973  dst.y[n] = src.y;
   2974  dst.z[n] = src.z;
   2975  dst.w[n] = src.w;
   2976 }
   2977 
   2978 // Use an ElementType type constructor
   2979 // so that we can implement element_type for
   2980 // Int and Float
   2981 template <typename V>
   2982 struct ElementType {
   2983  typedef typename V::element_type ty;
   2984 };
   2985 
   2986 template <>
   2987 struct ElementType<float> {
   2988  typedef float ty;
   2989 };
   2990 
   2991 template <>
   2992 struct ElementType<int> {
   2993  typedef float ty;
   2994 };
   2995 
   2996 template <>
   2997 struct ElementType<Float> {
   2998  typedef float ty;
   2999 };
   3000 
   3001 template <>
   3002 struct ElementType<I32> {
   3003  typedef int32_t ty;
   3004 };
   3005 
   3006 void put_nth_component(ivec2_scalar& dst, int n, int32_t src) {
   3007  switch (n) {
   3008    case 0:
   3009      dst.x = src;
   3010      break;
   3011    case 1:
   3012      dst.y = src;
   3013      break;
   3014  }
   3015 }
   3016 
   3017 void put_nth_component(ivec4_scalar& dst, int n, int32_t src) {
   3018  switch (n) {
   3019    case 0:
   3020      dst.x = src;
   3021      break;
   3022    case 1:
   3023      dst.y = src;
   3024      break;
   3025    case 2:
   3026      dst.z = src;
   3027      break;
   3028    case 3:
   3029      dst.w = src;
   3030      break;
   3031  }
   3032 }
   3033 
   3034 void put_nth_component(int& dst, int n, int src) {
   3035  switch (n) {
   3036    case 0:
   3037      dst = src;
   3038      break;
   3039  }
   3040 }
   3041 
   3042 void put_nth_component(float& dst, int n, float src) {
   3043  switch (n) {
   3044    case 0:
   3045      dst = src;
   3046      break;
   3047  }
   3048 }
   3049 
   3050 void put_nth_component(vec2_scalar& dst, int n, float src) {
   3051  switch (n) {
   3052    case 0:
   3053      dst.x = src;
   3054      break;
   3055    case 1:
   3056      dst.y = src;
   3057      break;
   3058  }
   3059 }
   3060 
   3061 void put_nth_component(vec3_scalar& dst, int n, float src) {
   3062  switch (n) {
   3063    case 0:
   3064      dst.x = src;
   3065      break;
   3066    case 1:
   3067      dst.y = src;
   3068      break;
   3069    case 2:
   3070      dst.z = src;
   3071      break;
   3072  }
   3073 }
   3074 
   3075 void put_nth_component(vec4_scalar& dst, int n, float src) {
   3076  switch (n) {
   3077    case 0:
   3078      dst.x = src;
   3079      break;
   3080    case 1:
   3081      dst.y = src;
   3082      break;
   3083    case 2:
   3084      dst.z = src;
   3085      break;
   3086    case 3:
   3087      dst.w = src;
   3088      break;
   3089  }
   3090 }
   3091 
   3092 Float init_interp(float init0, float step) {
   3093  float init1 = init0 + step;
   3094  float init2 = init1 + step;
   3095  float init3 = init2 + step;
   3096  return {init0, init1, init2, init3};
   3097 }
   3098 
   3099 vec2 init_interp(vec2_scalar init, vec2_scalar step) {
   3100  return vec2(init_interp(init.x, step.x), init_interp(init.y, step.y));
   3101 }
   3102 
   3103 vec3 init_interp(vec3_scalar init, vec3_scalar step) {
   3104  return vec3(init_interp(init.x, step.x), init_interp(init.y, step.y),
   3105              init_interp(init.z, step.z));
   3106 }
   3107 
   3108 vec4 init_interp(vec4_scalar init, vec4_scalar step) {
   3109  return vec4(init_interp(init.x, step.x), init_interp(init.y, step.y),
   3110              init_interp(init.z, step.z), init_interp(init.w, step.w));
   3111 }
   3112 
   3113 template <typename T, size_t N>
   3114 struct Array {
   3115  T elements[N];
   3116  T& operator[](size_t i) { return elements[i]; }
   3117  const T& operator[](size_t i) const { return elements[i]; }
   3118  template <typename S>
   3119  void convert(const Array<S, N>& s) {
   3120    for (size_t i = 0; i < N; ++i) elements[i] = T(s[i]);
   3121  }
   3122 };
   3123 
   3124 template <size_t SIZE>
   3125 Array<vec2, SIZE> if_then_else(I32 c, Array<vec2, SIZE> t,
   3126                               Array<vec2, SIZE> e) {
   3127  Array<vec2, SIZE> r;
   3128  for (size_t i = 0; i < SIZE; i++) {
   3129    r[i] = if_then_else(c, t[i], e[i]);
   3130  }
   3131  return r;
   3132 }
   3133 
   3134 }  // namespace glsl