tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

loadimage.cpp (66951B)


      1 //
      2 // Copyright 2013 The ANGLE Project Authors. All rights reserved.
      3 // Use of this source code is governed by a BSD-style license that can be
      4 // found in the LICENSE file.
      5 //
      6 
      7 // loadimage.cpp: Defines image loading functions.
      8 
      9 #include "image_util/loadimage.h"
     10 
     11 #include "common/mathutil.h"
     12 #include "common/platform.h"
     13 #include "image_util/imageformats.h"
     14 
     15 namespace angle
     16 {
     17 
     18 void LoadA8ToRGBA8(size_t width,
     19                   size_t height,
     20                   size_t depth,
     21                   const uint8_t *input,
     22                   size_t inputRowPitch,
     23                   size_t inputDepthPitch,
     24                   uint8_t *output,
     25                   size_t outputRowPitch,
     26                   size_t outputDepthPitch)
     27 {
     28 #if defined(ANGLE_USE_SSE)
     29    if (gl::supportsSSE2())
     30    {
     31        __m128i zeroWide = _mm_setzero_si128();
     32 
     33        for (size_t z = 0; z < depth; z++)
     34        {
     35            for (size_t y = 0; y < height; y++)
     36            {
     37                const uint8_t *source =
     38                    priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
     39                uint32_t *dest = priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch,
     40                                                                   outputDepthPitch);
     41 
     42                size_t x = 0;
     43 
     44                // Make output writes aligned
     45                for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 0xF) != 0 && x < width); x++)
     46                {
     47                    dest[x] = static_cast<uint32_t>(source[x]) << 24;
     48                }
     49 
     50                for (; x + 7 < width; x += 8)
     51                {
     52                    __m128i sourceData =
     53                        _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&source[x]));
     54                    // Interleave each byte to 16bit, make the lower byte to zero
     55                    sourceData = _mm_unpacklo_epi8(zeroWide, sourceData);
     56                    // Interleave each 16bit to 32bit, make the lower 16bit to zero
     57                    __m128i lo = _mm_unpacklo_epi16(zeroWide, sourceData);
     58                    __m128i hi = _mm_unpackhi_epi16(zeroWide, sourceData);
     59 
     60                    _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), lo);
     61                    _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x + 4]), hi);
     62                }
     63 
     64                // Handle the remainder
     65                for (; x < width; x++)
     66                {
     67                    dest[x] = static_cast<uint32_t>(source[x]) << 24;
     68                }
     69            }
     70        }
     71 
     72        return;
     73    }
     74 #endif
     75 
     76    for (size_t z = 0; z < depth; z++)
     77    {
     78        for (size_t y = 0; y < height; y++)
     79        {
     80            const uint8_t *source =
     81                priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
     82            uint32_t *dest =
     83                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
     84            for (size_t x = 0; x < width; x++)
     85            {
     86                dest[x] = static_cast<uint32_t>(source[x]) << 24;
     87            }
     88        }
     89    }
     90 }
     91 
     92 void LoadA8ToBGRA8(size_t width,
     93                   size_t height,
     94                   size_t depth,
     95                   const uint8_t *input,
     96                   size_t inputRowPitch,
     97                   size_t inputDepthPitch,
     98                   uint8_t *output,
     99                   size_t outputRowPitch,
    100                   size_t outputDepthPitch)
    101 {
    102    // Same as loading to RGBA
    103    LoadA8ToRGBA8(width, height, depth, input, inputRowPitch, inputDepthPitch, output,
    104                  outputRowPitch, outputDepthPitch);
    105 }
    106 
    107 void LoadA32FToRGBA32F(size_t width,
    108                       size_t height,
    109                       size_t depth,
    110                       const uint8_t *input,
    111                       size_t inputRowPitch,
    112                       size_t inputDepthPitch,
    113                       uint8_t *output,
    114                       size_t outputRowPitch,
    115                       size_t outputDepthPitch)
    116 {
    117    for (size_t z = 0; z < depth; z++)
    118    {
    119        for (size_t y = 0; y < height; y++)
    120        {
    121            const float *source =
    122                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
    123            float *dest =
    124                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
    125            for (size_t x = 0; x < width; x++)
    126            {
    127                dest[4 * x + 0] = 0.0f;
    128                dest[4 * x + 1] = 0.0f;
    129                dest[4 * x + 2] = 0.0f;
    130                dest[4 * x + 3] = source[x];
    131            }
    132        }
    133    }
    134 }
    135 
    136 void LoadA16FToRGBA16F(size_t width,
    137                       size_t height,
    138                       size_t depth,
    139                       const uint8_t *input,
    140                       size_t inputRowPitch,
    141                       size_t inputDepthPitch,
    142                       uint8_t *output,
    143                       size_t outputRowPitch,
    144                       size_t outputDepthPitch)
    145 {
    146    for (size_t z = 0; z < depth; z++)
    147    {
    148        for (size_t y = 0; y < height; y++)
    149        {
    150            const uint16_t *source =
    151                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    152            uint16_t *dest =
    153                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    154            for (size_t x = 0; x < width; x++)
    155            {
    156                dest[4 * x + 0] = 0;
    157                dest[4 * x + 1] = 0;
    158                dest[4 * x + 2] = 0;
    159                dest[4 * x + 3] = source[x];
    160            }
    161        }
    162    }
    163 }
    164 
    165 void LoadL8ToRGBA8(size_t width,
    166                   size_t height,
    167                   size_t depth,
    168                   const uint8_t *input,
    169                   size_t inputRowPitch,
    170                   size_t inputDepthPitch,
    171                   uint8_t *output,
    172                   size_t outputRowPitch,
    173                   size_t outputDepthPitch)
    174 {
    175    for (size_t z = 0; z < depth; z++)
    176    {
    177        for (size_t y = 0; y < height; y++)
    178        {
    179            const uint8_t *source =
    180                priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
    181            uint8_t *dest =
    182                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    183            for (size_t x = 0; x < width; x++)
    184            {
    185                uint8_t sourceVal = source[x];
    186                dest[4 * x + 0]   = sourceVal;
    187                dest[4 * x + 1]   = sourceVal;
    188                dest[4 * x + 2]   = sourceVal;
    189                dest[4 * x + 3]   = 0xFF;
    190            }
    191        }
    192    }
    193 }
    194 
    195 void LoadL8ToBGRA8(size_t width,
    196                   size_t height,
    197                   size_t depth,
    198                   const uint8_t *input,
    199                   size_t inputRowPitch,
    200                   size_t inputDepthPitch,
    201                   uint8_t *output,
    202                   size_t outputRowPitch,
    203                   size_t outputDepthPitch)
    204 {
    205    // Same as loading to RGBA
    206    LoadL8ToRGBA8(width, height, depth, input, inputRowPitch, inputDepthPitch, output,
    207                  outputRowPitch, outputDepthPitch);
    208 }
    209 
    210 void LoadL32FToRGBA32F(size_t width,
    211                       size_t height,
    212                       size_t depth,
    213                       const uint8_t *input,
    214                       size_t inputRowPitch,
    215                       size_t inputDepthPitch,
    216                       uint8_t *output,
    217                       size_t outputRowPitch,
    218                       size_t outputDepthPitch)
    219 {
    220    for (size_t z = 0; z < depth; z++)
    221    {
    222        for (size_t y = 0; y < height; y++)
    223        {
    224            const float *source =
    225                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
    226            float *dest =
    227                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
    228            for (size_t x = 0; x < width; x++)
    229            {
    230                dest[4 * x + 0] = source[x];
    231                dest[4 * x + 1] = source[x];
    232                dest[4 * x + 2] = source[x];
    233                dest[4 * x + 3] = 1.0f;
    234            }
    235        }
    236    }
    237 }
    238 
    239 void LoadL16FToRGBA16F(size_t width,
    240                       size_t height,
    241                       size_t depth,
    242                       const uint8_t *input,
    243                       size_t inputRowPitch,
    244                       size_t inputDepthPitch,
    245                       uint8_t *output,
    246                       size_t outputRowPitch,
    247                       size_t outputDepthPitch)
    248 {
    249    for (size_t z = 0; z < depth; z++)
    250    {
    251        for (size_t y = 0; y < height; y++)
    252        {
    253            const uint16_t *source =
    254                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    255            uint16_t *dest =
    256                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    257            for (size_t x = 0; x < width; x++)
    258            {
    259                dest[4 * x + 0] = source[x];
    260                dest[4 * x + 1] = source[x];
    261                dest[4 * x + 2] = source[x];
    262                dest[4 * x + 3] = gl::Float16One;
    263            }
    264        }
    265    }
    266 }
    267 
    268 void LoadLA8ToRGBA8(size_t width,
    269                    size_t height,
    270                    size_t depth,
    271                    const uint8_t *input,
    272                    size_t inputRowPitch,
    273                    size_t inputDepthPitch,
    274                    uint8_t *output,
    275                    size_t outputRowPitch,
    276                    size_t outputDepthPitch)
    277 {
    278    for (size_t z = 0; z < depth; z++)
    279    {
    280        for (size_t y = 0; y < height; y++)
    281        {
    282            const uint8_t *source =
    283                priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
    284            uint8_t *dest =
    285                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    286            for (size_t x = 0; x < width; x++)
    287            {
    288                dest[4 * x + 0] = source[2 * x + 0];
    289                dest[4 * x + 1] = source[2 * x + 0];
    290                dest[4 * x + 2] = source[2 * x + 0];
    291                dest[4 * x + 3] = source[2 * x + 1];
    292            }
    293        }
    294    }
    295 }
    296 
    297 void LoadLA8ToBGRA8(size_t width,
    298                    size_t height,
    299                    size_t depth,
    300                    const uint8_t *input,
    301                    size_t inputRowPitch,
    302                    size_t inputDepthPitch,
    303                    uint8_t *output,
    304                    size_t outputRowPitch,
    305                    size_t outputDepthPitch)
    306 {
    307    // Same as loading to RGBA
    308    LoadLA8ToRGBA8(width, height, depth, input, inputRowPitch, inputDepthPitch, output,
    309                   outputRowPitch, outputDepthPitch);
    310 }
    311 
    312 void LoadLA32FToRGBA32F(size_t width,
    313                        size_t height,
    314                        size_t depth,
    315                        const uint8_t *input,
    316                        size_t inputRowPitch,
    317                        size_t inputDepthPitch,
    318                        uint8_t *output,
    319                        size_t outputRowPitch,
    320                        size_t outputDepthPitch)
    321 {
    322    for (size_t z = 0; z < depth; z++)
    323    {
    324        for (size_t y = 0; y < height; y++)
    325        {
    326            const float *source =
    327                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
    328            float *dest =
    329                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
    330            for (size_t x = 0; x < width; x++)
    331            {
    332                dest[4 * x + 0] = source[2 * x + 0];
    333                dest[4 * x + 1] = source[2 * x + 0];
    334                dest[4 * x + 2] = source[2 * x + 0];
    335                dest[4 * x + 3] = source[2 * x + 1];
    336            }
    337        }
    338    }
    339 }
    340 
    341 void LoadLA16FToRGBA16F(size_t width,
    342                        size_t height,
    343                        size_t depth,
    344                        const uint8_t *input,
    345                        size_t inputRowPitch,
    346                        size_t inputDepthPitch,
    347                        uint8_t *output,
    348                        size_t outputRowPitch,
    349                        size_t outputDepthPitch)
    350 {
    351    for (size_t z = 0; z < depth; z++)
    352    {
    353        for (size_t y = 0; y < height; y++)
    354        {
    355            const uint16_t *source =
    356                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    357            uint16_t *dest =
    358                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    359            for (size_t x = 0; x < width; x++)
    360            {
    361                dest[4 * x + 0] = source[2 * x + 0];
    362                dest[4 * x + 1] = source[2 * x + 0];
    363                dest[4 * x + 2] = source[2 * x + 0];
    364                dest[4 * x + 3] = source[2 * x + 1];
    365            }
    366        }
    367    }
    368 }
    369 
    370 void LoadRGB8ToBGR565(size_t width,
    371                      size_t height,
    372                      size_t depth,
    373                      const uint8_t *input,
    374                      size_t inputRowPitch,
    375                      size_t inputDepthPitch,
    376                      uint8_t *output,
    377                      size_t outputRowPitch,
    378                      size_t outputDepthPitch)
    379 {
    380    for (size_t z = 0; z < depth; z++)
    381    {
    382        for (size_t y = 0; y < height; y++)
    383        {
    384            const uint8_t *source =
    385                priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
    386            uint16_t *dest =
    387                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    388            for (size_t x = 0; x < width; x++)
    389            {
    390                uint8_t r8 = source[x * 3 + 0];
    391                uint8_t g8 = source[x * 3 + 1];
    392                uint8_t b8 = source[x * 3 + 2];
    393                auto r5    = static_cast<uint16_t>(r8 >> 3);
    394                auto g6    = static_cast<uint16_t>(g8 >> 2);
    395                auto b5    = static_cast<uint16_t>(b8 >> 3);
    396                dest[x]    = (r5 << 11) | (g6 << 5) | b5;
    397            }
    398        }
    399    }
    400 }
    401 
    402 void LoadRGB565ToBGR565(size_t width,
    403                        size_t height,
    404                        size_t depth,
    405                        const uint8_t *input,
    406                        size_t inputRowPitch,
    407                        size_t inputDepthPitch,
    408                        uint8_t *output,
    409                        size_t outputRowPitch,
    410                        size_t outputDepthPitch)
    411 {
    412    for (size_t z = 0; z < depth; z++)
    413    {
    414        for (size_t y = 0; y < height; y++)
    415        {
    416            const uint16_t *source =
    417                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    418            uint16_t *dest =
    419                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    420            for (size_t x = 0; x < width; x++)
    421            {
    422                // The GL type RGB is packed with with red in the MSB, while the D3D11 type BGR
    423                // is packed with red in the LSB
    424                auto rgb    = source[x];
    425                uint16_t r5 = gl::getShiftedData<5, 11>(rgb);
    426                uint16_t g6 = gl::getShiftedData<6, 5>(rgb);
    427                uint16_t b5 = gl::getShiftedData<5, 0>(rgb);
    428                dest[x]     = (r5 << 11) | (g6 << 5) | b5;
    429            }
    430        }
    431    }
    432 }
    433 
    434 void LoadRGB8ToBGRX8(size_t width,
    435                     size_t height,
    436                     size_t depth,
    437                     const uint8_t *input,
    438                     size_t inputRowPitch,
    439                     size_t inputDepthPitch,
    440                     uint8_t *output,
    441                     size_t outputRowPitch,
    442                     size_t outputDepthPitch)
    443 {
    444    for (size_t z = 0; z < depth; z++)
    445    {
    446        for (size_t y = 0; y < height; y++)
    447        {
    448            const uint8_t *source =
    449                priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
    450            uint8_t *dest =
    451                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    452            for (size_t x = 0; x < width; x++)
    453            {
    454                dest[4 * x + 0] = source[x * 3 + 2];
    455                dest[4 * x + 1] = source[x * 3 + 1];
    456                dest[4 * x + 2] = source[x * 3 + 0];
    457                dest[4 * x + 3] = 0xFF;
    458            }
    459        }
    460    }
    461 }
    462 
    463 void LoadRG8ToBGRX8(size_t width,
    464                    size_t height,
    465                    size_t depth,
    466                    const uint8_t *input,
    467                    size_t inputRowPitch,
    468                    size_t inputDepthPitch,
    469                    uint8_t *output,
    470                    size_t outputRowPitch,
    471                    size_t outputDepthPitch)
    472 {
    473    for (size_t z = 0; z < depth; z++)
    474    {
    475        for (size_t y = 0; y < height; y++)
    476        {
    477            const uint8_t *source =
    478                priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
    479            uint8_t *dest =
    480                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    481            for (size_t x = 0; x < width; x++)
    482            {
    483                dest[4 * x + 0] = 0x00;
    484                dest[4 * x + 1] = source[x * 2 + 1];
    485                dest[4 * x + 2] = source[x * 2 + 0];
    486                dest[4 * x + 3] = 0xFF;
    487            }
    488        }
    489    }
    490 }
    491 
    492 void LoadR8ToBGRX8(size_t width,
    493                   size_t height,
    494                   size_t depth,
    495                   const uint8_t *input,
    496                   size_t inputRowPitch,
    497                   size_t inputDepthPitch,
    498                   uint8_t *output,
    499                   size_t outputRowPitch,
    500                   size_t outputDepthPitch)
    501 {
    502    for (size_t z = 0; z < depth; z++)
    503    {
    504        for (size_t y = 0; y < height; y++)
    505        {
    506            const uint8_t *source =
    507                priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
    508            uint8_t *dest =
    509                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    510            for (size_t x = 0; x < width; x++)
    511            {
    512                dest[4 * x + 0] = 0x00;
    513                dest[4 * x + 1] = 0x00;
    514                dest[4 * x + 2] = source[x];
    515                dest[4 * x + 3] = 0xFF;
    516            }
    517        }
    518    }
    519 }
    520 
    521 void LoadR5G6B5ToBGRA8(size_t width,
    522                       size_t height,
    523                       size_t depth,
    524                       const uint8_t *input,
    525                       size_t inputRowPitch,
    526                       size_t inputDepthPitch,
    527                       uint8_t *output,
    528                       size_t outputRowPitch,
    529                       size_t outputDepthPitch)
    530 {
    531    for (size_t z = 0; z < depth; z++)
    532    {
    533        for (size_t y = 0; y < height; y++)
    534        {
    535            const uint16_t *source =
    536                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    537            uint8_t *dest =
    538                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    539            for (size_t x = 0; x < width; x++)
    540            {
    541                uint16_t rgb = source[x];
    542                dest[4 * x + 0] =
    543                    static_cast<uint8_t>(((rgb & 0x001F) << 3) | ((rgb & 0x001F) >> 2));
    544                dest[4 * x + 1] =
    545                    static_cast<uint8_t>(((rgb & 0x07E0) >> 3) | ((rgb & 0x07E0) >> 9));
    546                dest[4 * x + 2] =
    547                    static_cast<uint8_t>(((rgb & 0xF800) >> 8) | ((rgb & 0xF800) >> 13));
    548                dest[4 * x + 3] = 0xFF;
    549            }
    550        }
    551    }
    552 }
    553 
    554 void LoadR5G6B5ToRGBA8(size_t width,
    555                       size_t height,
    556                       size_t depth,
    557                       const uint8_t *input,
    558                       size_t inputRowPitch,
    559                       size_t inputDepthPitch,
    560                       uint8_t *output,
    561                       size_t outputRowPitch,
    562                       size_t outputDepthPitch)
    563 {
    564    for (size_t z = 0; z < depth; z++)
    565    {
    566        for (size_t y = 0; y < height; y++)
    567        {
    568            const uint16_t *source =
    569                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    570            uint8_t *dest =
    571                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    572            for (size_t x = 0; x < width; x++)
    573            {
    574                uint16_t rgb = source[x];
    575                dest[4 * x + 0] =
    576                    static_cast<uint8_t>(((rgb & 0xF800) >> 8) | ((rgb & 0xF800) >> 13));
    577                dest[4 * x + 1] =
    578                    static_cast<uint8_t>(((rgb & 0x07E0) >> 3) | ((rgb & 0x07E0) >> 9));
    579                dest[4 * x + 2] =
    580                    static_cast<uint8_t>(((rgb & 0x001F) << 3) | ((rgb & 0x001F) >> 2));
    581                dest[4 * x + 3] = 0xFF;
    582            }
    583        }
    584    }
    585 }
    586 
    587 void LoadRGBA8ToBGRA8(size_t width,
    588                      size_t height,
    589                      size_t depth,
    590                      const uint8_t *input,
    591                      size_t inputRowPitch,
    592                      size_t inputDepthPitch,
    593                      uint8_t *output,
    594                      size_t outputRowPitch,
    595                      size_t outputDepthPitch)
    596 {
    597 #if defined(ANGLE_USE_SSE)
    598    if (gl::supportsSSE2())
    599    {
    600        __m128i brMask = _mm_set1_epi32(0x00ff00ff);
    601 
    602        for (size_t z = 0; z < depth; z++)
    603        {
    604            for (size_t y = 0; y < height; y++)
    605            {
    606                const uint32_t *source =
    607                    priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
    608                uint32_t *dest = priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch,
    609                                                                   outputDepthPitch);
    610 
    611                size_t x = 0;
    612 
    613                // Make output writes aligned
    614                for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 15) != 0) && x < width; x++)
    615                {
    616                    uint32_t rgba = source[x];
    617                    dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
    618                }
    619 
    620                for (; x + 3 < width; x += 4)
    621                {
    622                    __m128i sourceData =
    623                        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&source[x]));
    624                    // Mask out g and a, which don't change
    625                    __m128i gaComponents = _mm_andnot_si128(brMask, sourceData);
    626                    // Mask out b and r
    627                    __m128i brComponents = _mm_and_si128(sourceData, brMask);
    628                    // Swap b and r
    629                    __m128i brSwapped = _mm_shufflehi_epi16(
    630                        _mm_shufflelo_epi16(brComponents, _MM_SHUFFLE(2, 3, 0, 1)),
    631                        _MM_SHUFFLE(2, 3, 0, 1));
    632                    __m128i result = _mm_or_si128(gaComponents, brSwapped);
    633                    _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), result);
    634                }
    635 
    636                // Perform leftover writes
    637                for (; x < width; x++)
    638                {
    639                    uint32_t rgba = source[x];
    640                    dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
    641                }
    642            }
    643        }
    644 
    645        return;
    646    }
    647 #endif
    648 
    649    for (size_t z = 0; z < depth; z++)
    650    {
    651        for (size_t y = 0; y < height; y++)
    652        {
    653            const uint32_t *source =
    654                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
    655            uint32_t *dest =
    656                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
    657            for (size_t x = 0; x < width; x++)
    658            {
    659                uint32_t rgba = source[x];
    660                dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
    661            }
    662        }
    663    }
    664 }
    665 
    666 void LoadRGBA8ToBGRA4(size_t width,
    667                      size_t height,
    668                      size_t depth,
    669                      const uint8_t *input,
    670                      size_t inputRowPitch,
    671                      size_t inputDepthPitch,
    672                      uint8_t *output,
    673                      size_t outputRowPitch,
    674                      size_t outputDepthPitch)
    675 {
    676    for (size_t z = 0; z < depth; z++)
    677    {
    678        for (size_t y = 0; y < height; y++)
    679        {
    680            const uint32_t *source =
    681                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
    682            uint16_t *dest =
    683                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    684            for (size_t x = 0; x < width; x++)
    685            {
    686                uint32_t rgba8 = source[x];
    687                auto r4        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 4);
    688                auto g4        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 12);
    689                auto b4        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 20);
    690                auto a4        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 28);
    691                dest[x]        = (a4 << 12) | (r4 << 8) | (g4 << 4) | b4;
    692            }
    693        }
    694    }
    695 }
    696 
    697 void LoadRGBA8ToRGBA4(size_t width,
    698                      size_t height,
    699                      size_t depth,
    700                      const uint8_t *input,
    701                      size_t inputRowPitch,
    702                      size_t inputDepthPitch,
    703                      uint8_t *output,
    704                      size_t outputRowPitch,
    705                      size_t outputDepthPitch)
    706 {
    707    for (size_t z = 0; z < depth; z++)
    708    {
    709        for (size_t y = 0; y < height; y++)
    710        {
    711            const uint32_t *source =
    712                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
    713            uint16_t *dest =
    714                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    715            for (size_t x = 0; x < width; x++)
    716            {
    717                uint32_t rgba8 = source[x];
    718                auto r4        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 4);
    719                auto g4        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 12);
    720                auto b4        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 20);
    721                auto a4        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 28);
    722                dest[x]        = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4;
    723            }
    724        }
    725    }
    726 }
    727 
    728 void LoadRGBA4ToARGB4(size_t width,
    729                      size_t height,
    730                      size_t depth,
    731                      const uint8_t *input,
    732                      size_t inputRowPitch,
    733                      size_t inputDepthPitch,
    734                      uint8_t *output,
    735                      size_t outputRowPitch,
    736                      size_t outputDepthPitch)
    737 {
    738    for (size_t z = 0; z < depth; z++)
    739    {
    740        for (size_t y = 0; y < height; y++)
    741        {
    742            const uint16_t *source =
    743                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    744            uint16_t *dest =
    745                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    746            for (size_t x = 0; x < width; x++)
    747            {
    748                dest[x] = ANGLE_ROTR16(source[x], 4);
    749            }
    750        }
    751    }
    752 }
    753 
    754 void LoadRGBA4ToBGRA8(size_t width,
    755                      size_t height,
    756                      size_t depth,
    757                      const uint8_t *input,
    758                      size_t inputRowPitch,
    759                      size_t inputDepthPitch,
    760                      uint8_t *output,
    761                      size_t outputRowPitch,
    762                      size_t outputDepthPitch)
    763 {
    764    for (size_t z = 0; z < depth; z++)
    765    {
    766        for (size_t y = 0; y < height; y++)
    767        {
    768            const uint16_t *source =
    769                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    770            uint8_t *dest =
    771                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    772            for (size_t x = 0; x < width; x++)
    773            {
    774                uint16_t rgba = source[x];
    775                dest[4 * x + 0] =
    776                    static_cast<uint8_t>(((rgba & 0x00F0) << 0) | ((rgba & 0x00F0) >> 4));
    777                dest[4 * x + 1] =
    778                    static_cast<uint8_t>(((rgba & 0x0F00) >> 4) | ((rgba & 0x0F00) >> 8));
    779                dest[4 * x + 2] =
    780                    static_cast<uint8_t>(((rgba & 0xF000) >> 8) | ((rgba & 0xF000) >> 12));
    781                dest[4 * x + 3] =
    782                    static_cast<uint8_t>(((rgba & 0x000F) << 4) | ((rgba & 0x000F) >> 0));
    783            }
    784        }
    785    }
    786 }
    787 
    788 void LoadRGBA4ToRGBA8(size_t width,
    789                      size_t height,
    790                      size_t depth,
    791                      const uint8_t *input,
    792                      size_t inputRowPitch,
    793                      size_t inputDepthPitch,
    794                      uint8_t *output,
    795                      size_t outputRowPitch,
    796                      size_t outputDepthPitch)
    797 {
    798    for (size_t z = 0; z < depth; z++)
    799    {
    800        for (size_t y = 0; y < height; y++)
    801        {
    802            const uint16_t *source =
    803                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    804            uint8_t *dest =
    805                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    806            for (size_t x = 0; x < width; x++)
    807            {
    808                uint16_t rgba = source[x];
    809                dest[4 * x + 0] =
    810                    static_cast<uint8_t>(((rgba & 0xF000) >> 8) | ((rgba & 0xF000) >> 12));
    811                dest[4 * x + 1] =
    812                    static_cast<uint8_t>(((rgba & 0x0F00) >> 4) | ((rgba & 0x0F00) >> 8));
    813                dest[4 * x + 2] =
    814                    static_cast<uint8_t>(((rgba & 0x00F0) << 0) | ((rgba & 0x00F0) >> 4));
    815                dest[4 * x + 3] =
    816                    static_cast<uint8_t>(((rgba & 0x000F) << 4) | ((rgba & 0x000F) >> 0));
    817            }
    818        }
    819    }
    820 }
    821 
    822 void LoadBGRA4ToBGRA8(size_t width,
    823                      size_t height,
    824                      size_t depth,
    825                      const uint8_t *input,
    826                      size_t inputRowPitch,
    827                      size_t inputDepthPitch,
    828                      uint8_t *output,
    829                      size_t outputRowPitch,
    830                      size_t outputDepthPitch)
    831 {
    832    for (size_t z = 0; z < depth; z++)
    833    {
    834        for (size_t y = 0; y < height; y++)
    835        {
    836            const uint16_t *source =
    837                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
    838            uint8_t *dest =
    839                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
    840            for (size_t x = 0; x < width; x++)
    841            {
    842                uint16_t bgra = source[x];
    843                dest[4 * x + 0] =
    844                    static_cast<uint8_t>(((bgra & 0xF000) >> 8) | ((bgra & 0xF000) >> 12));
    845                dest[4 * x + 1] =
    846                    static_cast<uint8_t>(((bgra & 0x0F00) >> 4) | ((bgra & 0x0F00) >> 8));
    847                dest[4 * x + 2] =
    848                    static_cast<uint8_t>(((bgra & 0x00F0) << 0) | ((bgra & 0x00F0) >> 4));
    849                dest[4 * x + 3] =
    850                    static_cast<uint8_t>(((bgra & 0x000F) << 4) | ((bgra & 0x000F) >> 0));
    851            }
    852        }
    853    }
    854 }
    855 
    856 void LoadRGBA8ToBGR5A1(size_t width,
    857                       size_t height,
    858                       size_t depth,
    859                       const uint8_t *input,
    860                       size_t inputRowPitch,
    861                       size_t inputDepthPitch,
    862                       uint8_t *output,
    863                       size_t outputRowPitch,
    864                       size_t outputDepthPitch)
    865 {
    866    for (size_t z = 0; z < depth; z++)
    867    {
    868        for (size_t y = 0; y < height; y++)
    869        {
    870            const uint32_t *source =
    871                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
    872            uint16_t *dest =
    873                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    874            for (size_t x = 0; x < width; x++)
    875            {
    876                uint32_t rgba8 = source[x];
    877                auto r5        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 3);
    878                auto g5        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 11);
    879                auto b5        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 19);
    880                auto a1        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 31);
    881                dest[x]        = (a1 << 15) | (r5 << 10) | (g5 << 5) | b5;
    882            }
    883        }
    884    }
    885 }
    886 
    887 void LoadRGBA8ToRGB5A1(size_t width,
    888                       size_t height,
    889                       size_t depth,
    890                       const uint8_t *input,
    891                       size_t inputRowPitch,
    892                       size_t inputDepthPitch,
    893                       uint8_t *output,
    894                       size_t outputRowPitch,
    895                       size_t outputDepthPitch)
    896 {
    897    for (size_t z = 0; z < depth; z++)
    898    {
    899        for (size_t y = 0; y < height; y++)
    900        {
    901            const uint32_t *source =
    902                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
    903            uint16_t *dest =
    904                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    905            for (size_t x = 0; x < width; x++)
    906            {
    907                uint32_t rgba8 = source[x];
    908                auto r5        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 3);
    909                auto g5        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 11);
    910                auto b5        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 19);
    911                auto a1        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 31);
    912                dest[x]        = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1;
    913            }
    914        }
    915    }
    916 }
    917 
    918 void LoadRGB10A2ToBGR5A1(size_t width,
    919                         size_t height,
    920                         size_t depth,
    921                         const uint8_t *input,
    922                         size_t inputRowPitch,
    923                         size_t inputDepthPitch,
    924                         uint8_t *output,
    925                         size_t outputRowPitch,
    926                         size_t outputDepthPitch)
    927 {
    928    for (size_t z = 0; z < depth; z++)
    929    {
    930        for (size_t y = 0; y < height; y++)
    931        {
    932            const R10G10B10A2 *source =
    933                priv::OffsetDataPointer<R10G10B10A2>(input, y, z, inputRowPitch, inputDepthPitch);
    934            uint16_t *dest =
    935                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    936            for (size_t x = 0; x < width; x++)
    937            {
    938                R10G10B10A2 rgb10a2 = source[x];
    939 
    940                uint16_t r5 = static_cast<uint16_t>(rgb10a2.R >> 5u);
    941                uint16_t g5 = static_cast<uint16_t>(rgb10a2.G >> 5u);
    942                uint16_t b5 = static_cast<uint16_t>(rgb10a2.B >> 5u);
    943                uint16_t a1 = static_cast<uint16_t>(rgb10a2.A >> 1u);
    944 
    945                dest[x] = (a1 << 15) | (r5 << 10) | (g5 << 5) | b5;
    946            }
    947        }
    948    }
    949 }
    950 
    951 void LoadRGB10A2ToRGB5A1(size_t width,
    952                         size_t height,
    953                         size_t depth,
    954                         const uint8_t *input,
    955                         size_t inputRowPitch,
    956                         size_t inputDepthPitch,
    957                         uint8_t *output,
    958                         size_t outputRowPitch,
    959                         size_t outputDepthPitch)
    960 {
    961    for (size_t z = 0; z < depth; z++)
    962    {
    963        for (size_t y = 0; y < height; y++)
    964        {
    965            const R10G10B10A2 *source =
    966                priv::OffsetDataPointer<R10G10B10A2>(input, y, z, inputRowPitch, inputDepthPitch);
    967            uint16_t *dest =
    968                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
    969            for (size_t x = 0; x < width; x++)
    970            {
    971                R10G10B10A2 rgb10a2 = source[x];
    972 
    973                uint16_t r5 = static_cast<uint16_t>(rgb10a2.R >> 5u);
    974                uint16_t g5 = static_cast<uint16_t>(rgb10a2.G >> 5u);
    975                uint16_t b5 = static_cast<uint16_t>(rgb10a2.B >> 5u);
    976                uint16_t a1 = static_cast<uint16_t>(rgb10a2.A >> 1u);
    977 
    978                dest[x] = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1;
    979            }
    980        }
    981    }
    982 }
    983 
    984 void LoadRGB5A1ToA1RGB5(size_t width,
    985                        size_t height,
    986                        size_t depth,
    987                        const uint8_t *input,
    988                        size_t inputRowPitch,
    989                        size_t inputDepthPitch,
    990                        uint8_t *output,
    991                        size_t outputRowPitch,
    992                        size_t outputDepthPitch)
    993 {
    994    for (size_t z = 0; z < depth; z++)
    995    {
    996        for (size_t y = 0; y < height; y++)
    997        {
    998            const uint16_t *source =
    999                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1000            uint16_t *dest =
   1001                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1002            for (size_t x = 0; x < width; x++)
   1003            {
   1004                dest[x] = ANGLE_ROTR16(source[x], 1);
   1005            }
   1006        }
   1007    }
   1008 }
   1009 
   1010 void LoadRGB5A1ToBGR5A1(size_t width,
   1011                        size_t height,
   1012                        size_t depth,
   1013                        const uint8_t *input,
   1014                        size_t inputRowPitch,
   1015                        size_t inputDepthPitch,
   1016                        uint8_t *output,
   1017                        size_t outputRowPitch,
   1018                        size_t outputDepthPitch)
   1019 {
   1020    for (size_t z = 0; z < depth; z++)
   1021    {
   1022        for (size_t y = 0; y < height; y++)
   1023        {
   1024            const uint16_t *source =
   1025                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1026            uint16_t *dest =
   1027                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1028            for (size_t x = 0; x < width; x++)
   1029            {
   1030                uint16_t rgba = source[x];
   1031                auto r5       = static_cast<uint16_t>((rgba & 0xF800) >> 11);
   1032                auto g5       = static_cast<uint16_t>((rgba & 0x07c0) >> 6);
   1033                auto b5       = static_cast<uint16_t>((rgba & 0x003e) >> 1);
   1034                auto a1       = static_cast<uint16_t>((rgba & 0x0001));
   1035                dest[x]       = (b5 << 11) | (g5 << 6) | (r5 << 1) | a1;
   1036            }
   1037        }
   1038    }
   1039 }
   1040 
   1041 void LoadRGB5A1ToBGRA8(size_t width,
   1042                       size_t height,
   1043                       size_t depth,
   1044                       const uint8_t *input,
   1045                       size_t inputRowPitch,
   1046                       size_t inputDepthPitch,
   1047                       uint8_t *output,
   1048                       size_t outputRowPitch,
   1049                       size_t outputDepthPitch)
   1050 {
   1051    for (size_t z = 0; z < depth; z++)
   1052    {
   1053        for (size_t y = 0; y < height; y++)
   1054        {
   1055            const uint16_t *source =
   1056                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1057            uint8_t *dest =
   1058                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1059            for (size_t x = 0; x < width; x++)
   1060            {
   1061                uint16_t rgba = source[x];
   1062                dest[4 * x + 0] =
   1063                    static_cast<uint8_t>(((rgba & 0x003E) << 2) | ((rgba & 0x003E) >> 3));
   1064                dest[4 * x + 1] =
   1065                    static_cast<uint8_t>(((rgba & 0x07C0) >> 3) | ((rgba & 0x07C0) >> 8));
   1066                dest[4 * x + 2] =
   1067                    static_cast<uint8_t>(((rgba & 0xF800) >> 8) | ((rgba & 0xF800) >> 13));
   1068                dest[4 * x + 3] = static_cast<uint8_t>((rgba & 0x0001) ? 0xFF : 0);
   1069            }
   1070        }
   1071    }
   1072 }
   1073 
   1074 void LoadRGB5A1ToRGBA8(size_t width,
   1075                       size_t height,
   1076                       size_t depth,
   1077                       const uint8_t *input,
   1078                       size_t inputRowPitch,
   1079                       size_t inputDepthPitch,
   1080                       uint8_t *output,
   1081                       size_t outputRowPitch,
   1082                       size_t outputDepthPitch)
   1083 {
   1084    for (size_t z = 0; z < depth; z++)
   1085    {
   1086        for (size_t y = 0; y < height; y++)
   1087        {
   1088            const uint16_t *source =
   1089                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1090            uint8_t *dest =
   1091                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1092            for (size_t x = 0; x < width; x++)
   1093            {
   1094                uint16_t rgba = source[x];
   1095                dest[4 * x + 0] =
   1096                    static_cast<uint8_t>(((rgba & 0xF800) >> 8) | ((rgba & 0xF800) >> 13));
   1097                dest[4 * x + 1] =
   1098                    static_cast<uint8_t>(((rgba & 0x07C0) >> 3) | ((rgba & 0x07C0) >> 8));
   1099                dest[4 * x + 2] =
   1100                    static_cast<uint8_t>(((rgba & 0x003E) << 2) | ((rgba & 0x003E) >> 3));
   1101                dest[4 * x + 3] = static_cast<uint8_t>((rgba & 0x0001) ? 0xFF : 0);
   1102            }
   1103        }
   1104    }
   1105 }
   1106 
   1107 void LoadBGR5A1ToBGRA8(size_t width,
   1108                       size_t height,
   1109                       size_t depth,
   1110                       const uint8_t *input,
   1111                       size_t inputRowPitch,
   1112                       size_t inputDepthPitch,
   1113                       uint8_t *output,
   1114                       size_t outputRowPitch,
   1115                       size_t outputDepthPitch)
   1116 {
   1117    for (size_t z = 0; z < depth; z++)
   1118    {
   1119        for (size_t y = 0; y < height; y++)
   1120        {
   1121            const uint16_t *source =
   1122                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1123            uint8_t *dest =
   1124                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1125            for (size_t x = 0; x < width; x++)
   1126            {
   1127                uint16_t bgra = source[x];
   1128                dest[4 * x + 0] =
   1129                    static_cast<uint8_t>(((bgra & 0xF800) >> 8) | ((bgra & 0xF800) >> 13));
   1130                dest[4 * x + 1] =
   1131                    static_cast<uint8_t>(((bgra & 0x07C0) >> 3) | ((bgra & 0x07C0) >> 8));
   1132                dest[4 * x + 2] =
   1133                    static_cast<uint8_t>(((bgra & 0x003E) << 2) | ((bgra & 0x003E) >> 3));
   1134                dest[4 * x + 3] = static_cast<uint8_t>((bgra & 0x0001) ? 0xFF : 0);
   1135            }
   1136        }
   1137    }
   1138 }
   1139 
   1140 void LoadRGB10A2ToRGBA8(size_t width,
   1141                        size_t height,
   1142                        size_t depth,
   1143                        const uint8_t *input,
   1144                        size_t inputRowPitch,
   1145                        size_t inputDepthPitch,
   1146                        uint8_t *output,
   1147                        size_t outputRowPitch,
   1148                        size_t outputDepthPitch)
   1149 {
   1150    for (size_t z = 0; z < depth; z++)
   1151    {
   1152        for (size_t y = 0; y < height; y++)
   1153        {
   1154            const uint32_t *source =
   1155                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1156            uint8_t *dest =
   1157                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1158            for (size_t x = 0; x < width; x++)
   1159            {
   1160                uint32_t rgba   = source[x];
   1161                dest[4 * x + 0] = static_cast<uint8_t>((rgba & 0x000003FF) >> 2);
   1162                dest[4 * x + 1] = static_cast<uint8_t>((rgba & 0x000FFC00) >> 12);
   1163                dest[4 * x + 2] = static_cast<uint8_t>((rgba & 0x3FF00000) >> 22);
   1164                dest[4 * x + 3] = static_cast<uint8_t>(((rgba & 0xC0000000) >> 30) * 0x55);
   1165            }
   1166        }
   1167    }
   1168 }
   1169 
   1170 void LoadRGB10A2ToRGB10X2(size_t width,
   1171                          size_t height,
   1172                          size_t depth,
   1173                          const uint8_t *input,
   1174                          size_t inputRowPitch,
   1175                          size_t inputDepthPitch,
   1176                          uint8_t *output,
   1177                          size_t outputRowPitch,
   1178                          size_t outputDepthPitch)
   1179 {
   1180    for (size_t z = 0; z < depth; z++)
   1181    {
   1182        for (size_t y = 0; y < height; y++)
   1183        {
   1184            const uint32_t *source =
   1185                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1186            uint32_t *dest =
   1187                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1188            for (size_t x = 0; x < width; x++)
   1189            {
   1190                dest[x] = source[x] | 0xC0000000;
   1191            }
   1192        }
   1193    }
   1194 }
   1195 
   1196 void LoadRGB16FToRGB9E5(size_t width,
   1197                        size_t height,
   1198                        size_t depth,
   1199                        const uint8_t *input,
   1200                        size_t inputRowPitch,
   1201                        size_t inputDepthPitch,
   1202                        uint8_t *output,
   1203                        size_t outputRowPitch,
   1204                        size_t outputDepthPitch)
   1205 {
   1206    for (size_t z = 0; z < depth; z++)
   1207    {
   1208        for (size_t y = 0; y < height; y++)
   1209        {
   1210            const uint16_t *source =
   1211                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1212            uint32_t *dest =
   1213                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1214            for (size_t x = 0; x < width; x++)
   1215            {
   1216                dest[x] = gl::convertRGBFloatsTo999E5(gl::float16ToFloat32(source[x * 3 + 0]),
   1217                                                      gl::float16ToFloat32(source[x * 3 + 1]),
   1218                                                      gl::float16ToFloat32(source[x * 3 + 2]));
   1219            }
   1220        }
   1221    }
   1222 }
   1223 
   1224 void LoadRGB32FToRGB9E5(size_t width,
   1225                        size_t height,
   1226                        size_t depth,
   1227                        const uint8_t *input,
   1228                        size_t inputRowPitch,
   1229                        size_t inputDepthPitch,
   1230                        uint8_t *output,
   1231                        size_t outputRowPitch,
   1232                        size_t outputDepthPitch)
   1233 {
   1234    for (size_t z = 0; z < depth; z++)
   1235    {
   1236        for (size_t y = 0; y < height; y++)
   1237        {
   1238            const float *source =
   1239                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
   1240            uint32_t *dest =
   1241                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1242            for (size_t x = 0; x < width; x++)
   1243            {
   1244                dest[x] = gl::convertRGBFloatsTo999E5(source[x * 3 + 0], source[x * 3 + 1],
   1245                                                      source[x * 3 + 2]);
   1246            }
   1247        }
   1248    }
   1249 }
   1250 
   1251 void LoadRGB16FToRG11B10F(size_t width,
   1252                          size_t height,
   1253                          size_t depth,
   1254                          const uint8_t *input,
   1255                          size_t inputRowPitch,
   1256                          size_t inputDepthPitch,
   1257                          uint8_t *output,
   1258                          size_t outputRowPitch,
   1259                          size_t outputDepthPitch)
   1260 {
   1261    for (size_t z = 0; z < depth; z++)
   1262    {
   1263        for (size_t y = 0; y < height; y++)
   1264        {
   1265            const uint16_t *source =
   1266                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1267            uint32_t *dest =
   1268                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1269            for (size_t x = 0; x < width; x++)
   1270            {
   1271                dest[x] = (gl::float32ToFloat11(gl::float16ToFloat32(source[x * 3 + 0])) << 0) |
   1272                          (gl::float32ToFloat11(gl::float16ToFloat32(source[x * 3 + 1])) << 11) |
   1273                          (gl::float32ToFloat10(gl::float16ToFloat32(source[x * 3 + 2])) << 22);
   1274            }
   1275        }
   1276    }
   1277 }
   1278 
   1279 void LoadRGB32FToRG11B10F(size_t width,
   1280                          size_t height,
   1281                          size_t depth,
   1282                          const uint8_t *input,
   1283                          size_t inputRowPitch,
   1284                          size_t inputDepthPitch,
   1285                          uint8_t *output,
   1286                          size_t outputRowPitch,
   1287                          size_t outputDepthPitch)
   1288 {
   1289    for (size_t z = 0; z < depth; z++)
   1290    {
   1291        for (size_t y = 0; y < height; y++)
   1292        {
   1293            const float *source =
   1294                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
   1295            uint32_t *dest =
   1296                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1297            for (size_t x = 0; x < width; x++)
   1298            {
   1299                dest[x] = (gl::float32ToFloat11(source[x * 3 + 0]) << 0) |
   1300                          (gl::float32ToFloat11(source[x * 3 + 1]) << 11) |
   1301                          (gl::float32ToFloat10(source[x * 3 + 2]) << 22);
   1302            }
   1303        }
   1304    }
   1305 }
   1306 
   1307 void LoadG8R24ToR24G8(size_t width,
   1308                      size_t height,
   1309                      size_t depth,
   1310                      const uint8_t *input,
   1311                      size_t inputRowPitch,
   1312                      size_t inputDepthPitch,
   1313                      uint8_t *output,
   1314                      size_t outputRowPitch,
   1315                      size_t outputDepthPitch)
   1316 {
   1317    for (size_t z = 0; z < depth; z++)
   1318    {
   1319        for (size_t y = 0; y < height; y++)
   1320        {
   1321            const uint32_t *source =
   1322                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1323            uint32_t *dest =
   1324                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1325            for (size_t x = 0; x < width; x++)
   1326            {
   1327                uint32_t d = source[x] >> 8;
   1328                uint8_t s  = source[x] & 0xFF;
   1329                dest[x]    = d | (s << 24);
   1330            }
   1331        }
   1332    }
   1333 }
   1334 
   1335 void LoadD24S8ToD32FS8X24(size_t width,
   1336                          size_t height,
   1337                          size_t depth,
   1338                          const uint8_t *input,
   1339                          size_t inputRowPitch,
   1340                          size_t inputDepthPitch,
   1341                          uint8_t *output,
   1342                          size_t outputRowPitch,
   1343                          size_t outputDepthPitch)
   1344 {
   1345    for (size_t z = 0; z < depth; z++)
   1346    {
   1347        for (size_t y = 0; y < height; y++)
   1348        {
   1349            const uint32_t *source =
   1350                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1351            float *destDepth =
   1352                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
   1353            uint32_t *destStencil =
   1354                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch) +
   1355                1;
   1356            for (size_t x = 0; x < width; x++)
   1357            {
   1358                destDepth[x * 2]   = (source[x] & 0xFFFFFF) / static_cast<float>(0xFFFFFF);
   1359                destStencil[x * 2] = source[x] & 0xFF000000;
   1360            }
   1361        }
   1362    }
   1363 }
   1364 
   1365 void LoadD24S8ToD32F(size_t width,
   1366                     size_t height,
   1367                     size_t depth,
   1368                     const uint8_t *input,
   1369                     size_t inputRowPitch,
   1370                     size_t inputDepthPitch,
   1371                     uint8_t *output,
   1372                     size_t outputRowPitch,
   1373                     size_t outputDepthPitch)
   1374 {
   1375    for (size_t z = 0; z < depth; z++)
   1376    {
   1377        for (size_t y = 0; y < height; y++)
   1378        {
   1379            const uint32_t *source =
   1380                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1381            float *destDepth =
   1382                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
   1383            for (size_t x = 0; x < width; x++)
   1384            {
   1385                uint32_t sourcePixel = (source[x] >> 8) & 0xFFFFFF;
   1386                destDepth[x]         = sourcePixel / static_cast<float>(0xFFFFFF);
   1387            }
   1388        }
   1389    }
   1390 }
   1391 
   1392 void LoadD32ToD32FX32(size_t width,
   1393                      size_t height,
   1394                      size_t depth,
   1395                      const uint8_t *input,
   1396                      size_t inputRowPitch,
   1397                      size_t inputDepthPitch,
   1398                      uint8_t *output,
   1399                      size_t outputRowPitch,
   1400                      size_t outputDepthPitch)
   1401 {
   1402    for (size_t z = 0; z < depth; z++)
   1403    {
   1404        for (size_t y = 0; y < height; y++)
   1405        {
   1406            const uint32_t *source =
   1407                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1408            float *destDepth =
   1409                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
   1410            for (size_t x = 0; x < width; x++)
   1411            {
   1412                destDepth[x * 2] = source[x] / static_cast<float>(0xFFFFFFFF);
   1413            }
   1414        }
   1415    }
   1416 }
   1417 
   1418 void LoadD32ToD32F(size_t width,
   1419                   size_t height,
   1420                   size_t depth,
   1421                   const uint8_t *input,
   1422                   size_t inputRowPitch,
   1423                   size_t inputDepthPitch,
   1424                   uint8_t *output,
   1425                   size_t outputRowPitch,
   1426                   size_t outputDepthPitch)
   1427 {
   1428    for (size_t z = 0; z < depth; z++)
   1429    {
   1430        for (size_t y = 0; y < height; y++)
   1431        {
   1432            const uint32_t *source =
   1433                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1434            float *destDepth =
   1435                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
   1436            for (size_t x = 0; x < width; x++)
   1437            {
   1438                uint32_t sourcePixel = source[x];
   1439                destDepth[x]         = sourcePixel / static_cast<float>(0xFFFFFFFF);
   1440            }
   1441        }
   1442    }
   1443 }
   1444 
   1445 void LoadD32FToD32F(size_t width,
   1446                    size_t height,
   1447                    size_t depth,
   1448                    const uint8_t *input,
   1449                    size_t inputRowPitch,
   1450                    size_t inputDepthPitch,
   1451                    uint8_t *output,
   1452                    size_t outputRowPitch,
   1453                    size_t outputDepthPitch)
   1454 {
   1455    for (size_t z = 0; z < depth; z++)
   1456    {
   1457        for (size_t y = 0; y < height; y++)
   1458        {
   1459            const float *source =
   1460                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
   1461            float *dest =
   1462                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
   1463            for (size_t x = 0; x < width; x++)
   1464            {
   1465                dest[x] = gl::clamp01(source[x]);
   1466            }
   1467        }
   1468    }
   1469 }
   1470 
   1471 void LoadD32FS8X24ToD24S8(size_t width,
   1472                          size_t height,
   1473                          size_t depth,
   1474                          const uint8_t *input,
   1475                          size_t inputRowPitch,
   1476                          size_t inputDepthPitch,
   1477                          uint8_t *output,
   1478                          size_t outputRowPitch,
   1479                          size_t outputDepthPitch)
   1480 {
   1481    for (size_t z = 0; z < depth; z++)
   1482    {
   1483        for (size_t y = 0; y < height; y++)
   1484        {
   1485            const float *sourceDepth =
   1486                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
   1487            const uint32_t *sourceStencil =
   1488                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch) + 1;
   1489            uint32_t *dest =
   1490                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1491            for (size_t x = 0; x < width; x++)
   1492            {
   1493                uint32_t d = static_cast<uint32_t>(gl::clamp01(sourceDepth[x * 2]) * 0xFFFFFF);
   1494                uint32_t s = sourceStencil[x * 2] & 0xFF000000;
   1495                dest[x]    = d | s;
   1496            }
   1497        }
   1498    }
   1499 }
   1500 
   1501 void LoadX24S8ToS8(size_t width,
   1502                   size_t height,
   1503                   size_t depth,
   1504                   const uint8_t *input,
   1505                   size_t inputRowPitch,
   1506                   size_t inputDepthPitch,
   1507                   uint8_t *output,
   1508                   size_t outputRowPitch,
   1509                   size_t outputDepthPitch)
   1510 {
   1511    for (size_t z = 0; z < depth; z++)
   1512    {
   1513        for (size_t y = 0; y < height; y++)
   1514        {
   1515            const uint32_t *source = reinterpret_cast<const uint32_t *>(
   1516                input + (y * inputRowPitch) + (z * inputDepthPitch));
   1517            uint8_t *destStencil =
   1518                reinterpret_cast<uint8_t *>(output + (y * outputRowPitch) + (z * outputDepthPitch));
   1519            for (size_t x = 0; x < width; x++)
   1520            {
   1521                destStencil[x] = (source[x] & 0xFF);
   1522            }
   1523        }
   1524    }
   1525 }
   1526 
   1527 void LoadX32S8ToS8(size_t width,
   1528                   size_t height,
   1529                   size_t depth,
   1530                   const uint8_t *input,
   1531                   size_t inputRowPitch,
   1532                   size_t inputDepthPitch,
   1533                   uint8_t *output,
   1534                   size_t outputRowPitch,
   1535                   size_t outputDepthPitch)
   1536 {
   1537    for (size_t z = 0; z < depth; z++)
   1538    {
   1539        for (size_t y = 0; y < height; y++)
   1540        {
   1541            const uint32_t *source = reinterpret_cast<const uint32_t *>(
   1542                input + (y * inputRowPitch) + (z * inputDepthPitch));
   1543            uint8_t *destStencil =
   1544                reinterpret_cast<uint8_t *>(output + (y * outputRowPitch) + (z * outputDepthPitch));
   1545            for (size_t x = 0; x < width; x++)
   1546            {
   1547                destStencil[x] = (source[(x * 2) + 1] & 0xFF);
   1548            }
   1549        }
   1550    }
   1551 }
   1552 
   1553 void LoadD32FS8X24ToD32F(size_t width,
   1554                         size_t height,
   1555                         size_t depth,
   1556                         const uint8_t *input,
   1557                         size_t inputRowPitch,
   1558                         size_t inputDepthPitch,
   1559                         uint8_t *output,
   1560                         size_t outputRowPitch,
   1561                         size_t outputDepthPitch)
   1562 {
   1563    for (size_t z = 0; z < depth; z++)
   1564    {
   1565        for (size_t y = 0; y < height; y++)
   1566        {
   1567            const float *sourceDepth =
   1568                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
   1569            float *destDepth =
   1570                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
   1571            for (size_t x = 0; x < width; x++)
   1572            {
   1573                destDepth[x] = gl::clamp01(sourceDepth[x * 2]);
   1574            }
   1575        }
   1576    }
   1577 }
   1578 
   1579 void LoadD32FS8X24ToD32FS8X24(size_t width,
   1580                              size_t height,
   1581                              size_t depth,
   1582                              const uint8_t *input,
   1583                              size_t inputRowPitch,
   1584                              size_t inputDepthPitch,
   1585                              uint8_t *output,
   1586                              size_t outputRowPitch,
   1587                              size_t outputDepthPitch)
   1588 {
   1589    for (size_t z = 0; z < depth; z++)
   1590    {
   1591        for (size_t y = 0; y < height; y++)
   1592        {
   1593            const float *sourceDepth =
   1594                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
   1595            const uint32_t *sourceStencil =
   1596                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch) + 1;
   1597            float *destDepth =
   1598                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
   1599            uint32_t *destStencil =
   1600                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch) +
   1601                1;
   1602            for (size_t x = 0; x < width; x++)
   1603            {
   1604                destDepth[x * 2]   = gl::clamp01(sourceDepth[x * 2]);
   1605                destStencil[x * 2] = sourceStencil[x * 2] & 0xFF000000;
   1606            }
   1607        }
   1608    }
   1609 }
   1610 
   1611 void LoadRGB32FToRGBA16F(size_t width,
   1612                         size_t height,
   1613                         size_t depth,
   1614                         const uint8_t *input,
   1615                         size_t inputRowPitch,
   1616                         size_t inputDepthPitch,
   1617                         uint8_t *output,
   1618                         size_t outputRowPitch,
   1619                         size_t outputDepthPitch)
   1620 {
   1621    for (size_t z = 0; z < depth; z++)
   1622    {
   1623        for (size_t y = 0; y < height; y++)
   1624        {
   1625            const float *source =
   1626                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
   1627            uint16_t *dest =
   1628                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1629            for (size_t x = 0; x < width; x++)
   1630            {
   1631                dest[x * 4 + 0] = gl::float32ToFloat16(source[x * 3 + 0]);
   1632                dest[x * 4 + 1] = gl::float32ToFloat16(source[x * 3 + 1]);
   1633                dest[x * 4 + 2] = gl::float32ToFloat16(source[x * 3 + 2]);
   1634                dest[x * 4 + 3] = gl::Float16One;
   1635            }
   1636        }
   1637    }
   1638 }
   1639 
   1640 void LoadRGB32FToRGB16F(size_t width,
   1641                        size_t height,
   1642                        size_t depth,
   1643                        const uint8_t *input,
   1644                        size_t inputRowPitch,
   1645                        size_t inputDepthPitch,
   1646                        uint8_t *output,
   1647                        size_t outputRowPitch,
   1648                        size_t outputDepthPitch)
   1649 {
   1650    for (size_t z = 0; z < depth; z++)
   1651    {
   1652        for (size_t y = 0; y < height; y++)
   1653        {
   1654            const float *source =
   1655                priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
   1656            uint16_t *dest =
   1657                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1658            for (size_t x = 0; x < width; x++)
   1659            {
   1660                dest[x * 3 + 0] = gl::float32ToFloat16(source[x * 3 + 0]);
   1661                dest[x * 3 + 1] = gl::float32ToFloat16(source[x * 3 + 1]);
   1662                dest[x * 3 + 2] = gl::float32ToFloat16(source[x * 3 + 2]);
   1663            }
   1664        }
   1665    }
   1666 }
   1667 
   1668 void LoadR32ToR16(size_t width,
   1669                  size_t height,
   1670                  size_t depth,
   1671                  const uint8_t *input,
   1672                  size_t inputRowPitch,
   1673                  size_t inputDepthPitch,
   1674                  uint8_t *output,
   1675                  size_t outputRowPitch,
   1676                  size_t outputDepthPitch)
   1677 {
   1678    for (size_t z = 0; z < depth; z++)
   1679    {
   1680        for (size_t y = 0; y < height; y++)
   1681        {
   1682            const uint32_t *source =
   1683                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1684            uint16_t *dest =
   1685                priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1686            for (size_t x = 0; x < width; x++)
   1687            {
   1688                dest[x] = source[x] >> 16;
   1689            }
   1690        }
   1691    }
   1692 }
   1693 
   1694 void LoadR32ToR24G8(size_t width,
   1695                    size_t height,
   1696                    size_t depth,
   1697                    const uint8_t *input,
   1698                    size_t inputRowPitch,
   1699                    size_t inputDepthPitch,
   1700                    uint8_t *output,
   1701                    size_t outputRowPitch,
   1702                    size_t outputDepthPitch)
   1703 {
   1704    for (size_t z = 0; z < depth; z++)
   1705    {
   1706        for (size_t y = 0; y < height; y++)
   1707        {
   1708            const uint32_t *source =
   1709                priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1710            uint32_t *dest =
   1711                priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
   1712 
   1713            for (size_t x = 0; x < width; x++)
   1714            {
   1715                dest[x] = source[x] >> 8;
   1716            }
   1717        }
   1718    }
   1719 }
   1720 
   1721 // This conversion was added to support using a 32F depth buffer
   1722 // as emulation for 16unorm depth buffer in Metal.
   1723 // See angleproject:6597
   1724 void LoadUNorm16To32F(size_t width,
   1725                      size_t height,
   1726                      size_t depth,
   1727                      const uint8_t *input,
   1728                      size_t inputRowPitch,
   1729                      size_t inputDepthPitch,
   1730                      uint8_t *output,
   1731                      size_t outputRowPitch,
   1732                      size_t outputDepthPitch)
   1733 {
   1734    for (size_t z = 0; z < depth; z++)
   1735    {
   1736        for (size_t y = 0; y < height; y++)
   1737        {
   1738            const uint16_t *source =
   1739                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1740            float *dest =
   1741                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
   1742            for (size_t x = 0; x < width; x++)
   1743            {
   1744                dest[x] = static_cast<float>(source[x]) / 0xFFFF;
   1745            }
   1746        }
   1747    }
   1748 }
   1749 
   1750 // This conversion was added to support using a 32F depth buffer
   1751 // as emulation for 16unorm depth buffer in Metal. In OpenGL ES 3.0
   1752 // you're allowed to pass UNSIGNED_INT as input to texImage2D and
   1753 // so this conversion is neccasary.
   1754 //
   1755 // See angleproject:6597
   1756 void LoadUNorm32To32F(size_t width,
   1757                      size_t height,
   1758                      size_t depth,
   1759                      const uint8_t *input,
   1760                      size_t inputRowPitch,
   1761                      size_t inputDepthPitch,
   1762                      uint8_t *output,
   1763                      size_t outputRowPitch,
   1764                      size_t outputDepthPitch)
   1765 {
   1766    for (size_t z = 0; z < depth; z++)
   1767    {
   1768        for (size_t y = 0; y < height; y++)
   1769        {
   1770            const uint16_t *source =
   1771                priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
   1772            float *dest =
   1773                priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
   1774            for (size_t x = 0; x < width; x++)
   1775            {
   1776                dest[x] = static_cast<float>(source[x]) / static_cast<float>(0xFFFFFFFFU);
   1777            }
   1778        }
   1779    }
   1780 }
   1781 
   1782 void LoadYuvToNative(size_t width,
   1783                     size_t height,
   1784                     size_t depth,
   1785                     const uint8_t *input,
   1786                     size_t inputRowPitch,
   1787                     size_t inputDepthPitch,
   1788                     uint8_t *output,
   1789                     size_t outputRowPitch,
   1790                     size_t outputDepthPitch)
   1791 {
   1792    // For YUV formats it is assumed that source has tightly packed data.
   1793    memcpy(output, input, inputDepthPitch);
   1794 }
   1795 
   1796 }  // namespace angle