tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lossless.c (26406B)


      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Image transforms and color space conversion methods for lossless decoder.
     11 //
     12 // Authors: Vikas Arora (vikaas.arora@gmail.com)
     13 //          Jyrki Alakuijala (jyrki@google.com)
     14 //          Urvang Joshi (urvang@google.com)
     15 
     16 #include "src/dsp/lossless.h"
     17 
     18 #include <assert.h>
     19 #include <stdlib.h>
     20 #include <string.h>
     21 
     22 #include "src/dec/vp8li_dec.h"
     23 #include "src/dsp/cpu.h"
     24 #include "src/dsp/dsp.h"
     25 #include "src/dsp/lossless_common.h"
     26 #include "src/utils/endian_inl_utils.h"
     27 #include "src/utils/utils.h"
     28 #include "src/webp/decode.h"
     29 #include "src/webp/format_constants.h"
     30 #include "src/webp/types.h"
     31 
     32 //------------------------------------------------------------------------------
     33 // Image transforms.
     34 
     35 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
     36  return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
     37 }
     38 
     39 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
     40  return Average2(Average2(a0, a2), a1);
     41 }
     42 
     43 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
     44                                     uint32_t a2, uint32_t a3) {
     45  return Average2(Average2(a0, a1), Average2(a2, a3));
     46 }
     47 
     48 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
     49  if (a < 256) {
     50    return a;
     51  }
     52  // return 0, when a is a negative integer.
     53  // return 255, when a is positive.
     54  return ~a >> 24;
     55 }
     56 
     57 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
     58  return Clip255((uint32_t)(a + b - c));
     59 }
     60 
     61 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
     62                                                   uint32_t c2) {
     63  const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
     64  const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
     65                                         (c1 >> 16) & 0xff,
     66                                         (c2 >> 16) & 0xff);
     67  const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
     68                                         (c1 >> 8) & 0xff,
     69                                         (c2 >> 8) & 0xff);
     70  const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
     71  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
     72 }
     73 
     74 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
     75  return Clip255((uint32_t)(a + (a - b) / 2));
     76 }
     77 
     78 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
     79                                                   uint32_t c2) {
     80  const uint32_t ave = Average2(c0, c1);
     81  const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
     82  const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
     83  const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
     84  const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
     85  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
     86 }
     87 
     88 // gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
     89 // inlined.
     90 #if defined(__arm__) && defined(__GNUC__) && LOCAL_GCC_VERSION <= 0x409
     91 # define LOCAL_INLINE __attribute__ ((noinline))
     92 #else
     93 # define LOCAL_INLINE WEBP_INLINE
     94 #endif
     95 
     96 static LOCAL_INLINE int Sub3(int a, int b, int c) {
     97  const int pb = b - c;
     98  const int pa = a - c;
     99  return abs(pb) - abs(pa);
    100 }
    101 
    102 #undef LOCAL_INLINE
    103 
    104 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
    105  const int pa_minus_pb =
    106      Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
    107      Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
    108      Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
    109      Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
    110  return (pa_minus_pb <= 0) ? a : b;
    111 }
    112 
    113 //------------------------------------------------------------------------------
    114 // Predictors
    115 
    116 static uint32_t VP8LPredictor0_C(const uint32_t* const left,
    117                                 const uint32_t* const top) {
    118  (void)top;
    119  (void)left;
    120  return ARGB_BLACK;
    121 }
    122 static uint32_t VP8LPredictor1_C(const uint32_t* const left,
    123                                 const uint32_t* const top) {
    124  (void)top;
    125  return *left;
    126 }
    127 uint32_t VP8LPredictor2_C(const uint32_t* const left,
    128                          const uint32_t* const top) {
    129  (void)left;
    130  return top[0];
    131 }
    132 uint32_t VP8LPredictor3_C(const uint32_t* const left,
    133                          const uint32_t* const top) {
    134  (void)left;
    135  return top[1];
    136 }
    137 uint32_t VP8LPredictor4_C(const uint32_t* const left,
    138                          const uint32_t* const top) {
    139  (void)left;
    140  return top[-1];
    141 }
    142 uint32_t VP8LPredictor5_C(const uint32_t* const left,
    143                          const uint32_t* const top) {
    144  const uint32_t pred = Average3(*left, top[0], top[1]);
    145  return pred;
    146 }
    147 uint32_t VP8LPredictor6_C(const uint32_t* const left,
    148                          const uint32_t* const top) {
    149  const uint32_t pred = Average2(*left, top[-1]);
    150  return pred;
    151 }
    152 uint32_t VP8LPredictor7_C(const uint32_t* const left,
    153                          const uint32_t* const top) {
    154  const uint32_t pred = Average2(*left, top[0]);
    155  return pred;
    156 }
    157 uint32_t VP8LPredictor8_C(const uint32_t* const left,
    158                          const uint32_t* const top) {
    159  const uint32_t pred = Average2(top[-1], top[0]);
    160  (void)left;
    161  return pred;
    162 }
    163 uint32_t VP8LPredictor9_C(const uint32_t* const left,
    164                          const uint32_t* const top) {
    165  const uint32_t pred = Average2(top[0], top[1]);
    166  (void)left;
    167  return pred;
    168 }
    169 uint32_t VP8LPredictor10_C(const uint32_t* const left,
    170                           const uint32_t* const top) {
    171  const uint32_t pred = Average4(*left, top[-1], top[0], top[1]);
    172  return pred;
    173 }
    174 uint32_t VP8LPredictor11_C(const uint32_t* const left,
    175                           const uint32_t* const top) {
    176  const uint32_t pred = Select(top[0], *left, top[-1]);
    177  return pred;
    178 }
    179 uint32_t VP8LPredictor12_C(const uint32_t* const left,
    180                           const uint32_t* const top) {
    181  const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]);
    182  return pred;
    183 }
    184 uint32_t VP8LPredictor13_C(const uint32_t* const left,
    185                           const uint32_t* const top) {
    186  const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]);
    187  return pred;
    188 }
    189 
    190 static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
    191                            int num_pixels, uint32_t* WEBP_RESTRICT out) {
    192  int x;
    193  (void)upper;
    194  for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
    195 }
    196 static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
    197                            int num_pixels, uint32_t* WEBP_RESTRICT out) {
    198  int i;
    199  uint32_t left = out[-1];
    200  (void)upper;
    201  for (i = 0; i < num_pixels; ++i) {
    202    out[i] = left = VP8LAddPixels(in[i], left);
    203  }
    204 }
// Instantiate PredictorAdd2_C .. PredictorAdd13_C, the names later stored in
// the VP8LPredictorsAdd tables. GENERATE_PREDICTOR_ADD is not defined in this
// file (presumably in lossless_common.h -- confirm); each invocation pairs a
// per-pixel predictor with the name of the function to generate from it.
GENERATE_PREDICTOR_ADD(VP8LPredictor2_C, PredictorAdd2_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor3_C, PredictorAdd3_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor4_C, PredictorAdd4_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor5_C, PredictorAdd5_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor6_C, PredictorAdd6_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor7_C, PredictorAdd7_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor8_C, PredictorAdd8_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor9_C, PredictorAdd9_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor10_C, PredictorAdd10_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor11_C, PredictorAdd11_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor12_C, PredictorAdd12_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor13_C, PredictorAdd13_C)
    217 
    218 //------------------------------------------------------------------------------
    219 
    220 // Inverse prediction.
    221 static void PredictorInverseTransform_C(const VP8LTransform* const transform,
    222                                        int y_start, int y_end,
    223                                        const uint32_t* in, uint32_t* out) {
    224  const int width = transform->xsize;
    225  if (y_start == 0) {  // First Row follows the L (mode=1) mode.
    226    PredictorAdd0_C(in, NULL, 1, out);
    227    PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
    228    in += width;
    229    out += width;
    230    ++y_start;
    231  }
    232 
    233  {
    234    int y = y_start;
    235    const int tile_width = 1 << transform->bits;
    236    const int mask = tile_width - 1;
    237    const int tiles_per_row = VP8LSubSampleSize(width, transform->bits);
    238    const uint32_t* pred_mode_base =
    239        transform->data + (y >> transform->bits) * tiles_per_row;
    240 
    241    while (y < y_end) {
    242      const uint32_t* pred_mode_src = pred_mode_base;
    243      int x = 1;
    244      // First pixel follows the T (mode=2) mode.
    245      PredictorAdd2_C(in, out - width, 1, out);
    246      // .. the rest:
    247      while (x < width) {
    248        const VP8LPredictorAddSubFunc pred_func =
    249            VP8LPredictorsAdd[((*pred_mode_src++) >> 8) & 0xf];
    250        int x_end = (x & ~mask) + tile_width;
    251        if (x_end > width) x_end = width;
    252        pred_func(in + x, out + x - width, x_end - x, out + x);
    253        x = x_end;
    254      }
    255      in += width;
    256      out += width;
    257      ++y;
    258      if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
    259        pred_mode_base += tiles_per_row;
    260      }
    261    }
    262  }
    263 }
    264 
    265 // Add green to blue and red channels (i.e. perform the inverse transform of
    266 // 'subtract green').
    267 void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
    268                                uint32_t* dst) {
    269  int i;
    270  for (i = 0; i < num_pixels; ++i) {
    271    const uint32_t argb = src[i];
    272    const uint32_t green = ((argb >> 8) & 0xff);
    273    uint32_t red_blue = (argb & 0x00ff00ffu);
    274    red_blue += (green << 16) | green;
    275    red_blue &= 0x00ff00ffu;
    276    dst[i] = (argb & 0xff00ff00u) | red_blue;
    277  }
    278 }
    279 
    280 static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
    281                                           int8_t color) {
    282  return ((int)color_pred * color) >> 5;
    283 }
    284 
    285 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
    286                                               VP8LMultipliers* const m) {
    287  m->green_to_red  = (color_code >>  0) & 0xff;
    288  m->green_to_blue = (color_code >>  8) & 0xff;
    289  m->red_to_blue   = (color_code >> 16) & 0xff;
    290 }
    291 
    292 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
    293                                 const uint32_t* src, int num_pixels,
    294                                 uint32_t* dst) {
    295  int i;
    296  for (i = 0; i < num_pixels; ++i) {
    297    const uint32_t argb = src[i];
    298    const int8_t green = (int8_t)(argb >> 8);
    299    const uint32_t red = argb >> 16;
    300    int new_red = red & 0xff;
    301    int new_blue = argb & 0xff;
    302    new_red += ColorTransformDelta((int8_t)m->green_to_red, green);
    303    new_red &= 0xff;
    304    new_blue += ColorTransformDelta((int8_t)m->green_to_blue, green);
    305    new_blue += ColorTransformDelta((int8_t)m->red_to_blue, (int8_t)new_red);
    306    new_blue &= 0xff;
    307    dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
    308  }
    309 }
    310 
    311 // Color space inverse transform.
    312 static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
    313                                         int y_start, int y_end,
    314                                         const uint32_t* src, uint32_t* dst) {
    315  const int width = transform->xsize;
    316  const int tile_width = 1 << transform->bits;
    317  const int mask = tile_width - 1;
    318  const int safe_width = width & ~mask;
    319  const int remaining_width = width - safe_width;
    320  const int tiles_per_row = VP8LSubSampleSize(width, transform->bits);
    321  int y = y_start;
    322  const uint32_t* pred_row =
    323      transform->data + (y >> transform->bits) * tiles_per_row;
    324 
    325  while (y < y_end) {
    326    const uint32_t* pred = pred_row;
    327    VP8LMultipliers m = { 0, 0, 0 };
    328    const uint32_t* const src_safe_end = src + safe_width;
    329    const uint32_t* const src_end = src + width;
    330    while (src < src_safe_end) {
    331      ColorCodeToMultipliers(*pred++, &m);
    332      VP8LTransformColorInverse(&m, src, tile_width, dst);
    333      src += tile_width;
    334      dst += tile_width;
    335    }
    336    if (src < src_end) {  // Left-overs using C-version.
    337      ColorCodeToMultipliers(*pred++, &m);
    338      VP8LTransformColorInverse(&m, src, remaining_width, dst);
    339      src += remaining_width;
    340      dst += remaining_width;
    341    }
    342    ++y;
    343    if ((y & mask) == 0) pred_row += tiles_per_row;
    344  }
    345 }
    346 
// Separate out pixels packed together using pixel-bundling.
// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
//
// COLOR_INDEX_INVERSE generates two functions:
//  - F_NAME: the unbundled mapper. For every pixel of rows [y_start, y_end)
//    it writes GET_VALUE(color_map[GET_INDEX(pixel)]) to 'dst'.
//  - FUNC_NAME (declared with STATIC_DECL): the transform entry point. With
//    bits_per_pixel = 8 >> transform->bits, it unpacks (1 << transform->bits)
//    indices from each source element, lowest bits first, when
//    bits_per_pixel < 8; otherwise it forwards to VP8LMapColor##BIT_SUFFIX.
// TYPE is the element type of 'src' and 'dst'. In the bundled path a fresh
// source element is loaded at the start of every row, since x restarts at 0.
#define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
                            GET_INDEX, GET_VALUE)                              \
static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
                   TYPE* dst, int y_start, int y_end, int width) {             \
  int y;                                                                       \
  for (y = y_start; y < y_end; ++y) {                                          \
    int x;                                                                     \
    for (x = 0; x < width; ++x) {                                              \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    }                                                                          \
  }                                                                            \
}                                                                              \
STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
                           int y_start, int y_end, const TYPE* src,            \
                           TYPE* dst) {                                        \
  int y;                                                                       \
  const int bits_per_pixel = 8 >> transform->bits;                             \
  const int width = transform->xsize;                                          \
  const uint32_t* const color_map = transform->data;                           \
  if (bits_per_pixel < 8) {                                                    \
    const int pixels_per_byte = 1 << transform->bits;                          \
    const int count_mask = pixels_per_byte - 1;                                \
    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
    for (y = y_start; y < y_end; ++y) {                                        \
      uint32_t packed_pixels = 0;                                              \
      int x;                                                                   \
      for (x = 0; x < width; ++x) {                                            \
        /* We need to load fresh 'packed_pixels' once every                */  \
        /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
        /* is a power of 2, so can just use a mask for that, instead of    */  \
        /* decrementing a counter.                                         */  \
        if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
        *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
        packed_pixels >>= bits_per_pixel;                                      \
      }                                                                        \
    }                                                                          \
  } else {                                                                     \
    VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
  }                                                                            \
}

// ARGB flavour: file-local transform entry point plus MapARGB_C.
COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
                    uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
// Alpha-only flavour: externally visible entry point plus MapAlpha_C.
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
                    uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef COLOR_INDEX_INVERSE
    396 
    397 void VP8LInverseTransform(const VP8LTransform* const transform,
    398                          int row_start, int row_end,
    399                          const uint32_t* const in, uint32_t* const out) {
    400  const int width = transform->xsize;
    401  assert(row_start < row_end);
    402  assert(row_end <= transform->ysize);
    403  switch (transform->type) {
    404    case SUBTRACT_GREEN_TRANSFORM:
    405      VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
    406      break;
    407    case PREDICTOR_TRANSFORM:
    408      PredictorInverseTransform_C(transform, row_start, row_end, in, out);
    409      if (row_end != transform->ysize) {
    410        // The last predicted row in this iteration will be the top-pred row
    411        // for the first row in next iteration.
    412        memcpy(out - width, out + (row_end - row_start - 1) * width,
    413               width * sizeof(*out));
    414      }
    415      break;
    416    case CROSS_COLOR_TRANSFORM:
    417      ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
    418      break;
    419    case COLOR_INDEXING_TRANSFORM:
    420      if (in == out && transform->bits > 0) {
    421        // Move packed pixels to the end of unpacked region, so that unpacking
    422        // can occur seamlessly.
    423        // Also, note that this is the only transform that applies on
    424        // the effective width of VP8LSubSampleSize(xsize, bits). All other
    425        // transforms work on effective width of 'xsize'.
    426        const int out_stride = (row_end - row_start) * width;
    427        const int in_stride = (row_end - row_start) *
    428            VP8LSubSampleSize(transform->xsize, transform->bits);
    429        uint32_t* const src = out + out_stride - in_stride;
    430        memmove(src, out, in_stride * sizeof(*src));
    431        ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
    432      } else {
    433        ColorIndexInverseTransform_C(transform, row_start, row_end, in, out);
    434      }
    435      break;
    436  }
    437 }
    438 
    439 //------------------------------------------------------------------------------
    440 // Color space conversion.
    441 
    442 static int is_big_endian(void) {
    443  static const union {
    444    uint16_t w;
    445    uint8_t b[2];
    446  } tmp = { 1 };
    447  return (tmp.b[0] != 1);
    448 }
    449 
    450 void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
    451                            int num_pixels, uint8_t* WEBP_RESTRICT dst) {
    452  const uint32_t* const src_end = src + num_pixels;
    453  while (src < src_end) {
    454    const uint32_t argb = *src++;
    455    *dst++ = (argb >> 16) & 0xff;
    456    *dst++ = (argb >>  8) & 0xff;
    457    *dst++ = (argb >>  0) & 0xff;
    458  }
    459 }
    460 
    461 void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src,
    462                             int num_pixels, uint8_t* WEBP_RESTRICT dst) {
    463  const uint32_t* const src_end = src + num_pixels;
    464  while (src < src_end) {
    465    const uint32_t argb = *src++;
    466    *dst++ = (argb >> 16) & 0xff;
    467    *dst++ = (argb >>  8) & 0xff;
    468    *dst++ = (argb >>  0) & 0xff;
    469    *dst++ = (argb >> 24) & 0xff;
    470  }
    471 }
    472 
    473 void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
    474                                 int num_pixels, uint8_t* WEBP_RESTRICT dst) {
    475  const uint32_t* const src_end = src + num_pixels;
    476  while (src < src_end) {
    477    const uint32_t argb = *src++;
    478    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
    479    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
    480 #if (WEBP_SWAP_16BIT_CSP == 1)
    481    *dst++ = ba;
    482    *dst++ = rg;
    483 #else
    484    *dst++ = rg;
    485    *dst++ = ba;
    486 #endif
    487  }
    488 }
    489 
    490 void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
    491                               int num_pixels, uint8_t* WEBP_RESTRICT dst) {
    492  const uint32_t* const src_end = src + num_pixels;
    493  while (src < src_end) {
    494    const uint32_t argb = *src++;
    495    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
    496    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
    497 #if (WEBP_SWAP_16BIT_CSP == 1)
    498    *dst++ = gb;
    499    *dst++ = rg;
    500 #else
    501    *dst++ = rg;
    502    *dst++ = gb;
    503 #endif
    504  }
    505 }
    506 
    507 void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src,
    508                            int num_pixels, uint8_t* WEBP_RESTRICT dst) {
    509  const uint32_t* const src_end = src + num_pixels;
    510  while (src < src_end) {
    511    const uint32_t argb = *src++;
    512    *dst++ = (argb >>  0) & 0xff;
    513    *dst++ = (argb >>  8) & 0xff;
    514    *dst++ = (argb >> 16) & 0xff;
    515  }
    516 }
    517 
    518 static void CopyOrSwap(const uint32_t* WEBP_RESTRICT src, int num_pixels,
    519                       uint8_t* WEBP_RESTRICT dst, int swap_on_big_endian) {
    520  if (is_big_endian() == swap_on_big_endian) {
    521    const uint32_t* const src_end = src + num_pixels;
    522    while (src < src_end) {
    523      const uint32_t argb = *src++;
    524      WebPUint32ToMem(dst, BSwap32(argb));
    525      dst += sizeof(argb);
    526    }
    527  } else {
    528    memcpy(dst, src, num_pixels * sizeof(*src));
    529  }
    530 }
    531 
    532 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
    533                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
    534  switch (out_colorspace) {
    535    case MODE_RGB:
    536      VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
    537      break;
    538    case MODE_RGBA:
    539      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
    540      break;
    541    case MODE_rgbA:
    542      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
    543      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
    544      break;
    545    case MODE_BGR:
    546      VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
    547      break;
    548    case MODE_BGRA:
    549      CopyOrSwap(in_data, num_pixels, rgba, 1);
    550      break;
    551    case MODE_bgrA:
    552      CopyOrSwap(in_data, num_pixels, rgba, 1);
    553      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
    554      break;
    555    case MODE_ARGB:
    556      CopyOrSwap(in_data, num_pixels, rgba, 0);
    557      break;
    558    case MODE_Argb:
    559      CopyOrSwap(in_data, num_pixels, rgba, 0);
    560      WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
    561      break;
    562    case MODE_RGBA_4444:
    563      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
    564      break;
    565    case MODE_rgbA_4444:
    566      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
    567      WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
    568      break;
    569    case MODE_RGB_565:
    570      VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
    571      break;
    572    default:
    573      assert(0);          // Code flow should not reach here.
    574  }
    575 }
    576 
    577 //------------------------------------------------------------------------------
    578 
// Dispatch pointers and tables for the lossless decoder DSP routines.
// VP8LDspInit() assigns the plain-C defaults to the non-suffixed ones; the
// extern VP8LDspInit*() functions declared below are called afterwards when
// the corresponding CPU feature is reported.
// NOTE(review): the *_SSE variables are only defined here and never assigned
// in this file; presumably the SSE init code fills them in -- confirm.
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE;
// Row predictors, indexed by a 4-bit predictor mode (hence 16 entries).
VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16];
// Per-pixel predictors, same indexing.
VP8LPredictorFunc VP8LPredictors[16];

// exposed plain-C implementations
VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];

VP8LTransformColorInverseFunc VP8LTransformColorInverse;
VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE;

// Color-space converters used by VP8LConvertFromBGRA().
VP8LConvertFunc VP8LConvertBGRAToRGB;
VP8LConvertFunc VP8LConvertBGRAToRGB_SSE;
VP8LConvertFunc VP8LConvertBGRAToRGBA;
VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE;
VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
VP8LConvertFunc VP8LConvertBGRAToRGB565;
VP8LConvertFunc VP8LConvertBGRAToBGR;

// Unbundled color-map lookups used by the color-indexing inverse transform.
VP8LMapARGBFunc VP8LMapColor32b;
VP8LMapAlphaFunc VP8LMapColor8b;

// CPU feature query and the per-architecture initializers, defined elsewhere.
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitSSE41(void);
extern void VP8LDspInitAVX2(void);
extern void VP8LDspInitNEON(void);
extern void VP8LDspInitMIPSdspR2(void);
extern void VP8LDspInitMSA(void);
    609 
    610 #define COPY_PREDICTOR_ARRAY(IN, OUT) do {                \
    611  (OUT)[0] = IN##0_C;                                     \
    612  (OUT)[1] = IN##1_C;                                     \
    613  (OUT)[2] = IN##2_C;                                     \
    614  (OUT)[3] = IN##3_C;                                     \
    615  (OUT)[4] = IN##4_C;                                     \
    616  (OUT)[5] = IN##5_C;                                     \
    617  (OUT)[6] = IN##6_C;                                     \
    618  (OUT)[7] = IN##7_C;                                     \
    619  (OUT)[8] = IN##8_C;                                     \
    620  (OUT)[9] = IN##9_C;                                     \
    621  (OUT)[10] = IN##10_C;                                   \
    622  (OUT)[11] = IN##11_C;                                   \
    623  (OUT)[12] = IN##12_C;                                   \
    624  (OUT)[13] = IN##13_C;                                   \
    625  (OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \
    626  (OUT)[15] = IN##0_C;                                    \
    627 } while (0);
    628 
    629 WEBP_DSP_INIT_FUNC(VP8LDspInit) {
    630  COPY_PREDICTOR_ARRAY(VP8LPredictor, VP8LPredictors)
    631  COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
    632  COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
    633 
    634 #if !WEBP_NEON_OMIT_C_CODE
    635  VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
    636 
    637  VP8LTransformColorInverse = VP8LTransformColorInverse_C;
    638 
    639  VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
    640  VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
    641  VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
    642 #endif
    643 
    644  VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
    645  VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
    646 
    647  VP8LMapColor32b = MapARGB_C;
    648  VP8LMapColor8b = MapAlpha_C;
    649 
    650  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
    651  if (VP8GetCPUInfo != NULL) {
    652 #if defined(WEBP_HAVE_SSE2)
    653    if (VP8GetCPUInfo(kSSE2)) {
    654      VP8LDspInitSSE2();
    655 #if defined(WEBP_HAVE_SSE41)
    656      if (VP8GetCPUInfo(kSSE4_1)) {
    657        VP8LDspInitSSE41();
    658 #if defined(WEBP_HAVE_AVX2)
    659        if (VP8GetCPUInfo(kAVX2)) {
    660          VP8LDspInitAVX2();
    661        }
    662 #endif
    663      }
    664 #endif
    665    }
    666 #endif
    667 #if defined(WEBP_USE_MIPS_DSP_R2)
    668    if (VP8GetCPUInfo(kMIPSdspR2)) {
    669      VP8LDspInitMIPSdspR2();
    670    }
    671 #endif
    672 #if defined(WEBP_USE_MSA)
    673    if (VP8GetCPUInfo(kMSA)) {
    674      VP8LDspInitMSA();
    675    }
    676 #endif
    677  }
    678 
    679 #if defined(WEBP_HAVE_NEON)
    680  if (WEBP_NEON_OMIT_C_CODE ||
    681      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
    682    VP8LDspInitNEON();
    683  }
    684 #endif
    685 
    686  assert(VP8LAddGreenToBlueAndRed != NULL);
    687  assert(VP8LTransformColorInverse != NULL);
    688  assert(VP8LConvertBGRAToRGBA != NULL);
    689  assert(VP8LConvertBGRAToRGB != NULL);
    690  assert(VP8LConvertBGRAToBGR != NULL);
    691  assert(VP8LConvertBGRAToRGBA4444 != NULL);
    692  assert(VP8LConvertBGRAToRGB565 != NULL);
    693  assert(VP8LMapColor32b != NULL);
    694  assert(VP8LMapColor8b != NULL);
    695 }
    696 #undef COPY_PREDICTOR_ARRAY
    697 
    698 //------------------------------------------------------------------------------