// lossless.c (26406B)
1 // Copyright 2012 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // Image transforms and color space conversion methods for lossless decoder. 11 // 12 // Authors: Vikas Arora (vikaas.arora@gmail.com) 13 // Jyrki Alakuijala (jyrki@google.com) 14 // Urvang Joshi (urvang@google.com) 15 16 #include "src/dsp/lossless.h" 17 18 #include <assert.h> 19 #include <stdlib.h> 20 #include <string.h> 21 22 #include "src/dec/vp8li_dec.h" 23 #include "src/dsp/cpu.h" 24 #include "src/dsp/dsp.h" 25 #include "src/dsp/lossless_common.h" 26 #include "src/utils/endian_inl_utils.h" 27 #include "src/utils/utils.h" 28 #include "src/webp/decode.h" 29 #include "src/webp/format_constants.h" 30 #include "src/webp/types.h" 31 32 //------------------------------------------------------------------------------ 33 // Image transforms. 34 35 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { 36 return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1); 37 } 38 39 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { 40 return Average2(Average2(a0, a2), a1); 41 } 42 43 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, 44 uint32_t a2, uint32_t a3) { 45 return Average2(Average2(a0, a1), Average2(a2, a3)); 46 } 47 48 static WEBP_INLINE uint32_t Clip255(uint32_t a) { 49 if (a < 256) { 50 return a; 51 } 52 // return 0, when a is a negative integer. 53 // return 255, when a is positive. 
54 return ~a >> 24; 55 } 56 57 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { 58 return Clip255((uint32_t)(a + b - c)); 59 } 60 61 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, 62 uint32_t c2) { 63 const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24); 64 const int r = AddSubtractComponentFull((c0 >> 16) & 0xff, 65 (c1 >> 16) & 0xff, 66 (c2 >> 16) & 0xff); 67 const int g = AddSubtractComponentFull((c0 >> 8) & 0xff, 68 (c1 >> 8) & 0xff, 69 (c2 >> 8) & 0xff); 70 const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff); 71 return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; 72 } 73 74 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { 75 return Clip255((uint32_t)(a + (a - b) / 2)); 76 } 77 78 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, 79 uint32_t c2) { 80 const uint32_t ave = Average2(c0, c1); 81 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); 82 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); 83 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); 84 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); 85 return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; 86 } 87 88 // gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is 89 // inlined. 
90 #if defined(__arm__) && defined(__GNUC__) && LOCAL_GCC_VERSION <= 0x409 91 # define LOCAL_INLINE __attribute__ ((noinline)) 92 #else 93 # define LOCAL_INLINE WEBP_INLINE 94 #endif 95 96 static LOCAL_INLINE int Sub3(int a, int b, int c) { 97 const int pb = b - c; 98 const int pa = a - c; 99 return abs(pb) - abs(pa); 100 } 101 102 #undef LOCAL_INLINE 103 104 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { 105 const int pa_minus_pb = 106 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + 107 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + 108 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + 109 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); 110 return (pa_minus_pb <= 0) ? a : b; 111 } 112 113 //------------------------------------------------------------------------------ 114 // Predictors 115 116 static uint32_t VP8LPredictor0_C(const uint32_t* const left, 117 const uint32_t* const top) { 118 (void)top; 119 (void)left; 120 return ARGB_BLACK; 121 } 122 static uint32_t VP8LPredictor1_C(const uint32_t* const left, 123 const uint32_t* const top) { 124 (void)top; 125 return *left; 126 } 127 uint32_t VP8LPredictor2_C(const uint32_t* const left, 128 const uint32_t* const top) { 129 (void)left; 130 return top[0]; 131 } 132 uint32_t VP8LPredictor3_C(const uint32_t* const left, 133 const uint32_t* const top) { 134 (void)left; 135 return top[1]; 136 } 137 uint32_t VP8LPredictor4_C(const uint32_t* const left, 138 const uint32_t* const top) { 139 (void)left; 140 return top[-1]; 141 } 142 uint32_t VP8LPredictor5_C(const uint32_t* const left, 143 const uint32_t* const top) { 144 const uint32_t pred = Average3(*left, top[0], top[1]); 145 return pred; 146 } 147 uint32_t VP8LPredictor6_C(const uint32_t* const left, 148 const uint32_t* const top) { 149 const uint32_t pred = Average2(*left, top[-1]); 150 return pred; 151 } 152 uint32_t VP8LPredictor7_C(const uint32_t* const left, 153 const uint32_t* const top) { 154 const uint32_t pred = 
Average2(*left, top[0]); 155 return pred; 156 } 157 uint32_t VP8LPredictor8_C(const uint32_t* const left, 158 const uint32_t* const top) { 159 const uint32_t pred = Average2(top[-1], top[0]); 160 (void)left; 161 return pred; 162 } 163 uint32_t VP8LPredictor9_C(const uint32_t* const left, 164 const uint32_t* const top) { 165 const uint32_t pred = Average2(top[0], top[1]); 166 (void)left; 167 return pred; 168 } 169 uint32_t VP8LPredictor10_C(const uint32_t* const left, 170 const uint32_t* const top) { 171 const uint32_t pred = Average4(*left, top[-1], top[0], top[1]); 172 return pred; 173 } 174 uint32_t VP8LPredictor11_C(const uint32_t* const left, 175 const uint32_t* const top) { 176 const uint32_t pred = Select(top[0], *left, top[-1]); 177 return pred; 178 } 179 uint32_t VP8LPredictor12_C(const uint32_t* const left, 180 const uint32_t* const top) { 181 const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]); 182 return pred; 183 } 184 uint32_t VP8LPredictor13_C(const uint32_t* const left, 185 const uint32_t* const top) { 186 const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]); 187 return pred; 188 } 189 190 static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper, 191 int num_pixels, uint32_t* WEBP_RESTRICT out) { 192 int x; 193 (void)upper; 194 for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK); 195 } 196 static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper, 197 int num_pixels, uint32_t* WEBP_RESTRICT out) { 198 int i; 199 uint32_t left = out[-1]; 200 (void)upper; 201 for (i = 0; i < num_pixels; ++i) { 202 out[i] = left = VP8LAddPixels(in[i], left); 203 } 204 } 205 GENERATE_PREDICTOR_ADD(VP8LPredictor2_C, PredictorAdd2_C) 206 GENERATE_PREDICTOR_ADD(VP8LPredictor3_C, PredictorAdd3_C) 207 GENERATE_PREDICTOR_ADD(VP8LPredictor4_C, PredictorAdd4_C) 208 GENERATE_PREDICTOR_ADD(VP8LPredictor5_C, PredictorAdd5_C) 209 GENERATE_PREDICTOR_ADD(VP8LPredictor6_C, PredictorAdd6_C) 210 
GENERATE_PREDICTOR_ADD(VP8LPredictor7_C, PredictorAdd7_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor8_C, PredictorAdd8_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor9_C, PredictorAdd9_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor10_C, PredictorAdd10_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor11_C, PredictorAdd11_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor12_C, PredictorAdd12_C)
GENERATE_PREDICTOR_ADD(VP8LPredictor13_C, PredictorAdd13_C)

//------------------------------------------------------------------------------

// Inverse prediction.
// Reconstructs rows [y_start, y_end) of a predictor-coded image: 'in' holds
// the residuals and 'out' receives the decoded pixels, both advancing by
// transform->xsize per row. For every row other than row 0, the decoded row
// directly above is read at 'out - width', so when y_start > 0 the caller
// must have that row in place (VP8LInverseTransform() copies it there between
// calls).
static void PredictorInverseTransform_C(const VP8LTransform* const transform,
                                        int y_start, int y_end,
                                        const uint32_t* in, uint32_t* out) {
  const int width = transform->xsize;
  if (y_start == 0) {  // First Row follows the L (mode=1) mode.
    // The very first pixel has no left neighbour: it uses the mode-0 adder.
    PredictorAdd0_C(in, NULL, 1, out);
    PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
    in += width;
    out += width;
    ++y_start;
  }

  {
    int y = y_start;
    const int tile_width = 1 << transform->bits;
    const int mask = tile_width - 1;
    const int tiles_per_row = VP8LSubSampleSize(width, transform->bits);
    // Row of per-tile entries that covers image row 'y'.
    const uint32_t* pred_mode_base =
        transform->data + (y >> transform->bits) * tiles_per_row;

    while (y < y_end) {
      const uint32_t* pred_mode_src = pred_mode_base;
      int x = 1;
      // First pixel follows the T (mode=2) mode.
      PredictorAdd2_C(in, out - width, 1, out);
      // .. the rest:
      while (x < width) {
        // The predictor mode is read from bits 8..11 of the tile's entry.
        const VP8LPredictorAddSubFunc pred_func =
            VP8LPredictorsAdd[((*pred_mode_src++) >> 8) & 0xf];
        // Process up to the end of the current tile, clipped to the row end.
        int x_end = (x & ~mask) + tile_width;
        if (x_end > width) x_end = width;
        pred_func(in + x, out + x - width, x_end - x, out + x);
        x = x_end;
      }
      in += width;
      out += width;
      ++y;
      if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
        pred_mode_base += tiles_per_row;
      }
    }
  }
}

// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
// Processes 'num_pixels' pixels from 'src' into 'dst'; alpha and green are
// copied unchanged.
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
                                uint32_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    const uint32_t green = ((argb >> 8) & 0xff);
    // Red and blue are updated together inside one 32-bit word.
    uint32_t red_blue = (argb & 0x00ff00ffu);
    red_blue += (green << 16) | green;
    // Drop the carries so that each channel wraps around modulo 256.
    red_blue &= 0x00ff00ffu;
    dst[i] = (argb & 0xff00ff00u) | red_blue;
  }
}

// Returns the product of the two signed 8-bit operands, shifted right by 5.
static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
                                           int8_t color) {
  return ((int)color_pred * color) >> 5;
}

// Unpacks the three multipliers stored in bytes 0, 1 and 2 of 'color_code'
// into 'm' (green_to_red, green_to_blue and red_to_blue respectively).
static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
                                               VP8LMultipliers* const m) {
  m->green_to_red  = (color_code >>  0) & 0xff;
  m->green_to_blue = (color_code >>  8) & 0xff;
  m->red_to_blue   = (color_code >> 16) & 0xff;
}

// Applies the inverse color transform described by 'm' to 'num_pixels' pixels
// of 'src', writing to 'dst'. Alpha and green are copied unchanged; red and
// blue each get their deltas added and are then reduced to 8 bits.
void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
                                 const uint32_t* src, int num_pixels,
                                 uint32_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    // Green is reinterpreted as a signed 8-bit quantity for the products.
    const int8_t green = (int8_t)(argb >> 8);
    const uint32_t red = argb >> 16;
    int new_red = red & 0xff;
    int new_blue = argb & 0xff;
    new_red += ColorTransformDelta((int8_t)m->green_to_red, green);
    new_red &= 0xff;
    new_blue += ColorTransformDelta((int8_t)m->green_to_blue, green);
    // The red contribution to blue uses the already-updated red value.
    new_blue += ColorTransformDelta((int8_t)m->red_to_blue, (int8_t)new_red);
    new_blue &= 0xff;
    dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
  }
}

// Color space inverse transform.
312 static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform, 313 int y_start, int y_end, 314 const uint32_t* src, uint32_t* dst) { 315 const int width = transform->xsize; 316 const int tile_width = 1 << transform->bits; 317 const int mask = tile_width - 1; 318 const int safe_width = width & ~mask; 319 const int remaining_width = width - safe_width; 320 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits); 321 int y = y_start; 322 const uint32_t* pred_row = 323 transform->data + (y >> transform->bits) * tiles_per_row; 324 325 while (y < y_end) { 326 const uint32_t* pred = pred_row; 327 VP8LMultipliers m = { 0, 0, 0 }; 328 const uint32_t* const src_safe_end = src + safe_width; 329 const uint32_t* const src_end = src + width; 330 while (src < src_safe_end) { 331 ColorCodeToMultipliers(*pred++, &m); 332 VP8LTransformColorInverse(&m, src, tile_width, dst); 333 src += tile_width; 334 dst += tile_width; 335 } 336 if (src < src_end) { // Left-overs using C-version. 337 ColorCodeToMultipliers(*pred++, &m); 338 VP8LTransformColorInverse(&m, src, remaining_width, dst); 339 src += remaining_width; 340 dst += remaining_width; 341 } 342 ++y; 343 if ((y & mask) == 0) pred_row += tiles_per_row; 344 } 345 } 346 347 // Separate out pixels packed together using pixel-bundling. 348 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t). 
349 #define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX, \ 350 GET_INDEX, GET_VALUE) \ 351 static void F_NAME(const TYPE* src, const uint32_t* const color_map, \ 352 TYPE* dst, int y_start, int y_end, int width) { \ 353 int y; \ 354 for (y = y_start; y < y_end; ++y) { \ 355 int x; \ 356 for (x = 0; x < width; ++x) { \ 357 *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \ 358 } \ 359 } \ 360 } \ 361 STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform, \ 362 int y_start, int y_end, const TYPE* src, \ 363 TYPE* dst) { \ 364 int y; \ 365 const int bits_per_pixel = 8 >> transform->bits; \ 366 const int width = transform->xsize; \ 367 const uint32_t* const color_map = transform->data; \ 368 if (bits_per_pixel < 8) { \ 369 const int pixels_per_byte = 1 << transform->bits; \ 370 const int count_mask = pixels_per_byte - 1; \ 371 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \ 372 for (y = y_start; y < y_end; ++y) { \ 373 uint32_t packed_pixels = 0; \ 374 int x; \ 375 for (x = 0; x < width; ++x) { \ 376 /* We need to load fresh 'packed_pixels' once every */ \ 377 /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \ 378 /* is a power of 2, so can just use a mask for that, instead of */ \ 379 /* decrementing a counter. 
*/ \ 380 if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \ 381 *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \ 382 packed_pixels >>= bits_per_pixel; \ 383 } \ 384 } \ 385 } else { \ 386 VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width); \ 387 } \ 388 } 389 390 COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static, 391 uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue) 392 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, , 393 uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue) 394 395 #undef COLOR_INDEX_INVERSE 396 397 void VP8LInverseTransform(const VP8LTransform* const transform, 398 int row_start, int row_end, 399 const uint32_t* const in, uint32_t* const out) { 400 const int width = transform->xsize; 401 assert(row_start < row_end); 402 assert(row_end <= transform->ysize); 403 switch (transform->type) { 404 case SUBTRACT_GREEN_TRANSFORM: 405 VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out); 406 break; 407 case PREDICTOR_TRANSFORM: 408 PredictorInverseTransform_C(transform, row_start, row_end, in, out); 409 if (row_end != transform->ysize) { 410 // The last predicted row in this iteration will be the top-pred row 411 // for the first row in next iteration. 412 memcpy(out - width, out + (row_end - row_start - 1) * width, 413 width * sizeof(*out)); 414 } 415 break; 416 case CROSS_COLOR_TRANSFORM: 417 ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out); 418 break; 419 case COLOR_INDEXING_TRANSFORM: 420 if (in == out && transform->bits > 0) { 421 // Move packed pixels to the end of unpacked region, so that unpacking 422 // can occur seamlessly. 423 // Also, note that this is the only transform that applies on 424 // the effective width of VP8LSubSampleSize(xsize, bits). All other 425 // transforms work on effective width of 'xsize'. 
426 const int out_stride = (row_end - row_start) * width; 427 const int in_stride = (row_end - row_start) * 428 VP8LSubSampleSize(transform->xsize, transform->bits); 429 uint32_t* const src = out + out_stride - in_stride; 430 memmove(src, out, in_stride * sizeof(*src)); 431 ColorIndexInverseTransform_C(transform, row_start, row_end, src, out); 432 } else { 433 ColorIndexInverseTransform_C(transform, row_start, row_end, in, out); 434 } 435 break; 436 } 437 } 438 439 //------------------------------------------------------------------------------ 440 // Color space conversion. 441 442 static int is_big_endian(void) { 443 static const union { 444 uint16_t w; 445 uint8_t b[2]; 446 } tmp = { 1 }; 447 return (tmp.b[0] != 1); 448 } 449 450 void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src, 451 int num_pixels, uint8_t* WEBP_RESTRICT dst) { 452 const uint32_t* const src_end = src + num_pixels; 453 while (src < src_end) { 454 const uint32_t argb = *src++; 455 *dst++ = (argb >> 16) & 0xff; 456 *dst++ = (argb >> 8) & 0xff; 457 *dst++ = (argb >> 0) & 0xff; 458 } 459 } 460 461 void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src, 462 int num_pixels, uint8_t* WEBP_RESTRICT dst) { 463 const uint32_t* const src_end = src + num_pixels; 464 while (src < src_end) { 465 const uint32_t argb = *src++; 466 *dst++ = (argb >> 16) & 0xff; 467 *dst++ = (argb >> 8) & 0xff; 468 *dst++ = (argb >> 0) & 0xff; 469 *dst++ = (argb >> 24) & 0xff; 470 } 471 } 472 473 void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src, 474 int num_pixels, uint8_t* WEBP_RESTRICT dst) { 475 const uint32_t* const src_end = src + num_pixels; 476 while (src < src_end) { 477 const uint32_t argb = *src++; 478 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf); 479 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf); 480 #if (WEBP_SWAP_16BIT_CSP == 1) 481 *dst++ = ba; 482 *dst++ = rg; 483 #else 484 *dst++ = rg; 485 *dst++ = ba; 486 #endif 487 } 488 } 489 490 void 
VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src, 491 int num_pixels, uint8_t* WEBP_RESTRICT dst) { 492 const uint32_t* const src_end = src + num_pixels; 493 while (src < src_end) { 494 const uint32_t argb = *src++; 495 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7); 496 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f); 497 #if (WEBP_SWAP_16BIT_CSP == 1) 498 *dst++ = gb; 499 *dst++ = rg; 500 #else 501 *dst++ = rg; 502 *dst++ = gb; 503 #endif 504 } 505 } 506 507 void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src, 508 int num_pixels, uint8_t* WEBP_RESTRICT dst) { 509 const uint32_t* const src_end = src + num_pixels; 510 while (src < src_end) { 511 const uint32_t argb = *src++; 512 *dst++ = (argb >> 0) & 0xff; 513 *dst++ = (argb >> 8) & 0xff; 514 *dst++ = (argb >> 16) & 0xff; 515 } 516 } 517 518 static void CopyOrSwap(const uint32_t* WEBP_RESTRICT src, int num_pixels, 519 uint8_t* WEBP_RESTRICT dst, int swap_on_big_endian) { 520 if (is_big_endian() == swap_on_big_endian) { 521 const uint32_t* const src_end = src + num_pixels; 522 while (src < src_end) { 523 const uint32_t argb = *src++; 524 WebPUint32ToMem(dst, BSwap32(argb)); 525 dst += sizeof(argb); 526 } 527 } else { 528 memcpy(dst, src, num_pixels * sizeof(*src)); 529 } 530 } 531 532 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, 533 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { 534 switch (out_colorspace) { 535 case MODE_RGB: 536 VP8LConvertBGRAToRGB(in_data, num_pixels, rgba); 537 break; 538 case MODE_RGBA: 539 VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba); 540 break; 541 case MODE_rgbA: 542 VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba); 543 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); 544 break; 545 case MODE_BGR: 546 VP8LConvertBGRAToBGR(in_data, num_pixels, rgba); 547 break; 548 case MODE_BGRA: 549 CopyOrSwap(in_data, num_pixels, rgba, 1); 550 break; 551 case MODE_bgrA: 552 CopyOrSwap(in_data, num_pixels, rgba, 
1); 553 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); 554 break; 555 case MODE_ARGB: 556 CopyOrSwap(in_data, num_pixels, rgba, 0); 557 break; 558 case MODE_Argb: 559 CopyOrSwap(in_data, num_pixels, rgba, 0); 560 WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0); 561 break; 562 case MODE_RGBA_4444: 563 VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba); 564 break; 565 case MODE_rgbA_4444: 566 VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba); 567 WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0); 568 break; 569 case MODE_RGB_565: 570 VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba); 571 break; 572 default: 573 assert(0); // Code flow should not reach here. 574 } 575 } 576 577 //------------------------------------------------------------------------------ 578 579 VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed; 580 VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE; 581 VP8LPredictorAddSubFunc VP8LPredictorsAdd[16]; 582 VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16]; 583 VP8LPredictorFunc VP8LPredictors[16]; 584 585 // exposed plain-C implementations 586 VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16]; 587 588 VP8LTransformColorInverseFunc VP8LTransformColorInverse; 589 VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE; 590 591 VP8LConvertFunc VP8LConvertBGRAToRGB; 592 VP8LConvertFunc VP8LConvertBGRAToRGB_SSE; 593 VP8LConvertFunc VP8LConvertBGRAToRGBA; 594 VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE; 595 VP8LConvertFunc VP8LConvertBGRAToRGBA4444; 596 VP8LConvertFunc VP8LConvertBGRAToRGB565; 597 VP8LConvertFunc VP8LConvertBGRAToBGR; 598 599 VP8LMapARGBFunc VP8LMapColor32b; 600 VP8LMapAlphaFunc VP8LMapColor8b; 601 602 extern VP8CPUInfo VP8GetCPUInfo; 603 extern void VP8LDspInitSSE2(void); 604 extern void VP8LDspInitSSE41(void); 605 extern void VP8LDspInitAVX2(void); 606 extern void VP8LDspInitNEON(void); 607 extern void VP8LDspInitMIPSdspR2(void); 608 extern void VP8LDspInitMSA(void); 609 610 #define COPY_PREDICTOR_ARRAY(IN, OUT) 
do { \ 611 (OUT)[0] = IN##0_C; \ 612 (OUT)[1] = IN##1_C; \ 613 (OUT)[2] = IN##2_C; \ 614 (OUT)[3] = IN##3_C; \ 615 (OUT)[4] = IN##4_C; \ 616 (OUT)[5] = IN##5_C; \ 617 (OUT)[6] = IN##6_C; \ 618 (OUT)[7] = IN##7_C; \ 619 (OUT)[8] = IN##8_C; \ 620 (OUT)[9] = IN##9_C; \ 621 (OUT)[10] = IN##10_C; \ 622 (OUT)[11] = IN##11_C; \ 623 (OUT)[12] = IN##12_C; \ 624 (OUT)[13] = IN##13_C; \ 625 (OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \ 626 (OUT)[15] = IN##0_C; \ 627 } while (0); 628 629 WEBP_DSP_INIT_FUNC(VP8LDspInit) { 630 COPY_PREDICTOR_ARRAY(VP8LPredictor, VP8LPredictors) 631 COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd) 632 COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C) 633 634 #if !WEBP_NEON_OMIT_C_CODE 635 VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C; 636 637 VP8LTransformColorInverse = VP8LTransformColorInverse_C; 638 639 VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C; 640 VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C; 641 VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C; 642 #endif 643 644 VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C; 645 VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C; 646 647 VP8LMapColor32b = MapARGB_C; 648 VP8LMapColor8b = MapAlpha_C; 649 650 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
651 if (VP8GetCPUInfo != NULL) { 652 #if defined(WEBP_HAVE_SSE2) 653 if (VP8GetCPUInfo(kSSE2)) { 654 VP8LDspInitSSE2(); 655 #if defined(WEBP_HAVE_SSE41) 656 if (VP8GetCPUInfo(kSSE4_1)) { 657 VP8LDspInitSSE41(); 658 #if defined(WEBP_HAVE_AVX2) 659 if (VP8GetCPUInfo(kAVX2)) { 660 VP8LDspInitAVX2(); 661 } 662 #endif 663 } 664 #endif 665 } 666 #endif 667 #if defined(WEBP_USE_MIPS_DSP_R2) 668 if (VP8GetCPUInfo(kMIPSdspR2)) { 669 VP8LDspInitMIPSdspR2(); 670 } 671 #endif 672 #if defined(WEBP_USE_MSA) 673 if (VP8GetCPUInfo(kMSA)) { 674 VP8LDspInitMSA(); 675 } 676 #endif 677 } 678 679 #if defined(WEBP_HAVE_NEON) 680 if (WEBP_NEON_OMIT_C_CODE || 681 (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { 682 VP8LDspInitNEON(); 683 } 684 #endif 685 686 assert(VP8LAddGreenToBlueAndRed != NULL); 687 assert(VP8LTransformColorInverse != NULL); 688 assert(VP8LConvertBGRAToRGBA != NULL); 689 assert(VP8LConvertBGRAToRGB != NULL); 690 assert(VP8LConvertBGRAToBGR != NULL); 691 assert(VP8LConvertBGRAToRGBA4444 != NULL); 692 assert(VP8LConvertBGRAToRGB565 != NULL); 693 assert(VP8LMapColor32b != NULL); 694 assert(VP8LMapColor8b != NULL); 695 } 696 #undef COPY_PREDICTOR_ARRAY 697 698 //------------------------------------------------------------------------------