tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

dec.c (28153B)


      1 // Copyright 2010 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Speed-critical decoding functions, default plain-C implementations.
     11 //
     12 // Author: Skal (pascal.massimino@gmail.com)
     13 
     14 #include <assert.h>
     15 #include <stddef.h>
     16 #include <string.h>
     17 
     18 #include "src/dec/common_dec.h"
     19 #include "src/dec/vp8i_dec.h"
     20 #include "src/dsp/cpu.h"
     21 #include "src/dsp/dsp.h"
     22 #include "src/utils/utils.h"
     23 #include "src/webp/types.h"
     24 
     25 //------------------------------------------------------------------------------
     26 
     27 static WEBP_INLINE uint8_t clip_8b(int v) {
     28  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
     29 }
     30 
     31 //------------------------------------------------------------------------------
     32 // Transforms (Paragraph 14.4)
     33 
     34 #define STORE(x, y, v) \
     35  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
     36 
     37 #define STORE2(y, dc, d, c) do {    \
     38  const int DC = (dc);              \
     39  STORE(0, y, DC + (d));            \
     40  STORE(1, y, DC + (c));            \
     41  STORE(2, y, DC - (c));            \
     42  STORE(3, y, DC - (d));            \
     43 } while (0)
     44 
     45 #if !WEBP_NEON_OMIT_C_CODE
     46 static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
     47                           uint8_t* WEBP_RESTRICT dst) {
     48  int C[4 * 4], *tmp;
     49  int i;
     50  tmp = C;
     51  for (i = 0; i < 4; ++i) {    // vertical pass
     52    const int a = in[0] + in[8];    // [-4096, 4094]
     53    const int b = in[0] - in[8];    // [-4095, 4095]
     54    const int c = WEBP_TRANSFORM_AC3_MUL2(in[4]) -
     55                  WEBP_TRANSFORM_AC3_MUL1(in[12]);  // [-3783, 3783]
     56    const int d = WEBP_TRANSFORM_AC3_MUL1(in[4]) +
     57                  WEBP_TRANSFORM_AC3_MUL2(in[12]);  // [-3785, 3781]
     58    tmp[0] = a + d;   // [-7881, 7875]
     59    tmp[1] = b + c;   // [-7878, 7878]
     60    tmp[2] = b - c;   // [-7878, 7878]
     61    tmp[3] = a - d;   // [-7877, 7879]
     62    tmp += 4;
     63    in++;
     64  }
     65  // Each pass is expanding the dynamic range by ~3.85 (upper bound).
     66  // The exact value is (2. + (20091 + 35468) / 65536).
     67  // After the second pass, maximum interval is [-3794, 3794], assuming
     68  // an input in [-2048, 2047] interval. We then need to add a dst value
     69  // in the [0, 255] range.
     70  // In the worst case scenario, the input to clip_8b() can be as large as
     71  // [-60713, 60968].
     72  tmp = C;
     73  for (i = 0; i < 4; ++i) {    // horizontal pass
     74    const int dc = tmp[0] + 4;
     75    const int a =  dc +  tmp[8];
     76    const int b =  dc -  tmp[8];
     77    const int c =
     78        WEBP_TRANSFORM_AC3_MUL2(tmp[4]) - WEBP_TRANSFORM_AC3_MUL1(tmp[12]);
     79    const int d =
     80        WEBP_TRANSFORM_AC3_MUL1(tmp[4]) + WEBP_TRANSFORM_AC3_MUL2(tmp[12]);
     81    STORE(0, 0, a + d);
     82    STORE(1, 0, b + c);
     83    STORE(2, 0, b - c);
     84    STORE(3, 0, a - d);
     85    tmp++;
     86    dst += BPS;
     87  }
     88 }
     89 
     90 // Simplified transform when only in[0], in[1] and in[4] are non-zero
     91 static void TransformAC3_C(const int16_t* WEBP_RESTRICT in,
     92                           uint8_t* WEBP_RESTRICT dst) {
     93  const int a = in[0] + 4;
     94  const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
     95  const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
     96  const int c1 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
     97  const int d1 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
     98  STORE2(0, a + d4, d1, c1);
     99  STORE2(1, a + c4, d1, c1);
    100  STORE2(2, a - c4, d1, c1);
    101  STORE2(3, a - d4, d1, c1);
    102 }
    103 #undef STORE2
    104 
    105 static void TransformTwo_C(const int16_t* WEBP_RESTRICT in,
    106                           uint8_t* WEBP_RESTRICT dst, int do_two) {
    107  TransformOne_C(in, dst);
    108  if (do_two) {
    109    TransformOne_C(in + 16, dst + 4);
    110  }
    111 }
    112 #endif  // !WEBP_NEON_OMIT_C_CODE
    113 
    114 static void TransformUV_C(const int16_t* WEBP_RESTRICT in,
    115                          uint8_t* WEBP_RESTRICT dst) {
    116  VP8Transform(in + 0 * 16, dst, 1);
    117  VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
    118 }
    119 
    120 #if !WEBP_NEON_OMIT_C_CODE
    121 static void TransformDC_C(const int16_t* WEBP_RESTRICT in,
    122                          uint8_t* WEBP_RESTRICT dst) {
    123  const int DC = in[0] + 4;
    124  int i, j;
    125  for (j = 0; j < 4; ++j) {
    126    for (i = 0; i < 4; ++i) {
    127      STORE(i, j, DC);
    128    }
    129  }
    130 }
    131 #endif  // !WEBP_NEON_OMIT_C_CODE
    132 
    133 static void TransformDCUV_C(const int16_t* WEBP_RESTRICT in,
    134                            uint8_t* WEBP_RESTRICT dst) {
    135  if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
    136  if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
    137  if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
    138  if (in[3 * 16]) VP8TransformDC(in + 3 * 16, dst + 4 * BPS + 4);
    139 }
    140 
    141 #undef STORE
    142 
    143 //------------------------------------------------------------------------------
    144 // Paragraph 14.3
    145 
    146 #if !WEBP_NEON_OMIT_C_CODE
    147 static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
    148                           int16_t* WEBP_RESTRICT out) {
    149  int tmp[16];
    150  int i;
    151  for (i = 0; i < 4; ++i) {
    152    const int a0 = in[0 + i] + in[12 + i];
    153    const int a1 = in[4 + i] + in[ 8 + i];
    154    const int a2 = in[4 + i] - in[ 8 + i];
    155    const int a3 = in[0 + i] - in[12 + i];
    156    tmp[0  + i] = a0 + a1;
    157    tmp[8  + i] = a0 - a1;
    158    tmp[4  + i] = a3 + a2;
    159    tmp[12 + i] = a3 - a2;
    160  }
    161  for (i = 0; i < 4; ++i) {
    162    const int dc = tmp[0 + i * 4] + 3;    // w/ rounder
    163    const int a0 = dc             + tmp[3 + i * 4];
    164    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
    165    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
    166    const int a3 = dc             - tmp[3 + i * 4];
    167    out[ 0] = (a0 + a1) >> 3;
    168    out[16] = (a3 + a2) >> 3;
    169    out[32] = (a0 - a1) >> 3;
    170    out[48] = (a3 - a2) >> 3;
    171    out += 64;
    172  }
    173 }
    174 #endif  // !WEBP_NEON_OMIT_C_CODE
    175 
    176 VP8WHT VP8TransformWHT;
    177 
    178 //------------------------------------------------------------------------------
    179 // Intra predictions
    180 
    181 #define DST(x, y) dst[(x) + (y) * BPS]
    182 
    183 #if !WEBP_NEON_OMIT_C_CODE
    184 static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
    185  const uint8_t* top = dst - BPS;
    186  const uint8_t* const clip0 = VP8kclip1 - top[-1];
    187  int y;
    188  for (y = 0; y < size; ++y) {
    189    const uint8_t* const clip = clip0 + dst[-1];
    190    int x;
    191    for (x = 0; x < size; ++x) {
    192      dst[x] = clip[top[x]];
    193    }
    194    dst += BPS;
    195  }
    196 }
    197 static void TM4_C(uint8_t* dst)   { TrueMotion(dst, 4); }
    198 static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
    199 static void TM16_C(uint8_t* dst)  { TrueMotion(dst, 16); }
    200 
    201 //------------------------------------------------------------------------------
    202 // 16x16
    203 
    204 static void VE16_C(uint8_t* dst) {     // vertical
    205  int j;
    206  for (j = 0; j < 16; ++j) {
    207    memcpy(dst + j * BPS, dst - BPS, 16);
    208  }
    209 }
    210 
    211 static void HE16_C(uint8_t* dst) {     // horizontal
    212  int j;
    213  for (j = 16; j > 0; --j) {
    214    memset(dst, dst[-1], 16);
    215    dst += BPS;
    216  }
    217 }
    218 
    219 static WEBP_INLINE void Put16(int v, uint8_t* dst) {
    220  int j;
    221  for (j = 0; j < 16; ++j) {
    222    memset(dst + j * BPS, v, 16);
    223  }
    224 }
    225 
    226 static void DC16_C(uint8_t* dst) {    // DC
    227  int DC = 16;
    228  int j;
    229  for (j = 0; j < 16; ++j) {
    230    DC += dst[-1 + j * BPS] + dst[j - BPS];
    231  }
    232  Put16(DC >> 5, dst);
    233 }
    234 
    235 static void DC16NoTop_C(uint8_t* dst) {   // DC with top samples not available
    236  int DC = 8;
    237  int j;
    238  for (j = 0; j < 16; ++j) {
    239    DC += dst[-1 + j * BPS];
    240  }
    241  Put16(DC >> 4, dst);
    242 }
    243 
    244 static void DC16NoLeft_C(uint8_t* dst) {  // DC with left samples not available
    245  int DC = 8;
    246  int i;
    247  for (i = 0; i < 16; ++i) {
    248    DC += dst[i - BPS];
    249  }
    250  Put16(DC >> 4, dst);
    251 }
    252 
    253 static void DC16NoTopLeft_C(uint8_t* dst) {  // DC with no top and left samples
    254  Put16(0x80, dst);
    255 }
    256 #endif  // !WEBP_NEON_OMIT_C_CODE
    257 
    258 VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
    259 
    260 //------------------------------------------------------------------------------
    261 // 4x4
    262 
    263 #define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
    264 #define AVG2(a, b) (((a) + (b) + 1) >> 1)
    265 
    266 #if !WEBP_NEON_OMIT_C_CODE
    267 static void VE4_C(uint8_t* dst) {    // vertical
    268  const uint8_t* top = dst - BPS;
    269  const uint8_t vals[4] = {
    270    AVG3(top[-1], top[0], top[1]),
    271    AVG3(top[ 0], top[1], top[2]),
    272    AVG3(top[ 1], top[2], top[3]),
    273    AVG3(top[ 2], top[3], top[4])
    274  };
    275  int i;
    276  for (i = 0; i < 4; ++i) {
    277    memcpy(dst + i * BPS, vals, sizeof(vals));
    278  }
    279 }
    280 #endif  // !WEBP_NEON_OMIT_C_CODE
    281 
    282 static void HE4_C(uint8_t* dst) {    // horizontal
    283  const int A = dst[-1 - BPS];
    284  const int B = dst[-1];
    285  const int C = dst[-1 + BPS];
    286  const int D = dst[-1 + 2 * BPS];
    287  const int E = dst[-1 + 3 * BPS];
    288  WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(A, B, C));
    289  WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(B, C, D));
    290  WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(C, D, E));
    291  WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
    292 }
    293 
    294 #if !WEBP_NEON_OMIT_C_CODE
    295 static void DC4_C(uint8_t* dst) {   // DC
    296  uint32_t dc = 4;
    297  int i;
    298  for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
    299  dc >>= 3;
    300  for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
    301 }
    302 
    303 static void RD4_C(uint8_t* dst) {   // Down-right
    304  const int I = dst[-1 + 0 * BPS];
    305  const int J = dst[-1 + 1 * BPS];
    306  const int K = dst[-1 + 2 * BPS];
    307  const int L = dst[-1 + 3 * BPS];
    308  const int X = dst[-1 - BPS];
    309  const int A = dst[0 - BPS];
    310  const int B = dst[1 - BPS];
    311  const int C = dst[2 - BPS];
    312  const int D = dst[3 - BPS];
    313  DST(0, 3)                                     = AVG3(J, K, L);
    314  DST(1, 3) = DST(0, 2)                         = AVG3(I, J, K);
    315  DST(2, 3) = DST(1, 2) = DST(0, 1)             = AVG3(X, I, J);
    316  DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
    317              DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
    318                          DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
    319                                      DST(3, 0) = AVG3(D, C, B);
    320 }
    321 
    322 static void LD4_C(uint8_t* dst) {   // Down-Left
    323  const int A = dst[0 - BPS];
    324  const int B = dst[1 - BPS];
    325  const int C = dst[2 - BPS];
    326  const int D = dst[3 - BPS];
    327  const int E = dst[4 - BPS];
    328  const int F = dst[5 - BPS];
    329  const int G = dst[6 - BPS];
    330  const int H = dst[7 - BPS];
    331  DST(0, 0)                                     = AVG3(A, B, C);
    332  DST(1, 0) = DST(0, 1)                         = AVG3(B, C, D);
    333  DST(2, 0) = DST(1, 1) = DST(0, 2)             = AVG3(C, D, E);
    334  DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
    335              DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
    336                          DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
    337                                      DST(3, 3) = AVG3(G, H, H);
    338 }
    339 #endif  // !WEBP_NEON_OMIT_C_CODE
    340 
    341 static void VR4_C(uint8_t* dst) {   // Vertical-Right
    342  const int I = dst[-1 + 0 * BPS];
    343  const int J = dst[-1 + 1 * BPS];
    344  const int K = dst[-1 + 2 * BPS];
    345  const int X = dst[-1 - BPS];
    346  const int A = dst[0 - BPS];
    347  const int B = dst[1 - BPS];
    348  const int C = dst[2 - BPS];
    349  const int D = dst[3 - BPS];
    350  DST(0, 0) = DST(1, 2) = AVG2(X, A);
    351  DST(1, 0) = DST(2, 2) = AVG2(A, B);
    352  DST(2, 0) = DST(3, 2) = AVG2(B, C);
    353  DST(3, 0)             = AVG2(C, D);
    354 
    355  DST(0, 3) =             AVG3(K, J, I);
    356  DST(0, 2) =             AVG3(J, I, X);
    357  DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
    358  DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
    359  DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
    360  DST(3, 1) =             AVG3(B, C, D);
    361 }
    362 
    363 static void VL4_C(uint8_t* dst) {   // Vertical-Left
    364  const int A = dst[0 - BPS];
    365  const int B = dst[1 - BPS];
    366  const int C = dst[2 - BPS];
    367  const int D = dst[3 - BPS];
    368  const int E = dst[4 - BPS];
    369  const int F = dst[5 - BPS];
    370  const int G = dst[6 - BPS];
    371  const int H = dst[7 - BPS];
    372  DST(0, 0) =             AVG2(A, B);
    373  DST(1, 0) = DST(0, 2) = AVG2(B, C);
    374  DST(2, 0) = DST(1, 2) = AVG2(C, D);
    375  DST(3, 0) = DST(2, 2) = AVG2(D, E);
    376 
    377  DST(0, 1) =             AVG3(A, B, C);
    378  DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
    379  DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
    380  DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
    381              DST(3, 2) = AVG3(E, F, G);
    382              DST(3, 3) = AVG3(F, G, H);
    383 }
    384 
    385 static void HU4_C(uint8_t* dst) {   // Horizontal-Up
    386  const int I = dst[-1 + 0 * BPS];
    387  const int J = dst[-1 + 1 * BPS];
    388  const int K = dst[-1 + 2 * BPS];
    389  const int L = dst[-1 + 3 * BPS];
    390  DST(0, 0) =             AVG2(I, J);
    391  DST(2, 0) = DST(0, 1) = AVG2(J, K);
    392  DST(2, 1) = DST(0, 2) = AVG2(K, L);
    393  DST(1, 0) =             AVG3(I, J, K);
    394  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
    395  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
    396  DST(3, 2) = DST(2, 2) =
    397    DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
    398 }
    399 
    400 static void HD4_C(uint8_t* dst) {  // Horizontal-Down
    401  const int I = dst[-1 + 0 * BPS];
    402  const int J = dst[-1 + 1 * BPS];
    403  const int K = dst[-1 + 2 * BPS];
    404  const int L = dst[-1 + 3 * BPS];
    405  const int X = dst[-1 - BPS];
    406  const int A = dst[0 - BPS];
    407  const int B = dst[1 - BPS];
    408  const int C = dst[2 - BPS];
    409 
    410  DST(0, 0) = DST(2, 1) = AVG2(I, X);
    411  DST(0, 1) = DST(2, 2) = AVG2(J, I);
    412  DST(0, 2) = DST(2, 3) = AVG2(K, J);
    413  DST(0, 3)             = AVG2(L, K);
    414 
    415  DST(3, 0)             = AVG3(A, B, C);
    416  DST(2, 0)             = AVG3(X, A, B);
    417  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
    418  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
    419  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
    420  DST(1, 3)             = AVG3(L, K, J);
    421 }
    422 
    423 #undef DST
    424 #undef AVG3
    425 #undef AVG2
    426 
    427 VP8PredFunc VP8PredLuma4[NUM_BMODES];
    428 
    429 //------------------------------------------------------------------------------
    430 // Chroma
    431 
    432 #if !WEBP_NEON_OMIT_C_CODE
    433 static void VE8uv_C(uint8_t* dst) {    // vertical
    434  int j;
    435  for (j = 0; j < 8; ++j) {
    436    memcpy(dst + j * BPS, dst - BPS, 8);
    437  }
    438 }
    439 
    440 static void HE8uv_C(uint8_t* dst) {    // horizontal
    441  int j;
    442  for (j = 0; j < 8; ++j) {
    443    memset(dst, dst[-1], 8);
    444    dst += BPS;
    445  }
    446 }
    447 
    448 // helper for chroma-DC predictions
    449 static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
    450  int j;
    451  for (j = 0; j < 8; ++j) {
    452    memset(dst + j * BPS, value, 8);
    453  }
    454 }
    455 
    456 static void DC8uv_C(uint8_t* dst) {     // DC
    457  int dc0 = 8;
    458  int i;
    459  for (i = 0; i < 8; ++i) {
    460    dc0 += dst[i - BPS] + dst[-1 + i * BPS];
    461  }
    462  Put8x8uv(dc0 >> 4, dst);
    463 }
    464 
    465 static void DC8uvNoLeft_C(uint8_t* dst) {   // DC with no left samples
    466  int dc0 = 4;
    467  int i;
    468  for (i = 0; i < 8; ++i) {
    469    dc0 += dst[i - BPS];
    470  }
    471  Put8x8uv(dc0 >> 3, dst);
    472 }
    473 
    474 static void DC8uvNoTop_C(uint8_t* dst) {  // DC with no top samples
    475  int dc0 = 4;
    476  int i;
    477  for (i = 0; i < 8; ++i) {
    478    dc0 += dst[-1 + i * BPS];
    479  }
    480  Put8x8uv(dc0 >> 3, dst);
    481 }
    482 
    483 static void DC8uvNoTopLeft_C(uint8_t* dst) {    // DC with nothing
    484  Put8x8uv(0x80, dst);
    485 }
    486 #endif  // !WEBP_NEON_OMIT_C_CODE
    487 
    488 VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
    489 
    490 //------------------------------------------------------------------------------
    491 // Edge filtering functions
    492 
    493 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    494 // 4 pixels in, 2 pixels out
    495 static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
    496  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
    497  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];  // in [-893,892]
    498  const int a1 = VP8ksclip2[(a + 4) >> 3];            // in [-16,15]
    499  const int a2 = VP8ksclip2[(a + 3) >> 3];
    500  p[-step] = VP8kclip1[p0 + a2];
    501  p[    0] = VP8kclip1[q0 - a1];
    502 }
    503 
    504 // 4 pixels in, 4 pixels out
    505 static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
    506  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
    507  const int a = 3 * (q0 - p0);
    508  const int a1 = VP8ksclip2[(a + 4) >> 3];
    509  const int a2 = VP8ksclip2[(a + 3) >> 3];
    510  const int a3 = (a1 + 1) >> 1;
    511  p[-2*step] = VP8kclip1[p1 + a3];
    512  p[-  step] = VP8kclip1[p0 + a2];
    513  p[      0] = VP8kclip1[q0 - a1];
    514  p[   step] = VP8kclip1[q1 - a3];
    515 }
    516 
    517 // 6 pixels in, 6 pixels out
    518 static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
    519  const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
    520  const int q0 = p[0], q1 = p[step], q2 = p[2*step];
    521  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
    522  // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
    523  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
    524  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
    525  const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
    526  p[-3*step] = VP8kclip1[p2 + a3];
    527  p[-2*step] = VP8kclip1[p1 + a2];
    528  p[-  step] = VP8kclip1[p0 + a1];
    529  p[      0] = VP8kclip1[q0 - a1];
    530  p[   step] = VP8kclip1[q1 - a2];
    531  p[ 2*step] = VP8kclip1[q2 - a3];
    532 }
    533 
    534 static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
    535  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
    536  return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
    537 }
    538 #endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    539 
    540 #if !WEBP_NEON_OMIT_C_CODE
    541 static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
    542  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
    543  return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
    544 }
    545 #endif  // !WEBP_NEON_OMIT_C_CODE
    546 
    547 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    548 static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
    549                                      int step, int t, int it) {
    550  const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
    551  const int p0 = p[-step], q0 = p[0];
    552  const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
    553  if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0;
    554  return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it &&
    555         VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
    556         VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
    557 }
    558 #endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    559 
    560 //------------------------------------------------------------------------------
    561 // Simple In-loop filtering (Paragraph 15.2)
    562 
    563 #if !WEBP_NEON_OMIT_C_CODE
    564 static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
    565  int i;
    566  const int thresh2 = 2 * thresh + 1;
    567  for (i = 0; i < 16; ++i) {
    568    if (NeedsFilter_C(p + i, stride, thresh2)) {
    569      DoFilter2_C(p + i, stride);
    570    }
    571  }
    572 }
    573 
    574 static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
    575  int i;
    576  const int thresh2 = 2 * thresh + 1;
    577  for (i = 0; i < 16; ++i) {
    578    if (NeedsFilter_C(p + i * stride, 1, thresh2)) {
    579      DoFilter2_C(p + i * stride, 1);
    580    }
    581  }
    582 }
    583 
    584 static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
    585  int k;
    586  for (k = 3; k > 0; --k) {
    587    p += 4 * stride;
    588    SimpleVFilter16_C(p, stride, thresh);
    589  }
    590 }
    591 
    592 static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
    593  int k;
    594  for (k = 3; k > 0; --k) {
    595    p += 4;
    596    SimpleHFilter16_C(p, stride, thresh);
    597  }
    598 }
    599 #endif  // !WEBP_NEON_OMIT_C_CODE
    600 
    601 //------------------------------------------------------------------------------
    602 // Complex In-loop filtering (Paragraph 15.3)
    603 
    604 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    605 static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
    606                                       int hstride, int vstride, int size,
    607                                       int thresh, int ithresh,
    608                                       int hev_thresh) {
    609  const int thresh2 = 2 * thresh + 1;
    610  while (size-- > 0) {
    611    if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
    612      if (Hev(p, hstride, hev_thresh)) {
    613        DoFilter2_C(p, hstride);
    614      } else {
    615        DoFilter6_C(p, hstride);
    616      }
    617    }
    618    p += vstride;
    619  }
    620 }
    621 
    622 static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
    623                                       int hstride, int vstride, int size,
    624                                       int thresh, int ithresh,
    625                                       int hev_thresh) {
    626  const int thresh2 = 2 * thresh + 1;
    627  while (size-- > 0) {
    628    if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
    629      if (Hev(p, hstride, hev_thresh)) {
    630        DoFilter2_C(p, hstride);
    631      } else {
    632        DoFilter4_C(p, hstride);
    633      }
    634    }
    635    p += vstride;
    636  }
    637 }
    638 #endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    639 
    640 #if !WEBP_NEON_OMIT_C_CODE
    641 // on macroblock edges
    642 static void VFilter16_C(uint8_t* p, int stride,
    643                        int thresh, int ithresh, int hev_thresh) {
    644  FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
    645 }
    646 
    647 static void HFilter16_C(uint8_t* p, int stride,
    648                        int thresh, int ithresh, int hev_thresh) {
    649  FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
    650 }
    651 
    652 // on three inner edges
    653 static void VFilter16i_C(uint8_t* p, int stride,
    654                         int thresh, int ithresh, int hev_thresh) {
    655  int k;
    656  for (k = 3; k > 0; --k) {
    657    p += 4 * stride;
    658    FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
    659  }
    660 }
    661 #endif  // !WEBP_NEON_OMIT_C_CODE
    662 
    663 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    664 static void HFilter16i_C(uint8_t* p, int stride,
    665                         int thresh, int ithresh, int hev_thresh) {
    666  int k;
    667  for (k = 3; k > 0; --k) {
    668    p += 4;
    669    FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
    670  }
    671 }
    672 #endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    673 
    674 #if !WEBP_NEON_OMIT_C_CODE
    675 // 8-pixels wide variant, for chroma filtering
    676 static void VFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
    677                       int stride, int thresh, int ithresh, int hev_thresh) {
    678  FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
    679  FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
    680 }
    681 #endif  // !WEBP_NEON_OMIT_C_CODE
    682 
    683 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    684 static void HFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
    685                       int stride, int thresh, int ithresh, int hev_thresh) {
    686  FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
    687  FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
    688 }
    689 #endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    690 
    691 #if !WEBP_NEON_OMIT_C_CODE
    692 static void VFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
    693                        int stride, int thresh, int ithresh, int hev_thresh) {
    694  FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
    695  FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
    696 }
    697 #endif  // !WEBP_NEON_OMIT_C_CODE
    698 
    699 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    700 static void HFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
    701                        int stride, int thresh, int ithresh, int hev_thresh) {
    702  FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
    703  FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
    704 }
    705 #endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    706 
    707 //------------------------------------------------------------------------------
    708 
    709 static void DitherCombine8x8_C(const uint8_t* WEBP_RESTRICT dither,
    710                               uint8_t* WEBP_RESTRICT dst, int dst_stride) {
    711  int i, j;
    712  for (j = 0; j < 8; ++j) {
    713    for (i = 0; i < 8; ++i) {
    714      const int delta0 = dither[i] - VP8_DITHER_AMP_CENTER;
    715      const int delta1 =
    716          (delta0 + VP8_DITHER_DESCALE_ROUNDER) >> VP8_DITHER_DESCALE;
    717      dst[i] = clip_8b((int)dst[i] + delta1);
    718    }
    719    dst += dst_stride;
    720    dither += 8;
    721  }
    722 }
    723 
    724 //------------------------------------------------------------------------------
    725 
    726 VP8DecIdct2 VP8Transform;
    727 VP8DecIdct VP8TransformAC3;
    728 VP8DecIdct VP8TransformUV;
    729 VP8DecIdct VP8TransformDC;
    730 VP8DecIdct VP8TransformDCUV;
    731 
    732 VP8LumaFilterFunc VP8VFilter16;
    733 VP8LumaFilterFunc VP8HFilter16;
    734 VP8ChromaFilterFunc VP8VFilter8;
    735 VP8ChromaFilterFunc VP8HFilter8;
    736 VP8LumaFilterFunc VP8VFilter16i;
    737 VP8LumaFilterFunc VP8HFilter16i;
    738 VP8ChromaFilterFunc VP8VFilter8i;
    739 VP8ChromaFilterFunc VP8HFilter8i;
    740 VP8SimpleFilterFunc VP8SimpleVFilter16;
    741 VP8SimpleFilterFunc VP8SimpleHFilter16;
    742 VP8SimpleFilterFunc VP8SimpleVFilter16i;
    743 VP8SimpleFilterFunc VP8SimpleHFilter16i;
    744 
    745 void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither,
    746                            uint8_t* WEBP_RESTRICT dst, int dst_stride);
    747 
    748 extern VP8CPUInfo VP8GetCPUInfo;
    749 extern void VP8DspInitSSE2(void);
    750 extern void VP8DspInitSSE41(void);
    751 extern void VP8DspInitNEON(void);
    752 extern void VP8DspInitMIPS32(void);
    753 extern void VP8DspInitMIPSdspR2(void);
    754 extern void VP8DspInitMSA(void);
    755 
    756 WEBP_DSP_INIT_FUNC(VP8DspInit) {
    757  VP8InitClipTables();
    758 
    759 #if !WEBP_NEON_OMIT_C_CODE
    760  VP8TransformWHT = TransformWHT_C;
    761  VP8Transform = TransformTwo_C;
    762  VP8TransformDC = TransformDC_C;
    763  VP8TransformAC3 = TransformAC3_C;
    764 #endif
    765  VP8TransformUV = TransformUV_C;
    766  VP8TransformDCUV = TransformDCUV_C;
    767 
    768 #if !WEBP_NEON_OMIT_C_CODE
    769  VP8VFilter16 = VFilter16_C;
    770  VP8VFilter16i = VFilter16i_C;
    771  VP8HFilter16 = HFilter16_C;
    772  VP8VFilter8 = VFilter8_C;
    773  VP8VFilter8i = VFilter8i_C;
    774  VP8SimpleVFilter16 = SimpleVFilter16_C;
    775  VP8SimpleHFilter16 = SimpleHFilter16_C;
    776  VP8SimpleVFilter16i = SimpleVFilter16i_C;
    777  VP8SimpleHFilter16i = SimpleHFilter16i_C;
    778 #endif
    779 
    780 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
    781  VP8HFilter16i = HFilter16i_C;
    782  VP8HFilter8 = HFilter8_C;
    783  VP8HFilter8i = HFilter8i_C;
    784 #endif
    785 
    786 #if !WEBP_NEON_OMIT_C_CODE
    787  VP8PredLuma4[0] = DC4_C;
    788  VP8PredLuma4[1] = TM4_C;
    789  VP8PredLuma4[2] = VE4_C;
    790  VP8PredLuma4[4] = RD4_C;
    791  VP8PredLuma4[6] = LD4_C;
    792 #endif
    793 
    794  VP8PredLuma4[3] = HE4_C;
    795  VP8PredLuma4[5] = VR4_C;
    796  VP8PredLuma4[7] = VL4_C;
    797  VP8PredLuma4[8] = HD4_C;
    798  VP8PredLuma4[9] = HU4_C;
    799 
    800 #if !WEBP_NEON_OMIT_C_CODE
    801  VP8PredLuma16[0] = DC16_C;
    802  VP8PredLuma16[1] = TM16_C;
    803  VP8PredLuma16[2] = VE16_C;
    804  VP8PredLuma16[3] = HE16_C;
    805  VP8PredLuma16[4] = DC16NoTop_C;
    806  VP8PredLuma16[5] = DC16NoLeft_C;
    807  VP8PredLuma16[6] = DC16NoTopLeft_C;
    808 
    809  VP8PredChroma8[0] = DC8uv_C;
    810  VP8PredChroma8[1] = TM8uv_C;
    811  VP8PredChroma8[2] = VE8uv_C;
    812  VP8PredChroma8[3] = HE8uv_C;
    813  VP8PredChroma8[4] = DC8uvNoTop_C;
    814  VP8PredChroma8[5] = DC8uvNoLeft_C;
    815  VP8PredChroma8[6] = DC8uvNoTopLeft_C;
    816 #endif
    817 
    818  VP8DitherCombine8x8 = DitherCombine8x8_C;
    819 
    820  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
    821  if (VP8GetCPUInfo != NULL) {
    822 #if defined(WEBP_HAVE_SSE2)
    823    if (VP8GetCPUInfo(kSSE2)) {
    824      VP8DspInitSSE2();
    825 #if defined(WEBP_HAVE_SSE41)
    826      if (VP8GetCPUInfo(kSSE4_1)) {
    827        VP8DspInitSSE41();
    828      }
    829 #endif
    830    }
    831 #endif
    832 #if defined(WEBP_USE_MIPS32)
    833    if (VP8GetCPUInfo(kMIPS32)) {
    834      VP8DspInitMIPS32();
    835    }
    836 #endif
    837 #if defined(WEBP_USE_MIPS_DSP_R2)
    838    if (VP8GetCPUInfo(kMIPSdspR2)) {
    839      VP8DspInitMIPSdspR2();
    840    }
    841 #endif
    842 #if defined(WEBP_USE_MSA)
    843    if (VP8GetCPUInfo(kMSA)) {
    844      VP8DspInitMSA();
    845    }
    846 #endif
    847  }
    848 
    849 #if defined(WEBP_HAVE_NEON)
    850  if (WEBP_NEON_OMIT_C_CODE ||
    851      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
    852    VP8DspInitNEON();
    853  }
    854 #endif
    855 
    856  assert(VP8TransformWHT != NULL);
    857  assert(VP8Transform != NULL);
    858  assert(VP8TransformDC != NULL);
    859  assert(VP8TransformAC3 != NULL);
    860  assert(VP8TransformUV != NULL);
    861  assert(VP8TransformDCUV != NULL);
    862  assert(VP8VFilter16 != NULL);
    863  assert(VP8HFilter16 != NULL);
    864  assert(VP8VFilter8 != NULL);
    865  assert(VP8HFilter8 != NULL);
    866  assert(VP8VFilter16i != NULL);
    867  assert(VP8HFilter16i != NULL);
    868  assert(VP8VFilter8i != NULL);
    869  assert(VP8HFilter8i != NULL);
    870  assert(VP8SimpleVFilter16 != NULL);
    871  assert(VP8SimpleHFilter16 != NULL);
    872  assert(VP8SimpleVFilter16i != NULL);
    873  assert(VP8SimpleHFilter16i != NULL);
    874  assert(VP8PredLuma4[0] != NULL);
    875  assert(VP8PredLuma4[1] != NULL);
    876  assert(VP8PredLuma4[2] != NULL);
    877  assert(VP8PredLuma4[3] != NULL);
    878  assert(VP8PredLuma4[4] != NULL);
    879  assert(VP8PredLuma4[5] != NULL);
    880  assert(VP8PredLuma4[6] != NULL);
    881  assert(VP8PredLuma4[7] != NULL);
    882  assert(VP8PredLuma4[8] != NULL);
    883  assert(VP8PredLuma4[9] != NULL);
    884  assert(VP8PredLuma16[0] != NULL);
    885  assert(VP8PredLuma16[1] != NULL);
    886  assert(VP8PredLuma16[2] != NULL);
    887  assert(VP8PredLuma16[3] != NULL);
    888  assert(VP8PredLuma16[4] != NULL);
    889  assert(VP8PredLuma16[5] != NULL);
    890  assert(VP8PredLuma16[6] != NULL);
    891  assert(VP8PredChroma8[0] != NULL);
    892  assert(VP8PredChroma8[1] != NULL);
    893  assert(VP8PredChroma8[2] != NULL);
    894  assert(VP8PredChroma8[3] != NULL);
    895  assert(VP8PredChroma8[4] != NULL);
    896  assert(VP8PredChroma8[5] != NULL);
    897  assert(VP8PredChroma8[6] != NULL);
    898  assert(VP8DitherCombine8x8 != NULL);
    899 }