// dec.c (28153B)
1 // Copyright 2010 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // Speed-critical decoding functions, default plain-C implementations. 11 // 12 // Author: Skal (pascal.massimino@gmail.com) 13 14 #include <assert.h> 15 #include <stddef.h> 16 #include <string.h> 17 18 #include "src/dec/common_dec.h" 19 #include "src/dec/vp8i_dec.h" 20 #include "src/dsp/cpu.h" 21 #include "src/dsp/dsp.h" 22 #include "src/utils/utils.h" 23 #include "src/webp/types.h" 24 25 //------------------------------------------------------------------------------ 26 27 static WEBP_INLINE uint8_t clip_8b(int v) { 28 return (!(v & ~0xff)) ? v : (v < 0) ? 
0 : 255; 29 } 30 31 //------------------------------------------------------------------------------ 32 // Transforms (Paragraph 14.4) 33 34 #define STORE(x, y, v) \ 35 dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3)) 36 37 #define STORE2(y, dc, d, c) do { \ 38 const int DC = (dc); \ 39 STORE(0, y, DC + (d)); \ 40 STORE(1, y, DC + (c)); \ 41 STORE(2, y, DC - (c)); \ 42 STORE(3, y, DC - (d)); \ 43 } while (0) 44 45 #if !WEBP_NEON_OMIT_C_CODE 46 static void TransformOne_C(const int16_t* WEBP_RESTRICT in, 47 uint8_t* WEBP_RESTRICT dst) { 48 int C[4 * 4], *tmp; 49 int i; 50 tmp = C; 51 for (i = 0; i < 4; ++i) { // vertical pass 52 const int a = in[0] + in[8]; // [-4096, 4094] 53 const int b = in[0] - in[8]; // [-4095, 4095] 54 const int c = WEBP_TRANSFORM_AC3_MUL2(in[4]) - 55 WEBP_TRANSFORM_AC3_MUL1(in[12]); // [-3783, 3783] 56 const int d = WEBP_TRANSFORM_AC3_MUL1(in[4]) + 57 WEBP_TRANSFORM_AC3_MUL2(in[12]); // [-3785, 3781] 58 tmp[0] = a + d; // [-7881, 7875] 59 tmp[1] = b + c; // [-7878, 7878] 60 tmp[2] = b - c; // [-7878, 7878] 61 tmp[3] = a - d; // [-7877, 7879] 62 tmp += 4; 63 in++; 64 } 65 // Each pass is expanding the dynamic range by ~3.85 (upper bound). 66 // The exact value is (2. + (20091 + 35468) / 65536). 67 // After the second pass, maximum interval is [-3794, 3794], assuming 68 // an input in [-2048, 2047] interval. We then need to add a dst value 69 // in the [0, 255] range. 70 // In the worst case scenario, the input to clip_8b() can be as large as 71 // [-60713, 60968]. 
72 tmp = C; 73 for (i = 0; i < 4; ++i) { // horizontal pass 74 const int dc = tmp[0] + 4; 75 const int a = dc + tmp[8]; 76 const int b = dc - tmp[8]; 77 const int c = 78 WEBP_TRANSFORM_AC3_MUL2(tmp[4]) - WEBP_TRANSFORM_AC3_MUL1(tmp[12]); 79 const int d = 80 WEBP_TRANSFORM_AC3_MUL1(tmp[4]) + WEBP_TRANSFORM_AC3_MUL2(tmp[12]); 81 STORE(0, 0, a + d); 82 STORE(1, 0, b + c); 83 STORE(2, 0, b - c); 84 STORE(3, 0, a - d); 85 tmp++; 86 dst += BPS; 87 } 88 } 89 90 // Simplified transform when only in[0], in[1] and in[4] are non-zero 91 static void TransformAC3_C(const int16_t* WEBP_RESTRICT in, 92 uint8_t* WEBP_RESTRICT dst) { 93 const int a = in[0] + 4; 94 const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]); 95 const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]); 96 const int c1 = WEBP_TRANSFORM_AC3_MUL2(in[1]); 97 const int d1 = WEBP_TRANSFORM_AC3_MUL1(in[1]); 98 STORE2(0, a + d4, d1, c1); 99 STORE2(1, a + c4, d1, c1); 100 STORE2(2, a - c4, d1, c1); 101 STORE2(3, a - d4, d1, c1); 102 } 103 #undef STORE2 104 105 static void TransformTwo_C(const int16_t* WEBP_RESTRICT in, 106 uint8_t* WEBP_RESTRICT dst, int do_two) { 107 TransformOne_C(in, dst); 108 if (do_two) { 109 TransformOne_C(in + 16, dst + 4); 110 } 111 } 112 #endif // !WEBP_NEON_OMIT_C_CODE 113 114 static void TransformUV_C(const int16_t* WEBP_RESTRICT in, 115 uint8_t* WEBP_RESTRICT dst) { 116 VP8Transform(in + 0 * 16, dst, 1); 117 VP8Transform(in + 2 * 16, dst + 4 * BPS, 1); 118 } 119 120 #if !WEBP_NEON_OMIT_C_CODE 121 static void TransformDC_C(const int16_t* WEBP_RESTRICT in, 122 uint8_t* WEBP_RESTRICT dst) { 123 const int DC = in[0] + 4; 124 int i, j; 125 for (j = 0; j < 4; ++j) { 126 for (i = 0; i < 4; ++i) { 127 STORE(i, j, DC); 128 } 129 } 130 } 131 #endif // !WEBP_NEON_OMIT_C_CODE 132 133 static void TransformDCUV_C(const int16_t* WEBP_RESTRICT in, 134 uint8_t* WEBP_RESTRICT dst) { 135 if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst); 136 if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4); 137 if (in[2 * 16]) 
VP8TransformDC(in + 2 * 16, dst + 4 * BPS); 138 if (in[3 * 16]) VP8TransformDC(in + 3 * 16, dst + 4 * BPS + 4); 139 } 140 141 #undef STORE 142 143 //------------------------------------------------------------------------------ 144 // Paragraph 14.3 145 146 #if !WEBP_NEON_OMIT_C_CODE 147 static void TransformWHT_C(const int16_t* WEBP_RESTRICT in, 148 int16_t* WEBP_RESTRICT out) { 149 int tmp[16]; 150 int i; 151 for (i = 0; i < 4; ++i) { 152 const int a0 = in[0 + i] + in[12 + i]; 153 const int a1 = in[4 + i] + in[ 8 + i]; 154 const int a2 = in[4 + i] - in[ 8 + i]; 155 const int a3 = in[0 + i] - in[12 + i]; 156 tmp[0 + i] = a0 + a1; 157 tmp[8 + i] = a0 - a1; 158 tmp[4 + i] = a3 + a2; 159 tmp[12 + i] = a3 - a2; 160 } 161 for (i = 0; i < 4; ++i) { 162 const int dc = tmp[0 + i * 4] + 3; // w/ rounder 163 const int a0 = dc + tmp[3 + i * 4]; 164 const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4]; 165 const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4]; 166 const int a3 = dc - tmp[3 + i * 4]; 167 out[ 0] = (a0 + a1) >> 3; 168 out[16] = (a3 + a2) >> 3; 169 out[32] = (a0 - a1) >> 3; 170 out[48] = (a3 - a2) >> 3; 171 out += 64; 172 } 173 } 174 #endif // !WEBP_NEON_OMIT_C_CODE 175 176 VP8WHT VP8TransformWHT; 177 178 //------------------------------------------------------------------------------ 179 // Intra predictions 180 181 #define DST(x, y) dst[(x) + (y) * BPS] 182 183 #if !WEBP_NEON_OMIT_C_CODE 184 static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) { 185 const uint8_t* top = dst - BPS; 186 const uint8_t* const clip0 = VP8kclip1 - top[-1]; 187 int y; 188 for (y = 0; y < size; ++y) { 189 const uint8_t* const clip = clip0 + dst[-1]; 190 int x; 191 for (x = 0; x < size; ++x) { 192 dst[x] = clip[top[x]]; 193 } 194 dst += BPS; 195 } 196 } 197 static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); } 198 static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); } 199 static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); } 200 201 
//------------------------------------------------------------------------------ 202 // 16x16 203 204 static void VE16_C(uint8_t* dst) { // vertical 205 int j; 206 for (j = 0; j < 16; ++j) { 207 memcpy(dst + j * BPS, dst - BPS, 16); 208 } 209 } 210 211 static void HE16_C(uint8_t* dst) { // horizontal 212 int j; 213 for (j = 16; j > 0; --j) { 214 memset(dst, dst[-1], 16); 215 dst += BPS; 216 } 217 } 218 219 static WEBP_INLINE void Put16(int v, uint8_t* dst) { 220 int j; 221 for (j = 0; j < 16; ++j) { 222 memset(dst + j * BPS, v, 16); 223 } 224 } 225 226 static void DC16_C(uint8_t* dst) { // DC 227 int DC = 16; 228 int j; 229 for (j = 0; j < 16; ++j) { 230 DC += dst[-1 + j * BPS] + dst[j - BPS]; 231 } 232 Put16(DC >> 5, dst); 233 } 234 235 static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available 236 int DC = 8; 237 int j; 238 for (j = 0; j < 16; ++j) { 239 DC += dst[-1 + j * BPS]; 240 } 241 Put16(DC >> 4, dst); 242 } 243 244 static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available 245 int DC = 8; 246 int i; 247 for (i = 0; i < 16; ++i) { 248 DC += dst[i - BPS]; 249 } 250 Put16(DC >> 4, dst); 251 } 252 253 static void DC16NoTopLeft_C(uint8_t* dst) { // DC with no top and left samples 254 Put16(0x80, dst); 255 } 256 #endif // !WEBP_NEON_OMIT_C_CODE 257 258 VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES]; 259 260 //------------------------------------------------------------------------------ 261 // 4x4 262 263 #define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2)) 264 #define AVG2(a, b) (((a) + (b) + 1) >> 1) 265 266 #if !WEBP_NEON_OMIT_C_CODE 267 static void VE4_C(uint8_t* dst) { // vertical 268 const uint8_t* top = dst - BPS; 269 const uint8_t vals[4] = { 270 AVG3(top[-1], top[0], top[1]), 271 AVG3(top[ 0], top[1], top[2]), 272 AVG3(top[ 1], top[2], top[3]), 273 AVG3(top[ 2], top[3], top[4]) 274 }; 275 int i; 276 for (i = 0; i < 4; ++i) { 277 memcpy(dst + i * BPS, vals, sizeof(vals)); 278 } 279 } 280 #endif // 
!WEBP_NEON_OMIT_C_CODE 281 282 static void HE4_C(uint8_t* dst) { // horizontal 283 const int A = dst[-1 - BPS]; 284 const int B = dst[-1]; 285 const int C = dst[-1 + BPS]; 286 const int D = dst[-1 + 2 * BPS]; 287 const int E = dst[-1 + 3 * BPS]; 288 WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(A, B, C)); 289 WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(B, C, D)); 290 WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(C, D, E)); 291 WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E)); 292 } 293 294 #if !WEBP_NEON_OMIT_C_CODE 295 static void DC4_C(uint8_t* dst) { // DC 296 uint32_t dc = 4; 297 int i; 298 for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS]; 299 dc >>= 3; 300 for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4); 301 } 302 303 static void RD4_C(uint8_t* dst) { // Down-right 304 const int I = dst[-1 + 0 * BPS]; 305 const int J = dst[-1 + 1 * BPS]; 306 const int K = dst[-1 + 2 * BPS]; 307 const int L = dst[-1 + 3 * BPS]; 308 const int X = dst[-1 - BPS]; 309 const int A = dst[0 - BPS]; 310 const int B = dst[1 - BPS]; 311 const int C = dst[2 - BPS]; 312 const int D = dst[3 - BPS]; 313 DST(0, 3) = AVG3(J, K, L); 314 DST(1, 3) = DST(0, 2) = AVG3(I, J, K); 315 DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); 316 DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); 317 DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); 318 DST(3, 1) = DST(2, 0) = AVG3(C, B, A); 319 DST(3, 0) = AVG3(D, C, B); 320 } 321 322 static void LD4_C(uint8_t* dst) { // Down-Left 323 const int A = dst[0 - BPS]; 324 const int B = dst[1 - BPS]; 325 const int C = dst[2 - BPS]; 326 const int D = dst[3 - BPS]; 327 const int E = dst[4 - BPS]; 328 const int F = dst[5 - BPS]; 329 const int G = dst[6 - BPS]; 330 const int H = dst[7 - BPS]; 331 DST(0, 0) = AVG3(A, B, C); 332 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); 333 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); 334 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); 335 DST(3, 1) = DST(2, 2) = 
DST(1, 3) = AVG3(E, F, G); 336 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); 337 DST(3, 3) = AVG3(G, H, H); 338 } 339 #endif // !WEBP_NEON_OMIT_C_CODE 340 341 static void VR4_C(uint8_t* dst) { // Vertical-Right 342 const int I = dst[-1 + 0 * BPS]; 343 const int J = dst[-1 + 1 * BPS]; 344 const int K = dst[-1 + 2 * BPS]; 345 const int X = dst[-1 - BPS]; 346 const int A = dst[0 - BPS]; 347 const int B = dst[1 - BPS]; 348 const int C = dst[2 - BPS]; 349 const int D = dst[3 - BPS]; 350 DST(0, 0) = DST(1, 2) = AVG2(X, A); 351 DST(1, 0) = DST(2, 2) = AVG2(A, B); 352 DST(2, 0) = DST(3, 2) = AVG2(B, C); 353 DST(3, 0) = AVG2(C, D); 354 355 DST(0, 3) = AVG3(K, J, I); 356 DST(0, 2) = AVG3(J, I, X); 357 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); 358 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); 359 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); 360 DST(3, 1) = AVG3(B, C, D); 361 } 362 363 static void VL4_C(uint8_t* dst) { // Vertical-Left 364 const int A = dst[0 - BPS]; 365 const int B = dst[1 - BPS]; 366 const int C = dst[2 - BPS]; 367 const int D = dst[3 - BPS]; 368 const int E = dst[4 - BPS]; 369 const int F = dst[5 - BPS]; 370 const int G = dst[6 - BPS]; 371 const int H = dst[7 - BPS]; 372 DST(0, 0) = AVG2(A, B); 373 DST(1, 0) = DST(0, 2) = AVG2(B, C); 374 DST(2, 0) = DST(1, 2) = AVG2(C, D); 375 DST(3, 0) = DST(2, 2) = AVG2(D, E); 376 377 DST(0, 1) = AVG3(A, B, C); 378 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); 379 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); 380 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); 381 DST(3, 2) = AVG3(E, F, G); 382 DST(3, 3) = AVG3(F, G, H); 383 } 384 385 static void HU4_C(uint8_t* dst) { // Horizontal-Up 386 const int I = dst[-1 + 0 * BPS]; 387 const int J = dst[-1 + 1 * BPS]; 388 const int K = dst[-1 + 2 * BPS]; 389 const int L = dst[-1 + 3 * BPS]; 390 DST(0, 0) = AVG2(I, J); 391 DST(2, 0) = DST(0, 1) = AVG2(J, K); 392 DST(2, 1) = DST(0, 2) = AVG2(K, L); 393 DST(1, 0) = AVG3(I, J, K); 394 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); 395 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); 396 DST(3, 2) = 
DST(2, 2) = 397 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; 398 } 399 400 static void HD4_C(uint8_t* dst) { // Horizontal-Down 401 const int I = dst[-1 + 0 * BPS]; 402 const int J = dst[-1 + 1 * BPS]; 403 const int K = dst[-1 + 2 * BPS]; 404 const int L = dst[-1 + 3 * BPS]; 405 const int X = dst[-1 - BPS]; 406 const int A = dst[0 - BPS]; 407 const int B = dst[1 - BPS]; 408 const int C = dst[2 - BPS]; 409 410 DST(0, 0) = DST(2, 1) = AVG2(I, X); 411 DST(0, 1) = DST(2, 2) = AVG2(J, I); 412 DST(0, 2) = DST(2, 3) = AVG2(K, J); 413 DST(0, 3) = AVG2(L, K); 414 415 DST(3, 0) = AVG3(A, B, C); 416 DST(2, 0) = AVG3(X, A, B); 417 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); 418 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); 419 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); 420 DST(1, 3) = AVG3(L, K, J); 421 } 422 423 #undef DST 424 #undef AVG3 425 #undef AVG2 426 427 VP8PredFunc VP8PredLuma4[NUM_BMODES]; 428 429 //------------------------------------------------------------------------------ 430 // Chroma 431 432 #if !WEBP_NEON_OMIT_C_CODE 433 static void VE8uv_C(uint8_t* dst) { // vertical 434 int j; 435 for (j = 0; j < 8; ++j) { 436 memcpy(dst + j * BPS, dst - BPS, 8); 437 } 438 } 439 440 static void HE8uv_C(uint8_t* dst) { // horizontal 441 int j; 442 for (j = 0; j < 8; ++j) { 443 memset(dst, dst[-1], 8); 444 dst += BPS; 445 } 446 } 447 448 // helper for chroma-DC predictions 449 static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) { 450 int j; 451 for (j = 0; j < 8; ++j) { 452 memset(dst + j * BPS, value, 8); 453 } 454 } 455 456 static void DC8uv_C(uint8_t* dst) { // DC 457 int dc0 = 8; 458 int i; 459 for (i = 0; i < 8; ++i) { 460 dc0 += dst[i - BPS] + dst[-1 + i * BPS]; 461 } 462 Put8x8uv(dc0 >> 4, dst); 463 } 464 465 static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples 466 int dc0 = 4; 467 int i; 468 for (i = 0; i < 8; ++i) { 469 dc0 += dst[i - BPS]; 470 } 471 Put8x8uv(dc0 >> 3, dst); 472 } 473 474 static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top 
samples 475 int dc0 = 4; 476 int i; 477 for (i = 0; i < 8; ++i) { 478 dc0 += dst[-1 + i * BPS]; 479 } 480 Put8x8uv(dc0 >> 3, dst); 481 } 482 483 static void DC8uvNoTopLeft_C(uint8_t* dst) { // DC with nothing 484 Put8x8uv(0x80, dst); 485 } 486 #endif // !WEBP_NEON_OMIT_C_CODE 487 488 VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES]; 489 490 //------------------------------------------------------------------------------ 491 // Edge filtering functions 492 493 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 494 // 4 pixels in, 2 pixels out 495 static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) { 496 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; 497 const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892] 498 const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15] 499 const int a2 = VP8ksclip2[(a + 3) >> 3]; 500 p[-step] = VP8kclip1[p0 + a2]; 501 p[ 0] = VP8kclip1[q0 - a1]; 502 } 503 504 // 4 pixels in, 4 pixels out 505 static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) { 506 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; 507 const int a = 3 * (q0 - p0); 508 const int a1 = VP8ksclip2[(a + 4) >> 3]; 509 const int a2 = VP8ksclip2[(a + 3) >> 3]; 510 const int a3 = (a1 + 1) >> 1; 511 p[-2*step] = VP8kclip1[p1 + a3]; 512 p[- step] = VP8kclip1[p0 + a2]; 513 p[ 0] = VP8kclip1[q0 - a1]; 514 p[ step] = VP8kclip1[q1 - a3]; 515 } 516 517 // 6 pixels in, 6 pixels out 518 static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) { 519 const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; 520 const int q0 = p[0], q1 = p[step], q2 = p[2*step]; 521 const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]]; 522 // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9] 523 const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 524 const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 525 const int a3 = (9 * a + 63) >> 7; // eq. 
to ((1 * a + 7) * 9) >> 7 526 p[-3*step] = VP8kclip1[p2 + a3]; 527 p[-2*step] = VP8kclip1[p1 + a2]; 528 p[- step] = VP8kclip1[p0 + a1]; 529 p[ 0] = VP8kclip1[q0 - a1]; 530 p[ step] = VP8kclip1[q1 - a2]; 531 p[ 2*step] = VP8kclip1[q2 - a3]; 532 } 533 534 static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) { 535 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; 536 return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh); 537 } 538 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 539 540 #if !WEBP_NEON_OMIT_C_CODE 541 static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) { 542 const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step]; 543 return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t); 544 } 545 #endif // !WEBP_NEON_OMIT_C_CODE 546 547 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 548 static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p, 549 int step, int t, int it) { 550 const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step]; 551 const int p0 = p[-step], q0 = p[0]; 552 const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step]; 553 if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0; 554 return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it && 555 VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it && 556 VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it; 557 } 558 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 559 560 //------------------------------------------------------------------------------ 561 // Simple In-loop filtering (Paragraph 15.2) 562 563 #if !WEBP_NEON_OMIT_C_CODE 564 static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) { 565 int i; 566 const int thresh2 = 2 * thresh + 1; 567 for (i = 0; i < 16; ++i) { 568 if (NeedsFilter_C(p + i, stride, thresh2)) { 569 DoFilter2_C(p + i, stride); 570 } 571 } 572 } 573 574 static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) { 575 int i; 576 
const int thresh2 = 2 * thresh + 1; 577 for (i = 0; i < 16; ++i) { 578 if (NeedsFilter_C(p + i * stride, 1, thresh2)) { 579 DoFilter2_C(p + i * stride, 1); 580 } 581 } 582 } 583 584 static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) { 585 int k; 586 for (k = 3; k > 0; --k) { 587 p += 4 * stride; 588 SimpleVFilter16_C(p, stride, thresh); 589 } 590 } 591 592 static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) { 593 int k; 594 for (k = 3; k > 0; --k) { 595 p += 4; 596 SimpleHFilter16_C(p, stride, thresh); 597 } 598 } 599 #endif // !WEBP_NEON_OMIT_C_CODE 600 601 //------------------------------------------------------------------------------ 602 // Complex In-loop filtering (Paragraph 15.3) 603 604 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 605 static WEBP_INLINE void FilterLoop26_C(uint8_t* p, 606 int hstride, int vstride, int size, 607 int thresh, int ithresh, 608 int hev_thresh) { 609 const int thresh2 = 2 * thresh + 1; 610 while (size-- > 0) { 611 if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) { 612 if (Hev(p, hstride, hev_thresh)) { 613 DoFilter2_C(p, hstride); 614 } else { 615 DoFilter6_C(p, hstride); 616 } 617 } 618 p += vstride; 619 } 620 } 621 622 static WEBP_INLINE void FilterLoop24_C(uint8_t* p, 623 int hstride, int vstride, int size, 624 int thresh, int ithresh, 625 int hev_thresh) { 626 const int thresh2 = 2 * thresh + 1; 627 while (size-- > 0) { 628 if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) { 629 if (Hev(p, hstride, hev_thresh)) { 630 DoFilter2_C(p, hstride); 631 } else { 632 DoFilter4_C(p, hstride); 633 } 634 } 635 p += vstride; 636 } 637 } 638 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 639 640 #if !WEBP_NEON_OMIT_C_CODE 641 // on macroblock edges 642 static void VFilter16_C(uint8_t* p, int stride, 643 int thresh, int ithresh, int hev_thresh) { 644 FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh); 645 } 646 647 static void HFilter16_C(uint8_t* p, int stride, 648 int 
thresh, int ithresh, int hev_thresh) { 649 FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh); 650 } 651 652 // on three inner edges 653 static void VFilter16i_C(uint8_t* p, int stride, 654 int thresh, int ithresh, int hev_thresh) { 655 int k; 656 for (k = 3; k > 0; --k) { 657 p += 4 * stride; 658 FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh); 659 } 660 } 661 #endif // !WEBP_NEON_OMIT_C_CODE 662 663 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 664 static void HFilter16i_C(uint8_t* p, int stride, 665 int thresh, int ithresh, int hev_thresh) { 666 int k; 667 for (k = 3; k > 0; --k) { 668 p += 4; 669 FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh); 670 } 671 } 672 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 673 674 #if !WEBP_NEON_OMIT_C_CODE 675 // 8-pixels wide variant, for chroma filtering 676 static void VFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v, 677 int stride, int thresh, int ithresh, int hev_thresh) { 678 FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh); 679 FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh); 680 } 681 #endif // !WEBP_NEON_OMIT_C_CODE 682 683 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 684 static void HFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v, 685 int stride, int thresh, int ithresh, int hev_thresh) { 686 FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh); 687 FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh); 688 } 689 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 690 691 #if !WEBP_NEON_OMIT_C_CODE 692 static void VFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v, 693 int stride, int thresh, int ithresh, int hev_thresh) { 694 FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); 695 FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); 696 } 697 #endif // !WEBP_NEON_OMIT_C_CODE 698 699 #if !WEBP_NEON_OMIT_C_CODE 
|| WEBP_NEON_WORK_AROUND_GCC 700 static void HFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v, 701 int stride, int thresh, int ithresh, int hev_thresh) { 702 FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); 703 FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); 704 } 705 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 706 707 //------------------------------------------------------------------------------ 708 709 static void DitherCombine8x8_C(const uint8_t* WEBP_RESTRICT dither, 710 uint8_t* WEBP_RESTRICT dst, int dst_stride) { 711 int i, j; 712 for (j = 0; j < 8; ++j) { 713 for (i = 0; i < 8; ++i) { 714 const int delta0 = dither[i] - VP8_DITHER_AMP_CENTER; 715 const int delta1 = 716 (delta0 + VP8_DITHER_DESCALE_ROUNDER) >> VP8_DITHER_DESCALE; 717 dst[i] = clip_8b((int)dst[i] + delta1); 718 } 719 dst += dst_stride; 720 dither += 8; 721 } 722 } 723 724 //------------------------------------------------------------------------------ 725 726 VP8DecIdct2 VP8Transform; 727 VP8DecIdct VP8TransformAC3; 728 VP8DecIdct VP8TransformUV; 729 VP8DecIdct VP8TransformDC; 730 VP8DecIdct VP8TransformDCUV; 731 732 VP8LumaFilterFunc VP8VFilter16; 733 VP8LumaFilterFunc VP8HFilter16; 734 VP8ChromaFilterFunc VP8VFilter8; 735 VP8ChromaFilterFunc VP8HFilter8; 736 VP8LumaFilterFunc VP8VFilter16i; 737 VP8LumaFilterFunc VP8HFilter16i; 738 VP8ChromaFilterFunc VP8VFilter8i; 739 VP8ChromaFilterFunc VP8HFilter8i; 740 VP8SimpleFilterFunc VP8SimpleVFilter16; 741 VP8SimpleFilterFunc VP8SimpleHFilter16; 742 VP8SimpleFilterFunc VP8SimpleVFilter16i; 743 VP8SimpleFilterFunc VP8SimpleHFilter16i; 744 745 void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither, 746 uint8_t* WEBP_RESTRICT dst, int dst_stride); 747 748 extern VP8CPUInfo VP8GetCPUInfo; 749 extern void VP8DspInitSSE2(void); 750 extern void VP8DspInitSSE41(void); 751 extern void VP8DspInitNEON(void); 752 extern void VP8DspInitMIPS32(void); 753 extern void 
VP8DspInitMIPSdspR2(void); 754 extern void VP8DspInitMSA(void); 755 756 WEBP_DSP_INIT_FUNC(VP8DspInit) { 757 VP8InitClipTables(); 758 759 #if !WEBP_NEON_OMIT_C_CODE 760 VP8TransformWHT = TransformWHT_C; 761 VP8Transform = TransformTwo_C; 762 VP8TransformDC = TransformDC_C; 763 VP8TransformAC3 = TransformAC3_C; 764 #endif 765 VP8TransformUV = TransformUV_C; 766 VP8TransformDCUV = TransformDCUV_C; 767 768 #if !WEBP_NEON_OMIT_C_CODE 769 VP8VFilter16 = VFilter16_C; 770 VP8VFilter16i = VFilter16i_C; 771 VP8HFilter16 = HFilter16_C; 772 VP8VFilter8 = VFilter8_C; 773 VP8VFilter8i = VFilter8i_C; 774 VP8SimpleVFilter16 = SimpleVFilter16_C; 775 VP8SimpleHFilter16 = SimpleHFilter16_C; 776 VP8SimpleVFilter16i = SimpleVFilter16i_C; 777 VP8SimpleHFilter16i = SimpleHFilter16i_C; 778 #endif 779 780 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 781 VP8HFilter16i = HFilter16i_C; 782 VP8HFilter8 = HFilter8_C; 783 VP8HFilter8i = HFilter8i_C; 784 #endif 785 786 #if !WEBP_NEON_OMIT_C_CODE 787 VP8PredLuma4[0] = DC4_C; 788 VP8PredLuma4[1] = TM4_C; 789 VP8PredLuma4[2] = VE4_C; 790 VP8PredLuma4[4] = RD4_C; 791 VP8PredLuma4[6] = LD4_C; 792 #endif 793 794 VP8PredLuma4[3] = HE4_C; 795 VP8PredLuma4[5] = VR4_C; 796 VP8PredLuma4[7] = VL4_C; 797 VP8PredLuma4[8] = HD4_C; 798 VP8PredLuma4[9] = HU4_C; 799 800 #if !WEBP_NEON_OMIT_C_CODE 801 VP8PredLuma16[0] = DC16_C; 802 VP8PredLuma16[1] = TM16_C; 803 VP8PredLuma16[2] = VE16_C; 804 VP8PredLuma16[3] = HE16_C; 805 VP8PredLuma16[4] = DC16NoTop_C; 806 VP8PredLuma16[5] = DC16NoLeft_C; 807 VP8PredLuma16[6] = DC16NoTopLeft_C; 808 809 VP8PredChroma8[0] = DC8uv_C; 810 VP8PredChroma8[1] = TM8uv_C; 811 VP8PredChroma8[2] = VE8uv_C; 812 VP8PredChroma8[3] = HE8uv_C; 813 VP8PredChroma8[4] = DC8uvNoTop_C; 814 VP8PredChroma8[5] = DC8uvNoLeft_C; 815 VP8PredChroma8[6] = DC8uvNoTopLeft_C; 816 #endif 817 818 VP8DitherCombine8x8 = DitherCombine8x8_C; 819 820 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
821 if (VP8GetCPUInfo != NULL) { 822 #if defined(WEBP_HAVE_SSE2) 823 if (VP8GetCPUInfo(kSSE2)) { 824 VP8DspInitSSE2(); 825 #if defined(WEBP_HAVE_SSE41) 826 if (VP8GetCPUInfo(kSSE4_1)) { 827 VP8DspInitSSE41(); 828 } 829 #endif 830 } 831 #endif 832 #if defined(WEBP_USE_MIPS32) 833 if (VP8GetCPUInfo(kMIPS32)) { 834 VP8DspInitMIPS32(); 835 } 836 #endif 837 #if defined(WEBP_USE_MIPS_DSP_R2) 838 if (VP8GetCPUInfo(kMIPSdspR2)) { 839 VP8DspInitMIPSdspR2(); 840 } 841 #endif 842 #if defined(WEBP_USE_MSA) 843 if (VP8GetCPUInfo(kMSA)) { 844 VP8DspInitMSA(); 845 } 846 #endif 847 } 848 849 #if defined(WEBP_HAVE_NEON) 850 if (WEBP_NEON_OMIT_C_CODE || 851 (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { 852 VP8DspInitNEON(); 853 } 854 #endif 855 856 assert(VP8TransformWHT != NULL); 857 assert(VP8Transform != NULL); 858 assert(VP8TransformDC != NULL); 859 assert(VP8TransformAC3 != NULL); 860 assert(VP8TransformUV != NULL); 861 assert(VP8TransformDCUV != NULL); 862 assert(VP8VFilter16 != NULL); 863 assert(VP8HFilter16 != NULL); 864 assert(VP8VFilter8 != NULL); 865 assert(VP8HFilter8 != NULL); 866 assert(VP8VFilter16i != NULL); 867 assert(VP8HFilter16i != NULL); 868 assert(VP8VFilter8i != NULL); 869 assert(VP8HFilter8i != NULL); 870 assert(VP8SimpleVFilter16 != NULL); 871 assert(VP8SimpleHFilter16 != NULL); 872 assert(VP8SimpleVFilter16i != NULL); 873 assert(VP8SimpleHFilter16i != NULL); 874 assert(VP8PredLuma4[0] != NULL); 875 assert(VP8PredLuma4[1] != NULL); 876 assert(VP8PredLuma4[2] != NULL); 877 assert(VP8PredLuma4[3] != NULL); 878 assert(VP8PredLuma4[4] != NULL); 879 assert(VP8PredLuma4[5] != NULL); 880 assert(VP8PredLuma4[6] != NULL); 881 assert(VP8PredLuma4[7] != NULL); 882 assert(VP8PredLuma4[8] != NULL); 883 assert(VP8PredLuma4[9] != NULL); 884 assert(VP8PredLuma16[0] != NULL); 885 assert(VP8PredLuma16[1] != NULL); 886 assert(VP8PredLuma16[2] != NULL); 887 assert(VP8PredLuma16[3] != NULL); 888 assert(VP8PredLuma16[4] != NULL); 889 assert(VP8PredLuma16[5] != NULL); 890 
assert(VP8PredLuma16[6] != NULL); 891 assert(VP8PredChroma8[0] != NULL); 892 assert(VP8PredChroma8[1] != NULL); 893 assert(VP8PredChroma8[2] != NULL); 894 assert(VP8PredChroma8[3] != NULL); 895 assert(VP8PredChroma8[4] != NULL); 896 assert(VP8PredChroma8[5] != NULL); 897 assert(VP8PredChroma8[6] != NULL); 898 assert(VP8DitherCombine8x8 != NULL); 899 }