intrapred.c (30847B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <assert.h> 13 #include <math.h> 14 15 #include "config/aom_config.h" 16 #include "config/aom_dsp_rtcd.h" 17 18 #include "aom_dsp/aom_dsp_common.h" 19 #include "aom_dsp/intrapred_common.h" 20 #include "aom_mem/aom_mem.h" 21 #include "aom_ports/bitops.h" 22 23 static inline void v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh, 24 const uint8_t *above, const uint8_t *left) { 25 int r; 26 (void)left; 27 28 for (r = 0; r < bh; r++) { 29 memcpy(dst, above, bw); 30 dst += stride; 31 } 32 } 33 34 static inline void h_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh, 35 const uint8_t *above, const uint8_t *left) { 36 int r; 37 (void)above; 38 39 for (r = 0; r < bh; r++) { 40 memset(dst, left[r], bw); 41 dst += stride; 42 } 43 } 44 45 static inline int abs_diff(int a, int b) { return (a > b) ? a - b : b - a; } 46 47 static inline uint16_t paeth_predictor_single(uint16_t left, uint16_t top, 48 uint16_t top_left) { 49 const int base = top + left - top_left; 50 const int p_left = abs_diff(base, left); 51 const int p_top = abs_diff(base, top); 52 const int p_top_left = abs_diff(base, top_left); 53 54 // Return nearest to base of left, top and top_left. 55 return (p_left <= p_top && p_left <= p_top_left) ? left 56 : (p_top <= p_top_left) ? 
top 57 : top_left; 58 } 59 60 static inline void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bw, 61 int bh, const uint8_t *above, 62 const uint8_t *left) { 63 int r, c; 64 const uint8_t ytop_left = above[-1]; 65 66 for (r = 0; r < bh; r++) { 67 for (c = 0; c < bw; c++) 68 dst[c] = (uint8_t)paeth_predictor_single(left[r], above[c], ytop_left); 69 dst += stride; 70 } 71 } 72 73 // Some basic checks on weights for smooth predictor. 74 #define sm_weights_sanity_checks(weights_w, weights_h, weights_scale, \ 75 pred_scale) \ 76 assert(weights_w[0] < weights_scale); \ 77 assert(weights_h[0] < weights_scale); \ 78 assert(weights_scale - weights_w[bw - 1] < weights_scale); \ 79 assert(weights_scale - weights_h[bh - 1] < weights_scale); \ 80 assert(pred_scale < 31) // ensures no overflow when calculating predictor. 81 82 #define divide_round(value, bits) (((value) + (1 << ((bits) - 1))) >> (bits)) 83 84 static inline void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bw, 85 int bh, const uint8_t *above, 86 const uint8_t *left) { 87 const uint8_t below_pred = left[bh - 1]; // estimated by bottom-left pixel 88 const uint8_t right_pred = above[bw - 1]; // estimated by top-right pixel 89 const uint8_t *const sm_weights_w = smooth_weights + bw - 4; 90 const uint8_t *const sm_weights_h = smooth_weights + bh - 4; 91 // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE 92 const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE; 93 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE); 94 sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale, 95 log2_scale + sizeof(*dst)); 96 int r; 97 for (r = 0; r < bh; ++r) { 98 int c; 99 for (c = 0; c < bw; ++c) { 100 const uint8_t pixels[] = { above[c], below_pred, left[r], right_pred }; 101 const uint8_t weights[] = { sm_weights_h[r], scale - sm_weights_h[r], 102 sm_weights_w[c], scale - sm_weights_w[c] }; 103 uint32_t this_pred = 0; 104 int i; 105 assert(scale >= sm_weights_h[r] && scale >= sm_weights_w[c]); 106 for (i = 0; i < 4; ++i) { 
107 this_pred += weights[i] * pixels[i]; 108 } 109 dst[c] = divide_round(this_pred, log2_scale); 110 } 111 dst += stride; 112 } 113 } 114 115 static inline void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, 116 int bh, const uint8_t *above, 117 const uint8_t *left) { 118 const uint8_t below_pred = left[bh - 1]; // estimated by bottom-left pixel 119 const uint8_t *const sm_weights = smooth_weights + bh - 4; 120 // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE 121 const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE; 122 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE); 123 sm_weights_sanity_checks(sm_weights, sm_weights, scale, 124 log2_scale + sizeof(*dst)); 125 126 int r; 127 for (r = 0; r < bh; r++) { 128 int c; 129 for (c = 0; c < bw; ++c) { 130 const uint8_t pixels[] = { above[c], below_pred }; 131 const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r] }; 132 uint32_t this_pred = 0; 133 assert(scale >= sm_weights[r]); 134 int i; 135 for (i = 0; i < 2; ++i) { 136 this_pred += weights[i] * pixels[i]; 137 } 138 dst[c] = divide_round(this_pred, log2_scale); 139 } 140 dst += stride; 141 } 142 } 143 144 static inline void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw, 145 int bh, const uint8_t *above, 146 const uint8_t *left) { 147 const uint8_t right_pred = above[bw - 1]; // estimated by top-right pixel 148 const uint8_t *const sm_weights = smooth_weights + bw - 4; 149 // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE 150 const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE; 151 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE); 152 sm_weights_sanity_checks(sm_weights, sm_weights, scale, 153 log2_scale + sizeof(*dst)); 154 155 int r; 156 for (r = 0; r < bh; r++) { 157 int c; 158 for (c = 0; c < bw; ++c) { 159 const uint8_t pixels[] = { left[r], right_pred }; 160 const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] }; 161 uint32_t this_pred = 0; 162 assert(scale >= sm_weights[c]); 163 int i; 164 for (i = 0; i < 2; ++i) { 165 this_pred += 
weights[i] * pixels[i]; 166 } 167 dst[c] = divide_round(this_pred, log2_scale); 168 } 169 dst += stride; 170 } 171 } 172 173 static inline void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bw, 174 int bh, const uint8_t *above, 175 const uint8_t *left) { 176 int r; 177 (void)above; 178 (void)left; 179 180 for (r = 0; r < bh; r++) { 181 memset(dst, 128, bw); 182 dst += stride; 183 } 184 } 185 186 static inline void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bw, 187 int bh, const uint8_t *above, 188 const uint8_t *left) { 189 int i, r, expected_dc, sum = 0; 190 (void)above; 191 192 for (i = 0; i < bh; i++) sum += left[i]; 193 expected_dc = (sum + (bh >> 1)) / bh; 194 195 for (r = 0; r < bh; r++) { 196 memset(dst, expected_dc, bw); 197 dst += stride; 198 } 199 } 200 201 static inline void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bw, 202 int bh, const uint8_t *above, 203 const uint8_t *left) { 204 int i, r, expected_dc, sum = 0; 205 (void)left; 206 207 for (i = 0; i < bw; i++) sum += above[i]; 208 expected_dc = (sum + (bw >> 1)) / bw; 209 210 for (r = 0; r < bh; r++) { 211 memset(dst, expected_dc, bw); 212 dst += stride; 213 } 214 } 215 216 static inline void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh, 217 const uint8_t *above, const uint8_t *left) { 218 int i, r, expected_dc, sum = 0; 219 const int count = bw + bh; 220 221 for (i = 0; i < bw; i++) { 222 sum += above[i]; 223 } 224 for (i = 0; i < bh; i++) { 225 sum += left[i]; 226 } 227 228 expected_dc = (sum + (count >> 1)) / count; 229 230 for (r = 0; r < bh; r++) { 231 memset(dst, expected_dc, bw); 232 dst += stride; 233 } 234 } 235 236 static inline int divide_using_multiply_shift(int num, int shift1, 237 int multiplier, int shift2) { 238 const int interm = num >> shift1; 239 return interm * multiplier >> shift2; 240 } 241 242 // The constants (multiplier and shifts) for a given block size are obtained 243 // as follows: 244 // - Let sum_w_h = block width + block 
height. 245 // - Shift 'sum_w_h' right until we reach an odd number. Let the number of 246 // shifts for that block size be called 'shift1' (see the parameter in 247 // dc_predictor_rect() function), and let the odd number be 'd'. [d has only 2 248 // possible values: d = 3 for a 1:2 rect block and d = 5 for a 1:4 rect 249 // block]. 250 // - Find multipliers for (i) dividing by 3, and (ii) dividing by 5, 251 // using the "Algorithm 1" in: 252 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632 253 // by ensuring that m + n = 16 (in that algorithm). This ensures that our 2nd 254 // shift will be 16, regardless of the block size. 255 256 // Note: For low bitdepth, assembly code may be optimized by using smaller 257 // constants for smaller block sizes, where the range of the 'sum' is 258 // restricted to fewer bits. 259 260 #define DC_MULTIPLIER_1X2 0x5556 261 #define DC_MULTIPLIER_1X4 0x3334 262 263 #define DC_SHIFT2 16 264 265 static inline void dc_predictor_rect(uint8_t *dst, ptrdiff_t stride, int bw, 266 int bh, const uint8_t *above, 267 const uint8_t *left, int shift1, 268 int multiplier) { 269 int sum = 0; 270 271 for (int i = 0; i < bw; i++) { 272 sum += above[i]; 273 } 274 for (int i = 0; i < bh; i++) { 275 sum += left[i]; 276 } 277 278 const int expected_dc = divide_using_multiply_shift( 279 sum + ((bw + bh) >> 1), shift1, multiplier, DC_SHIFT2); 280 assert(expected_dc < (1 << 8)); 281 282 for (int r = 0; r < bh; r++) { 283 memset(dst, expected_dc, bw); 284 dst += stride; 285 } 286 } 287 288 #undef DC_SHIFT2 289 290 void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t stride, 291 const uint8_t *above, const uint8_t *left) { 292 dc_predictor_rect(dst, stride, 4, 8, above, left, 2, DC_MULTIPLIER_1X2); 293 } 294 295 void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t stride, 296 const uint8_t *above, const uint8_t *left) { 297 dc_predictor_rect(dst, stride, 8, 4, above, left, 2, DC_MULTIPLIER_1X2); 298 } 299 300 #if !CONFIG_REALTIME_ONLY || 
CONFIG_AV1_DECODER 301 void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t stride, 302 const uint8_t *above, const uint8_t *left) { 303 dc_predictor_rect(dst, stride, 4, 16, above, left, 2, DC_MULTIPLIER_1X4); 304 } 305 306 void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t stride, 307 const uint8_t *above, const uint8_t *left) { 308 dc_predictor_rect(dst, stride, 16, 4, above, left, 2, DC_MULTIPLIER_1X4); 309 } 310 #endif // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 311 312 void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t stride, 313 const uint8_t *above, const uint8_t *left) { 314 dc_predictor_rect(dst, stride, 8, 16, above, left, 3, DC_MULTIPLIER_1X2); 315 } 316 317 void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t stride, 318 const uint8_t *above, const uint8_t *left) { 319 dc_predictor_rect(dst, stride, 16, 8, above, left, 3, DC_MULTIPLIER_1X2); 320 } 321 322 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 323 void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t stride, 324 const uint8_t *above, const uint8_t *left) { 325 dc_predictor_rect(dst, stride, 8, 32, above, left, 3, DC_MULTIPLIER_1X4); 326 } 327 328 void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t stride, 329 const uint8_t *above, const uint8_t *left) { 330 dc_predictor_rect(dst, stride, 32, 8, above, left, 3, DC_MULTIPLIER_1X4); 331 } 332 #endif // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 333 334 void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t stride, 335 const uint8_t *above, const uint8_t *left) { 336 dc_predictor_rect(dst, stride, 16, 32, above, left, 4, DC_MULTIPLIER_1X2); 337 } 338 339 void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t stride, 340 const uint8_t *above, const uint8_t *left) { 341 dc_predictor_rect(dst, stride, 32, 16, above, left, 4, DC_MULTIPLIER_1X2); 342 } 343 344 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 345 void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t stride, 346 const uint8_t *above, const uint8_t *left) { 347 
dc_predictor_rect(dst, stride, 16, 64, above, left, 4, DC_MULTIPLIER_1X4); 348 } 349 350 void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t stride, 351 const uint8_t *above, const uint8_t *left) { 352 dc_predictor_rect(dst, stride, 64, 16, above, left, 4, DC_MULTIPLIER_1X4); 353 } 354 #endif // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 355 356 void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t stride, 357 const uint8_t *above, const uint8_t *left) { 358 dc_predictor_rect(dst, stride, 32, 64, above, left, 5, DC_MULTIPLIER_1X2); 359 } 360 361 void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride, 362 const uint8_t *above, const uint8_t *left) { 363 dc_predictor_rect(dst, stride, 64, 32, above, left, 5, DC_MULTIPLIER_1X2); 364 } 365 366 #undef DC_MULTIPLIER_1X2 367 #undef DC_MULTIPLIER_1X4 368 369 #if CONFIG_AV1_HIGHBITDEPTH 370 371 static inline void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw, 372 int bh, const uint16_t *above, 373 const uint16_t *left, int bd) { 374 int r; 375 (void)left; 376 (void)bd; 377 for (r = 0; r < bh; r++) { 378 memcpy(dst, above, bw * sizeof(uint16_t)); 379 dst += stride; 380 } 381 } 382 383 static inline void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw, 384 int bh, const uint16_t *above, 385 const uint16_t *left, int bd) { 386 int r; 387 (void)above; 388 (void)bd; 389 for (r = 0; r < bh; r++) { 390 aom_memset16(dst, left[r], bw); 391 dst += stride; 392 } 393 } 394 395 static inline void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride, 396 int bw, int bh, const uint16_t *above, 397 const uint16_t *left, int bd) { 398 int r, c; 399 const uint16_t ytop_left = above[-1]; 400 (void)bd; 401 402 for (r = 0; r < bh; r++) { 403 for (c = 0; c < bw; c++) 404 dst[c] = paeth_predictor_single(left[r], above[c], ytop_left); 405 dst += stride; 406 } 407 } 408 409 static inline void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride, 410 int bw, int bh, 411 const uint16_t *above, 412 const uint16_t 
*left, int bd) { 413 (void)bd; 414 const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel 415 const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel 416 const uint8_t *const sm_weights_w = smooth_weights + bw - 4; 417 const uint8_t *const sm_weights_h = smooth_weights + bh - 4; 418 // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE 419 const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE; 420 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE); 421 sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale, 422 log2_scale + sizeof(*dst)); 423 int r; 424 for (r = 0; r < bh; ++r) { 425 int c; 426 for (c = 0; c < bw; ++c) { 427 const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred }; 428 const uint8_t weights[] = { sm_weights_h[r], scale - sm_weights_h[r], 429 sm_weights_w[c], scale - sm_weights_w[c] }; 430 uint32_t this_pred = 0; 431 int i; 432 assert(scale >= sm_weights_h[r] && scale >= sm_weights_w[c]); 433 for (i = 0; i < 4; ++i) { 434 this_pred += weights[i] * pixels[i]; 435 } 436 dst[c] = divide_round(this_pred, log2_scale); 437 } 438 dst += stride; 439 } 440 } 441 442 static inline void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride, 443 int bw, int bh, 444 const uint16_t *above, 445 const uint16_t *left, int bd) { 446 (void)bd; 447 const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel 448 const uint8_t *const sm_weights = smooth_weights + bh - 4; 449 // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE 450 const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE; 451 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE); 452 sm_weights_sanity_checks(sm_weights, sm_weights, scale, 453 log2_scale + sizeof(*dst)); 454 455 int r; 456 for (r = 0; r < bh; r++) { 457 int c; 458 for (c = 0; c < bw; ++c) { 459 const uint16_t pixels[] = { above[c], below_pred }; 460 const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r] }; 461 uint32_t this_pred = 0; 462 assert(scale >= sm_weights[r]); 463 int i; 464 
for (i = 0; i < 2; ++i) { 465 this_pred += weights[i] * pixels[i]; 466 } 467 dst[c] = divide_round(this_pred, log2_scale); 468 } 469 dst += stride; 470 } 471 } 472 473 static inline void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride, 474 int bw, int bh, 475 const uint16_t *above, 476 const uint16_t *left, int bd) { 477 (void)bd; 478 const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel 479 const uint8_t *const sm_weights = smooth_weights + bw - 4; 480 // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE 481 const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE; 482 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE); 483 sm_weights_sanity_checks(sm_weights, sm_weights, scale, 484 log2_scale + sizeof(*dst)); 485 486 int r; 487 for (r = 0; r < bh; r++) { 488 int c; 489 for (c = 0; c < bw; ++c) { 490 const uint16_t pixels[] = { left[r], right_pred }; 491 const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] }; 492 uint32_t this_pred = 0; 493 assert(scale >= sm_weights[c]); 494 int i; 495 for (i = 0; i < 2; ++i) { 496 this_pred += weights[i] * pixels[i]; 497 } 498 dst[c] = divide_round(this_pred, log2_scale); 499 } 500 dst += stride; 501 } 502 } 503 504 static inline void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride, 505 int bw, int bh, 506 const uint16_t *above, 507 const uint16_t *left, int bd) { 508 int r; 509 (void)above; 510 (void)left; 511 512 for (r = 0; r < bh; r++) { 513 aom_memset16(dst, 128 << (bd - 8), bw); 514 dst += stride; 515 } 516 } 517 518 static inline void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride, 519 int bw, int bh, 520 const uint16_t *above, 521 const uint16_t *left, int bd) { 522 int i, r, expected_dc, sum = 0; 523 (void)above; 524 (void)bd; 525 526 for (i = 0; i < bh; i++) sum += left[i]; 527 expected_dc = (sum + (bh >> 1)) / bh; 528 529 for (r = 0; r < bh; r++) { 530 aom_memset16(dst, expected_dc, bw); 531 dst += stride; 532 } 533 } 534 535 static inline void highbd_dc_top_predictor(uint16_t 
*dst, ptrdiff_t stride, 536 int bw, int bh, 537 const uint16_t *above, 538 const uint16_t *left, int bd) { 539 int i, r, expected_dc, sum = 0; 540 (void)left; 541 (void)bd; 542 543 for (i = 0; i < bw; i++) sum += above[i]; 544 expected_dc = (sum + (bw >> 1)) / bw; 545 546 for (r = 0; r < bh; r++) { 547 aom_memset16(dst, expected_dc, bw); 548 dst += stride; 549 } 550 } 551 552 static inline void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw, 553 int bh, const uint16_t *above, 554 const uint16_t *left, int bd) { 555 int i, r, expected_dc, sum = 0; 556 const int count = bw + bh; 557 (void)bd; 558 559 for (i = 0; i < bw; i++) { 560 sum += above[i]; 561 } 562 for (i = 0; i < bh; i++) { 563 sum += left[i]; 564 } 565 566 expected_dc = (sum + (count >> 1)) / count; 567 568 for (r = 0; r < bh; r++) { 569 aom_memset16(dst, expected_dc, bw); 570 dst += stride; 571 } 572 } 573 574 // Obtained similarly as DC_MULTIPLIER_1X2 and DC_MULTIPLIER_1X4 above, but 575 // assume 2nd shift of 17 bits instead of 16. 576 // Note: Strictly speaking, 2nd shift needs to be 17 only when: 577 // - bit depth == 12, and 578 // - bw + bh is divisible by 5 (as opposed to divisible by 3). 579 // All other cases can use half the multipliers with a shift of 16 instead. 580 // This special optimization can be used when writing assembly code. 581 #define HIGHBD_DC_MULTIPLIER_1X2 0xAAAB 582 // Note: This constant is odd, but a smaller even constant (0x199a) with the 583 // appropriate shift should work for neon in 8/10-bit. 
584 #define HIGHBD_DC_MULTIPLIER_1X4 0x6667 585 586 #define HIGHBD_DC_SHIFT2 17 587 588 static inline void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride, 589 int bw, int bh, 590 const uint16_t *above, 591 const uint16_t *left, int bd, 592 int shift1, uint32_t multiplier) { 593 int sum = 0; 594 (void)bd; 595 596 for (int i = 0; i < bw; i++) { 597 sum += above[i]; 598 } 599 for (int i = 0; i < bh; i++) { 600 sum += left[i]; 601 } 602 603 const int expected_dc = divide_using_multiply_shift( 604 sum + ((bw + bh) >> 1), shift1, multiplier, HIGHBD_DC_SHIFT2); 605 assert(expected_dc < (1 << bd)); 606 607 for (int r = 0; r < bh; r++) { 608 aom_memset16(dst, expected_dc, bw); 609 dst += stride; 610 } 611 } 612 613 #undef HIGHBD_DC_SHIFT2 614 615 void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride, 616 const uint16_t *above, const uint16_t *left, 617 int bd) { 618 highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd, 2, 619 HIGHBD_DC_MULTIPLIER_1X2); 620 } 621 622 void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride, 623 const uint16_t *above, const uint16_t *left, 624 int bd) { 625 highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd, 2, 626 HIGHBD_DC_MULTIPLIER_1X2); 627 } 628 629 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 630 void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride, 631 const uint16_t *above, const uint16_t *left, 632 int bd) { 633 highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd, 2, 634 HIGHBD_DC_MULTIPLIER_1X4); 635 } 636 637 void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride, 638 const uint16_t *above, const uint16_t *left, 639 int bd) { 640 highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd, 2, 641 HIGHBD_DC_MULTIPLIER_1X4); 642 } 643 #endif // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 644 645 void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride, 646 const uint16_t *above, const uint16_t *left, 647 int bd) { 648 
highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd, 3, 649 HIGHBD_DC_MULTIPLIER_1X2); 650 } 651 652 void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride, 653 const uint16_t *above, const uint16_t *left, 654 int bd) { 655 highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd, 3, 656 HIGHBD_DC_MULTIPLIER_1X2); 657 } 658 659 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 660 void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride, 661 const uint16_t *above, const uint16_t *left, 662 int bd) { 663 highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd, 3, 664 HIGHBD_DC_MULTIPLIER_1X4); 665 } 666 667 void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride, 668 const uint16_t *above, const uint16_t *left, 669 int bd) { 670 highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd, 3, 671 HIGHBD_DC_MULTIPLIER_1X4); 672 } 673 #endif // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 674 675 void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride, 676 const uint16_t *above, 677 const uint16_t *left, int bd) { 678 highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd, 4, 679 HIGHBD_DC_MULTIPLIER_1X2); 680 } 681 682 void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride, 683 const uint16_t *above, 684 const uint16_t *left, int bd) { 685 highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd, 4, 686 HIGHBD_DC_MULTIPLIER_1X2); 687 } 688 689 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 690 void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride, 691 const uint16_t *above, 692 const uint16_t *left, int bd) { 693 highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd, 4, 694 HIGHBD_DC_MULTIPLIER_1X4); 695 } 696 697 void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride, 698 const uint16_t *above, 699 const uint16_t *left, int bd) { 700 highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd, 4, 701 HIGHBD_DC_MULTIPLIER_1X4); 702 } 703 
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER

void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
  highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd, 5,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
  highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd, 5,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

#undef HIGHBD_DC_MULTIPLIER_1X2
#undef HIGHBD_DC_MULTIPLIER_1X4
#endif  // CONFIG_AV1_HIGHBITDEPTH

// This serves as a wrapper function, so that all the prediction functions
// can be unified and accessed as a pointer array. Note that the boundary
// above and left are not necessarily used all the time.
#define intra_pred_sized(type, width, height)                  \
  void aom_##type##_predictor_##width##x##height##_c(          \
      uint8_t *dst, ptrdiff_t stride, const uint8_t *above,    \
      const uint8_t *left) {                                   \
    type##_predictor(dst, stride, width, height, above, left); \
  }

// High-bitdepth counterpart of intra_pred_sized; expands to nothing when
// high bitdepth support is compiled out.
#if CONFIG_AV1_HIGHBITDEPTH
#define intra_pred_highbd_sized(type, width, height)                        \
  void aom_highbd_##type##_predictor_##width##x##height##_c(                \
      uint16_t *dst, ptrdiff_t stride, const uint16_t *above,               \
      const uint16_t *left, int bd) {                                       \
    highbd_##type##_predictor(dst, stride, width, height, above, left, bd); \
  }
#else  // !CONFIG_AV1_HIGHBITDEPTH
#define intra_pred_highbd_sized(type, width, height)
#endif  // CONFIG_AV1_HIGHBITDEPTH

/* clang-format off */
// Rectangular sizes: realtime-only encoder builds omit the 1:4/4:1 shapes
// (4x16, 16x4, 8x32, 32x8, 16x64, 64x16); all other builds get the full set.
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
#define intra_pred_rectangular(type) \
  intra_pred_sized(type, 4, 8) \
  intra_pred_sized(type, 8, 4) \
  intra_pred_sized(type, 8, 16) \
  intra_pred_sized(type, 16, 8) \
  intra_pred_sized(type, 16, 32) \
  intra_pred_sized(type, 32, 16) \
  intra_pred_sized(type, 32, 64) \
  intra_pred_sized(type, 64, 32) \
  intra_pred_highbd_sized(type, 4, 8) \
  intra_pred_highbd_sized(type, 8, 4) \
  intra_pred_highbd_sized(type, 8, 16) \
  intra_pred_highbd_sized(type, 16, 8) \
  intra_pred_highbd_sized(type, 16, 32) \
  intra_pred_highbd_sized(type, 32, 16) \
  intra_pred_highbd_sized(type, 32, 64) \
  intra_pred_highbd_sized(type, 64, 32)
#else
#define intra_pred_rectangular(type) \
  intra_pred_sized(type, 4, 8) \
  intra_pred_sized(type, 8, 4) \
  intra_pred_sized(type, 8, 16) \
  intra_pred_sized(type, 16, 8) \
  intra_pred_sized(type, 16, 32) \
  intra_pred_sized(type, 32, 16) \
  intra_pred_sized(type, 32, 64) \
  intra_pred_sized(type, 64, 32) \
  intra_pred_sized(type, 4, 16) \
  intra_pred_sized(type, 16, 4) \
  intra_pred_sized(type, 8, 32) \
  intra_pred_sized(type, 32, 8) \
  intra_pred_sized(type, 16, 64) \
  intra_pred_sized(type, 64, 16) \
  intra_pred_highbd_sized(type, 4, 8) \
  intra_pred_highbd_sized(type, 8, 4) \
  intra_pred_highbd_sized(type, 8, 16) \
  intra_pred_highbd_sized(type, 16, 8) \
  intra_pred_highbd_sized(type, 16, 32) \
  intra_pred_highbd_sized(type, 32, 16) \
  intra_pred_highbd_sized(type, 32, 64) \
  intra_pred_highbd_sized(type, 64, 32) \
  intra_pred_highbd_sized(type, 4, 16) \
  intra_pred_highbd_sized(type, 16, 4) \
  intra_pred_highbd_sized(type, 8, 32) \
  intra_pred_highbd_sized(type, 32, 8) \
  intra_pred_highbd_sized(type, 16, 64) \
  intra_pred_highbd_sized(type, 64, 16)
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER

// All square sizes above 4x4 (low bitdepth) plus every highbd square size,
// and every rectangular size. Note: lowbd 4x4 is added by intra_pred_allsizes.
#define intra_pred_above_4x4(type) \
  intra_pred_sized(type, 8, 8) \
  intra_pred_sized(type, 16, 16) \
  intra_pred_sized(type, 32, 32) \
  intra_pred_sized(type, 64, 64) \
  intra_pred_highbd_sized(type, 4, 4) \
  intra_pred_highbd_sized(type, 8, 8) \
  intra_pred_highbd_sized(type, 16, 16) \
  intra_pred_highbd_sized(type, 32, 32) \
  intra_pred_highbd_sized(type, 64, 64) \
  intra_pred_rectangular(type)
#define intra_pred_allsizes(type) \
  intra_pred_sized(type, 4, 4) \
  intra_pred_above_4x4(type)
// Square sizes only — used for the 'dc' type, whose rectangular sizes are
// hand-written above via dc_predictor_rect / highbd_dc_predictor_rect.
#define intra_pred_square(type) \
  intra_pred_sized(type, 4, 4) \
  intra_pred_sized(type, 8, 8) \
  intra_pred_sized(type, 16, 16) \
  intra_pred_sized(type, 32, 32) \
  intra_pred_sized(type, 64, 64) \
  intra_pred_highbd_sized(type, 4, 4) \
  intra_pred_highbd_sized(type, 8, 8) \
  intra_pred_highbd_sized(type, 16, 16) \
  intra_pred_highbd_sized(type, 32, 32) \
  intra_pred_highbd_sized(type, 64, 64)

intra_pred_allsizes(v)
intra_pred_allsizes(h)
intra_pred_allsizes(smooth)
intra_pred_allsizes(smooth_v)
intra_pred_allsizes(smooth_h)
intra_pred_allsizes(paeth)
intra_pred_allsizes(dc_128)
intra_pred_allsizes(dc_left)
intra_pred_allsizes(dc_top)
intra_pred_square(dc)
/* clang-format on */
#undef intra_pred_allsizes