reconintra.c (70328B)
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <math.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/aom_once.h"
#include "aom_ports/mem.h"
#include "av1/common/av1_common_int.h"
#include "av1/common/cfl.h"
#include "av1/common/reconintra.h"

// Bit flags naming the neighbouring edge regions an intra prediction mode
// reads. They are OR-ed together, one combination per mode, in extend_modes[]
// below. Note that the lowest flag is bit 1, not bit 0.
enum {
  NEED_LEFT = 1 << 1,
  NEED_ABOVE = 1 << 2,
  NEED_ABOVERIGHT = 1 << 3,
  NEED_ABOVELEFT = 1 << 4,
  NEED_BOTTOMLEFT = 1 << 5,
};

// NOTE(review): the four constants below are not referenced in this part of
// the file; judging by their names they parameterise intra-edge filtering /
// upsampling and the size of the neighbour-pixel buffers -- confirm against
// their users further down.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)

// For every intra prediction mode (one entry per mode, in the order given by
// the trailing comments), the set of NEED_* flags that mode requires.
static const uint8_t extend_modes[INTRA_MODES] = {
  NEED_ABOVE | NEED_LEFT,                   // DC
  NEED_ABOVE,                               // V
  NEED_LEFT,                                // H
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
};

// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers.
E.g., for the 32x32 58 // blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster 59 // order), so its flag is stored at the 3rd bit of the 2nd entry in the table, 60 // i.e. (table[10 / 8] >> (10 % 8)) & 1. 61 // . . . . 62 // . . . . 63 // . . o . 64 // . . . . 65 static uint8_t has_tr_4x4[128] = { 66 255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, 67 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, 68 255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, 69 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, 70 255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, 71 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, 72 255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, 73 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85, 74 }; 75 static uint8_t has_tr_4x8[64] = { 76 255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119, 77 119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127, 78 127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119, 79 119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127, 80 119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119, 81 }; 82 static uint8_t has_tr_8x4[64] = { 83 255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0, 84 127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0, 85 255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0, 86 127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0, 87 }; 88 static uint8_t has_tr_8x8[32] = { 89 255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85, 90 255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85, 91 }; 92 static uint8_t has_tr_8x16[16] = { 93 255, 255, 119, 119, 127, 127, 119, 119, 94 255, 127, 119, 119, 127, 127, 119, 119, 95 }; 96 static uint8_t has_tr_16x8[16] = { 97 255, 0, 85, 0, 119, 0, 85, 0, 127, 
0, 85, 0, 119, 0, 85, 0, 98 }; 99 static uint8_t has_tr_16x16[8] = { 100 255, 85, 119, 85, 127, 85, 119, 85, 101 }; 102 static uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 }; 103 static uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 }; 104 static uint8_t has_tr_32x32[2] = { 95, 87 }; 105 static uint8_t has_tr_32x64[1] = { 127 }; 106 static uint8_t has_tr_64x32[1] = { 19 }; 107 static uint8_t has_tr_64x64[1] = { 7 }; 108 static uint8_t has_tr_64x128[1] = { 3 }; 109 static uint8_t has_tr_128x64[1] = { 1 }; 110 static uint8_t has_tr_128x128[1] = { 1 }; 111 static uint8_t has_tr_4x16[32] = { 112 255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255, 113 127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127, 114 127, 127, 255, 127, 255, 127, 127, 127, 127, 127, 115 }; 116 static uint8_t has_tr_16x4[32] = { 117 255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0, 118 127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0, 119 }; 120 static uint8_t has_tr_8x32[8] = { 121 255, 255, 127, 127, 255, 127, 127, 127, 122 }; 123 static uint8_t has_tr_32x8[8] = { 124 15, 0, 5, 0, 7, 0, 5, 0, 125 }; 126 static uint8_t has_tr_16x64[2] = { 255, 127 }; 127 static uint8_t has_tr_64x16[2] = { 3, 1 }; 128 129 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = { 130 // 4X4 131 has_tr_4x4, 132 // 4X8, 8X4, 8X8 133 has_tr_4x8, has_tr_8x4, has_tr_8x8, 134 // 8X16, 16X8, 16X16 135 has_tr_8x16, has_tr_16x8, has_tr_16x16, 136 // 16X32, 32X16, 32X32 137 has_tr_16x32, has_tr_32x16, has_tr_32x32, 138 // 32X64, 64X32, 64X64 139 has_tr_32x64, has_tr_64x32, has_tr_64x64, 140 // 64x128, 128x64, 128x128 141 has_tr_64x128, has_tr_128x64, has_tr_128x128, 142 // 4x16, 16x4, 8x32 143 has_tr_4x16, has_tr_16x4, has_tr_8x32, 144 // 32x8, 16x64, 64x16 145 has_tr_32x8, has_tr_16x64, has_tr_64x16 146 }; 147 148 static uint8_t has_tr_vert_8x8[32] = { 149 255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0, 150 255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0, 151 }; 152 static uint8_t 
has_tr_vert_16x16[8] = { 153 255, 0, 119, 0, 127, 0, 119, 0, 154 }; 155 static uint8_t has_tr_vert_32x32[2] = { 15, 7 }; 156 static uint8_t has_tr_vert_64x64[1] = { 3 }; 157 158 // The _vert_* tables are like the ordinary tables above, but describe the 159 // order we visit square blocks when doing a PARTITION_VERT_A or 160 // PARTITION_VERT_B. This is the same order as normal except for on the last 161 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block 162 // as a pair of squares, which means that these tables work correctly for both 163 // mixed vertical partition types. 164 // 165 // There are tables for each of the square sizes. Vertical rectangles (like 166 // BLOCK_16X32) use their respective "non-vert" table 167 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = { 168 // 4X4 169 NULL, 170 // 4X8, 8X4, 8X8 171 has_tr_4x8, NULL, has_tr_vert_8x8, 172 // 8X16, 16X8, 16X16 173 has_tr_8x16, NULL, has_tr_vert_16x16, 174 // 16X32, 32X16, 32X32 175 has_tr_16x32, NULL, has_tr_vert_32x32, 176 // 32X64, 64X32, 64X64 177 has_tr_32x64, NULL, has_tr_vert_64x64, 178 // 64x128, 128x64, 128x128 179 has_tr_64x128, NULL, has_tr_128x128 180 }; 181 182 static const uint8_t *get_has_tr_table(PARTITION_TYPE partition, 183 BLOCK_SIZE bsize) { 184 const uint8_t *ret = NULL; 185 // If this is a mixed vertical partition, look up bsize in orders_vert. 
186 if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) { 187 assert(bsize < BLOCK_SIZES); 188 ret = has_tr_vert_tables[bsize]; 189 } else { 190 ret = has_tr_tables[bsize]; 191 } 192 assert(ret); 193 return ret; 194 } 195 196 static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row, 197 int mi_col, int top_available, int right_available, 198 PARTITION_TYPE partition, TX_SIZE txsz, int row_off, 199 int col_off, int ss_x, int ss_y) { 200 if (!top_available || !right_available) return 0; 201 202 const int bw_unit = mi_size_wide[bsize]; 203 const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1); 204 const int top_right_count_unit = tx_size_wide_unit[txsz]; 205 206 if (row_off > 0) { // Just need to check if enough pixels on the right. 207 if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) { 208 // Special case: For 128x128 blocks, the transform unit whose 209 // top-right corner is at the center of the block does in fact have 210 // pixels available at its top-right corner. 211 if (row_off == mi_size_high[BLOCK_64X64] >> ss_y && 212 col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) { 213 return 1; 214 } 215 const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x; 216 const int col_off_64 = col_off % plane_bw_unit_64; 217 return col_off_64 + top_right_count_unit < plane_bw_unit_64; 218 } 219 return col_off + top_right_count_unit < plane_bw_unit; 220 } else { 221 // All top-right pixels are in the block above, which is already available. 
222 if (col_off + top_right_count_unit < plane_bw_unit) return 1; 223 224 const int bw_in_mi_log2 = mi_size_wide_log2[bsize]; 225 const int bh_in_mi_log2 = mi_size_high_log2[bsize]; 226 const int sb_mi_size = mi_size_high[sb_size]; 227 const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2; 228 const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2; 229 230 // Top row of superblock: so top-right pixels are in the top and/or 231 // top-right superblocks, both of which are already available. 232 if (blk_row_in_sb == 0) return 1; 233 234 // Rightmost column of superblock (and not the top row): so top-right pixels 235 // fall in the right superblock, which is not available yet. 236 if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) { 237 return 0; 238 } 239 240 // General case (neither top row nor rightmost column): check if the 241 // top-right block is coded before the current block. 242 const int this_blk_index = 243 ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) + 244 blk_col_in_sb + 0; 245 const int idx1 = this_blk_index / 8; 246 const int idx2 = this_blk_index % 8; 247 const uint8_t *has_tr_table = get_has_tr_table(partition, bsize); 248 return (has_tr_table[idx1] >> idx2) & 1; 249 } 250 } 251 252 // Similar to the has_tr_* tables, but store if the bottom-left reference 253 // pixels are available. 
// Packed bottom-left availability flags, one table per block size (first
// part). Each byte holds eight one-bit flags. The tables are read-only lookup
// data, hence const-qualified.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static uint8_t has_bl_128x64[1] = { 0 }; 296 static uint8_t has_bl_128x128[1] = { 0 }; 297 static uint8_t has_bl_4x16[32] = { 298 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 299 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 300 }; 301 static uint8_t has_bl_16x4[32] = { 302 254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0, 303 254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0, 304 }; 305 static uint8_t has_bl_8x32[8] = { 306 0, 1, 0, 0, 0, 1, 0, 0, 307 }; 308 static uint8_t has_bl_32x8[8] = { 309 238, 78, 238, 14, 238, 78, 238, 14, 310 }; 311 static uint8_t has_bl_16x64[2] = { 0, 0 }; 312 static uint8_t has_bl_64x16[2] = { 42, 42 }; 313 314 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = { 315 // 4X4 316 has_bl_4x4, 317 // 4X8, 8X4, 8X8 318 has_bl_4x8, has_bl_8x4, has_bl_8x8, 319 // 8X16, 16X8, 16X16 320 has_bl_8x16, has_bl_16x8, has_bl_16x16, 321 // 16X32, 32X16, 32X32 322 has_bl_16x32, has_bl_32x16, has_bl_32x32, 323 // 32X64, 64X32, 64X64 324 has_bl_32x64, has_bl_64x32, has_bl_64x64, 325 // 64x128, 128x64, 128x128 326 has_bl_64x128, has_bl_128x64, has_bl_128x128, 327 // 4x16, 16x4, 8x32 328 has_bl_4x16, has_bl_16x4, has_bl_8x32, 329 // 32x8, 16x64, 64x16 330 has_bl_32x8, has_bl_16x64, has_bl_64x16 331 }; 332 333 static uint8_t has_bl_vert_8x8[32] = { 334 254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0, 335 254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0, 336 }; 337 static uint8_t has_bl_vert_16x16[8] = { 338 254, 16, 254, 0, 254, 16, 254, 0, 339 }; 340 static uint8_t has_bl_vert_32x32[2] = { 14, 14 }; 341 static uint8_t has_bl_vert_64x64[1] = { 2 }; 342 343 // The _vert_* tables are like the ordinary tables above, but describe the 344 // order we visit square blocks when doing a PARTITION_VERT_A or 345 // PARTITION_VERT_B. This is the same order as normal except for on the last 346 // split where we go vertically (TL, BL, TR, BR). 
We treat the rectangular block 347 // as a pair of squares, which means that these tables work correctly for both 348 // mixed vertical partition types. 349 // 350 // There are tables for each of the square sizes. Vertical rectangles (like 351 // BLOCK_16X32) use their respective "non-vert" table 352 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = { 353 // 4X4 354 NULL, 355 // 4X8, 8X4, 8X8 356 has_bl_4x8, NULL, has_bl_vert_8x8, 357 // 8X16, 16X8, 16X16 358 has_bl_8x16, NULL, has_bl_vert_16x16, 359 // 16X32, 32X16, 32X32 360 has_bl_16x32, NULL, has_bl_vert_32x32, 361 // 32X64, 64X32, 64X64 362 has_bl_32x64, NULL, has_bl_vert_64x64, 363 // 64x128, 128x64, 128x128 364 has_bl_64x128, NULL, has_bl_128x128 365 }; 366 367 static const uint8_t *get_has_bl_table(PARTITION_TYPE partition, 368 BLOCK_SIZE bsize) { 369 const uint8_t *ret = NULL; 370 // If this is a mixed vertical partition, look up bsize in orders_vert. 371 if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) { 372 assert(bsize < BLOCK_SIZES); 373 ret = has_bl_vert_tables[bsize]; 374 } else { 375 ret = has_bl_tables[bsize]; 376 } 377 assert(ret); 378 return ret; 379 } 380 381 static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row, 382 int mi_col, int bottom_available, int left_available, 383 PARTITION_TYPE partition, TX_SIZE txsz, int row_off, 384 int col_off, int ss_x, int ss_y) { 385 if (!bottom_available || !left_available) return 0; 386 387 // Special case for 128x* blocks, when col_off is half the block width. 388 // This is needed because 128x* superblocks are divided into 64x* blocks in 389 // raster order 390 if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) { 391 const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x; 392 const int col_off_64 = col_off % plane_bw_unit_64; 393 if (col_off_64 == 0) { 394 // We are at the left edge of top-right or bottom-right 64x* block. 
395 const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y; 396 const int row_off_64 = row_off % plane_bh_unit_64; 397 const int plane_bh_unit = 398 AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64); 399 // Check if all bottom-left pixels are in the left 64x* block (which is 400 // already coded). 401 return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit; 402 } 403 } 404 405 if (col_off > 0) { 406 // Bottom-left pixels are in the bottom-left block, which is not available. 407 return 0; 408 } else { 409 const int bh_unit = mi_size_high[bsize]; 410 const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1); 411 const int bottom_left_count_unit = tx_size_high_unit[txsz]; 412 413 // All bottom-left pixels are in the left block, which is already available. 414 if (row_off + bottom_left_count_unit < plane_bh_unit) return 1; 415 416 const int bw_in_mi_log2 = mi_size_wide_log2[bsize]; 417 const int bh_in_mi_log2 = mi_size_high_log2[bsize]; 418 const int sb_mi_size = mi_size_high[sb_size]; 419 const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2; 420 const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2; 421 422 // Leftmost column of superblock: so bottom-left pixels maybe in the left 423 // and/or bottom-left superblocks. But only the left superblock is 424 // available, so check if all required pixels fall in that superblock. 425 if (blk_col_in_sb == 0) { 426 const int blk_start_row_off = 427 blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >> 428 ss_y; 429 const int row_off_in_sb = blk_start_row_off + row_off; 430 const int sb_height_unit = sb_mi_size >> ss_y; 431 return row_off_in_sb + bottom_left_count_unit < sb_height_unit; 432 } 433 434 // Bottom row of superblock (and not the leftmost column): so bottom-left 435 // pixels fall in the bottom superblock, which is not available yet. 
436 if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0; 437 438 // General case (neither leftmost column nor bottom row): check if the 439 // bottom-left block is coded before the current block. 440 const int this_blk_index = 441 ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) + 442 blk_col_in_sb + 0; 443 const int idx1 = this_blk_index / 8; 444 const int idx2 = this_blk_index % 8; 445 const uint8_t *has_bl_table = get_has_bl_table(partition, bsize); 446 return (has_bl_table[idx1] >> idx2) & 1; 447 } 448 } 449 450 typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, 451 const uint8_t *above, const uint8_t *left); 452 453 static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL]; 454 static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL]; 455 456 #if CONFIG_AV1_HIGHBITDEPTH 457 typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride, 458 const uint16_t *above, const uint16_t *left, 459 int bd); 460 static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL]; 461 static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL]; 462 #endif 463 464 static void init_intra_predictors_internal(void) { 465 assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES); 466 467 #if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER 468 #define INIT_RECTANGULAR(p, type) \ 469 p[TX_4X8] = aom_##type##_predictor_4x8; \ 470 p[TX_8X4] = aom_##type##_predictor_8x4; \ 471 p[TX_8X16] = aom_##type##_predictor_8x16; \ 472 p[TX_16X8] = aom_##type##_predictor_16x8; \ 473 p[TX_16X32] = aom_##type##_predictor_16x32; \ 474 p[TX_32X16] = aom_##type##_predictor_32x16; \ 475 p[TX_32X64] = aom_##type##_predictor_32x64; \ 476 p[TX_64X32] = aom_##type##_predictor_64x32; 477 #else 478 #define INIT_RECTANGULAR(p, type) \ 479 p[TX_4X8] = aom_##type##_predictor_4x8; \ 480 p[TX_8X4] = aom_##type##_predictor_8x4; \ 481 p[TX_8X16] = aom_##type##_predictor_8x16; \ 482 p[TX_16X8] = aom_##type##_predictor_16x8; \ 483 p[TX_16X32] = aom_##type##_predictor_16x32; \ 484 p[TX_32X16] = 
aom_##type##_predictor_32x16; \ 485 p[TX_32X64] = aom_##type##_predictor_32x64; \ 486 p[TX_64X32] = aom_##type##_predictor_64x32; \ 487 p[TX_4X16] = aom_##type##_predictor_4x16; \ 488 p[TX_16X4] = aom_##type##_predictor_16x4; \ 489 p[TX_8X32] = aom_##type##_predictor_8x32; \ 490 p[TX_32X8] = aom_##type##_predictor_32x8; \ 491 p[TX_16X64] = aom_##type##_predictor_16x64; \ 492 p[TX_64X16] = aom_##type##_predictor_64x16; 493 #endif // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER 494 495 #define INIT_NO_4X4(p, type) \ 496 p[TX_8X8] = aom_##type##_predictor_8x8; \ 497 p[TX_16X16] = aom_##type##_predictor_16x16; \ 498 p[TX_32X32] = aom_##type##_predictor_32x32; \ 499 p[TX_64X64] = aom_##type##_predictor_64x64; \ 500 INIT_RECTANGULAR(p, type) 501 502 #define INIT_ALL_SIZES(p, type) \ 503 p[TX_4X4] = aom_##type##_predictor_4x4; \ 504 INIT_NO_4X4(p, type) 505 506 INIT_ALL_SIZES(pred[V_PRED], v) 507 INIT_ALL_SIZES(pred[H_PRED], h) 508 INIT_ALL_SIZES(pred[PAETH_PRED], paeth) 509 INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth) 510 INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v) 511 INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h) 512 INIT_ALL_SIZES(dc_pred[0][0], dc_128) 513 INIT_ALL_SIZES(dc_pred[0][1], dc_top) 514 INIT_ALL_SIZES(dc_pred[1][0], dc_left) 515 INIT_ALL_SIZES(dc_pred[1][1], dc) 516 #if CONFIG_AV1_HIGHBITDEPTH 517 INIT_ALL_SIZES(pred_high[V_PRED], highbd_v) 518 INIT_ALL_SIZES(pred_high[H_PRED], highbd_h) 519 INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth) 520 INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth) 521 INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v) 522 INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h) 523 INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128) 524 INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top) 525 INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left) 526 INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc) 527 #endif 528 #undef intra_pred_allsizes 529 } 530 531 // Directional prediction, zone 1: 0 < angle < 90 532 void 
av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, 533 const uint8_t *above, const uint8_t *left, 534 int upsample_above, int dx, int dy) { 535 int r, c, x, base, shift, val; 536 537 (void)left; 538 (void)dy; 539 assert(dy == 1); 540 assert(dx > 0); 541 542 const int max_base_x = ((bw + bh) - 1) << upsample_above; 543 const int frac_bits = 6 - upsample_above; 544 const int base_inc = 1 << upsample_above; 545 x = dx; 546 for (r = 0; r < bh; ++r, dst += stride, x += dx) { 547 base = x >> frac_bits; 548 shift = ((x << upsample_above) & 0x3F) >> 1; 549 550 if (base >= max_base_x) { 551 for (int i = r; i < bh; ++i) { 552 memset(dst, above[max_base_x], bw * sizeof(dst[0])); 553 dst += stride; 554 } 555 return; 556 } 557 558 for (c = 0; c < bw; ++c, base += base_inc) { 559 if (base < max_base_x) { 560 val = above[base] * (32 - shift) + above[base + 1] * shift; 561 dst[c] = ROUND_POWER_OF_TWO(val, 5); 562 } else { 563 dst[c] = above[max_base_x]; 564 } 565 } 566 } 567 } 568 569 // Directional prediction, zone 2: 90 < angle < 180 570 void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, 571 const uint8_t *above, const uint8_t *left, 572 int upsample_above, int upsample_left, int dx, 573 int dy) { 574 assert(dx > 0); 575 assert(dy > 0); 576 577 const int min_base_x = -(1 << upsample_above); 578 const int min_base_y = -(1 << upsample_left); 579 (void)min_base_y; 580 const int frac_bits_x = 6 - upsample_above; 581 const int frac_bits_y = 6 - upsample_left; 582 583 for (int r = 0; r < bh; ++r) { 584 for (int c = 0; c < bw; ++c) { 585 int val; 586 int y = r + 1; 587 int x = (c << 6) - y * dx; 588 const int base_x = x >> frac_bits_x; 589 if (base_x >= min_base_x) { 590 const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1; 591 val = above[base_x] * (32 - shift) + above[base_x + 1] * shift; 592 val = ROUND_POWER_OF_TWO(val, 5); 593 } else { 594 x = c + 1; 595 y = (r << 6) - x * dy; 596 const int base_y = y >> frac_bits_y; 597 
assert(base_y >= min_base_y); 598 const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1; 599 val = left[base_y] * (32 - shift) + left[base_y + 1] * shift; 600 val = ROUND_POWER_OF_TWO(val, 5); 601 } 602 dst[c] = val; 603 } 604 dst += stride; 605 } 606 } 607 608 // Directional prediction, zone 3: 180 < angle < 270 609 void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, 610 const uint8_t *above, const uint8_t *left, 611 int upsample_left, int dx, int dy) { 612 int r, c, y, base, shift, val; 613 614 (void)above; 615 (void)dx; 616 617 assert(dx == 1); 618 assert(dy > 0); 619 620 const int max_base_y = (bw + bh - 1) << upsample_left; 621 const int frac_bits = 6 - upsample_left; 622 const int base_inc = 1 << upsample_left; 623 y = dy; 624 for (c = 0; c < bw; ++c, y += dy) { 625 base = y >> frac_bits; 626 shift = ((y << upsample_left) & 0x3F) >> 1; 627 628 for (r = 0; r < bh; ++r, base += base_inc) { 629 if (base < max_base_y) { 630 val = left[base] * (32 - shift) + left[base + 1] * shift; 631 dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5); 632 } else { 633 for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y]; 634 break; 635 } 636 } 637 } 638 } 639 640 static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, 641 const uint8_t *above, const uint8_t *left, 642 int upsample_above, int upsample_left, int angle) { 643 const int dx = av1_get_dx(angle); 644 const int dy = av1_get_dy(angle); 645 const int bw = tx_size_wide[tx_size]; 646 const int bh = tx_size_high[tx_size]; 647 assert(angle > 0 && angle < 270); 648 649 if (angle > 0 && angle < 90) { 650 av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx, 651 dy); 652 } else if (angle > 90 && angle < 180) { 653 av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above, 654 upsample_left, dx, dy); 655 } else if (angle > 180 && angle < 270) { 656 av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx, 657 dy); 658 } else if 
(angle == 90) { 659 pred[V_PRED][tx_size](dst, stride, above, left); 660 } else if (angle == 180) { 661 pred[H_PRED][tx_size](dst, stride, above, left); 662 } 663 } 664 665 #if CONFIG_AV1_HIGHBITDEPTH 666 // Directional prediction, zone 1: 0 < angle < 90 667 void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, 668 int bh, const uint16_t *above, 669 const uint16_t *left, int upsample_above, 670 int dx, int dy, int bd) { 671 int r, c, x, base, shift, val; 672 673 (void)left; 674 (void)dy; 675 (void)bd; 676 assert(dy == 1); 677 assert(dx > 0); 678 679 const int max_base_x = ((bw + bh) - 1) << upsample_above; 680 const int frac_bits = 6 - upsample_above; 681 const int base_inc = 1 << upsample_above; 682 x = dx; 683 for (r = 0; r < bh; ++r, dst += stride, x += dx) { 684 base = x >> frac_bits; 685 shift = ((x << upsample_above) & 0x3F) >> 1; 686 687 if (base >= max_base_x) { 688 for (int i = r; i < bh; ++i) { 689 aom_memset16(dst, above[max_base_x], bw); 690 dst += stride; 691 } 692 return; 693 } 694 695 for (c = 0; c < bw; ++c, base += base_inc) { 696 if (base < max_base_x) { 697 val = above[base] * (32 - shift) + above[base + 1] * shift; 698 dst[c] = ROUND_POWER_OF_TWO(val, 5); 699 } else { 700 dst[c] = above[max_base_x]; 701 } 702 } 703 } 704 } 705 706 // Directional prediction, zone 2: 90 < angle < 180 707 void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw, 708 int bh, const uint16_t *above, 709 const uint16_t *left, int upsample_above, 710 int upsample_left, int dx, int dy, int bd) { 711 (void)bd; 712 assert(dx > 0); 713 assert(dy > 0); 714 715 const int min_base_x = -(1 << upsample_above); 716 const int min_base_y = -(1 << upsample_left); 717 (void)min_base_y; 718 const int frac_bits_x = 6 - upsample_above; 719 const int frac_bits_y = 6 - upsample_left; 720 721 for (int r = 0; r < bh; ++r) { 722 for (int c = 0; c < bw; ++c) { 723 int val; 724 int y = r + 1; 725 int x = (c << 6) - y * dx; 726 const int base_x = x >> 
frac_bits_x; 727 if (base_x >= min_base_x) { 728 const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1; 729 val = above[base_x] * (32 - shift) + above[base_x + 1] * shift; 730 val = ROUND_POWER_OF_TWO(val, 5); 731 } else { 732 x = c + 1; 733 y = (r << 6) - x * dy; 734 const int base_y = y >> frac_bits_y; 735 assert(base_y >= min_base_y); 736 const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1; 737 val = left[base_y] * (32 - shift) + left[base_y + 1] * shift; 738 val = ROUND_POWER_OF_TWO(val, 5); 739 } 740 dst[c] = val; 741 } 742 dst += stride; 743 } 744 } 745 746 // Directional prediction, zone 3: 180 < angle < 270 747 void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw, 748 int bh, const uint16_t *above, 749 const uint16_t *left, int upsample_left, 750 int dx, int dy, int bd) { 751 int r, c, y, base, shift, val; 752 753 (void)above; 754 (void)dx; 755 (void)bd; 756 assert(dx == 1); 757 assert(dy > 0); 758 759 const int max_base_y = (bw + bh - 1) << upsample_left; 760 const int frac_bits = 6 - upsample_left; 761 const int base_inc = 1 << upsample_left; 762 y = dy; 763 for (c = 0; c < bw; ++c, y += dy) { 764 base = y >> frac_bits; 765 shift = ((y << upsample_left) & 0x3F) >> 1; 766 767 for (r = 0; r < bh; ++r, base += base_inc) { 768 if (base < max_base_y) { 769 val = left[base] * (32 - shift) + left[base + 1] * shift; 770 dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5); 771 } else { 772 for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y]; 773 break; 774 } 775 } 776 } 777 } 778 779 static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, 780 TX_SIZE tx_size, const uint16_t *above, 781 const uint16_t *left, int upsample_above, 782 int upsample_left, int angle, int bd) { 783 const int dx = av1_get_dx(angle); 784 const int dy = av1_get_dy(angle); 785 const int bw = tx_size_wide[tx_size]; 786 const int bh = tx_size_high[tx_size]; 787 assert(angle > 0 && angle < 270); 788 789 if (angle > 0 && angle < 90) { 790 
av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left, 791 upsample_above, dx, dy, bd); 792 } else if (angle > 90 && angle < 180) { 793 av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left, 794 upsample_above, upsample_left, dx, dy, bd); 795 } else if (angle > 180 && angle < 270) { 796 av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, 797 dx, dy, bd); 798 } else if (angle == 90) { 799 pred_high[V_PRED][tx_size](dst, stride, above, left, bd); 800 } else if (angle == 180) { 801 pred_high[H_PRED][tx_size](dst, stride, above, left, bd); 802 } 803 } 804 #endif // CONFIG_AV1_HIGHBITDEPTH 805 806 DECLARE_ALIGNED(16, const int8_t, 807 av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = { 808 { 809 { -6, 10, 0, 0, 0, 12, 0, 0 }, 810 { -5, 2, 10, 0, 0, 9, 0, 0 }, 811 { -3, 1, 1, 10, 0, 7, 0, 0 }, 812 { -3, 1, 1, 2, 10, 5, 0, 0 }, 813 { -4, 6, 0, 0, 0, 2, 12, 0 }, 814 { -3, 2, 6, 0, 0, 2, 9, 0 }, 815 { -3, 2, 2, 6, 0, 2, 7, 0 }, 816 { -3, 1, 2, 2, 6, 3, 5, 0 }, 817 }, 818 { 819 { -10, 16, 0, 0, 0, 10, 0, 0 }, 820 { -6, 0, 16, 0, 0, 6, 0, 0 }, 821 { -4, 0, 0, 16, 0, 4, 0, 0 }, 822 { -2, 0, 0, 0, 16, 2, 0, 0 }, 823 { -10, 16, 0, 0, 0, 0, 10, 0 }, 824 { -6, 0, 16, 0, 0, 0, 6, 0 }, 825 { -4, 0, 0, 16, 0, 0, 4, 0 }, 826 { -2, 0, 0, 0, 16, 0, 2, 0 }, 827 }, 828 { 829 { -8, 8, 0, 0, 0, 16, 0, 0 }, 830 { -8, 0, 8, 0, 0, 16, 0, 0 }, 831 { -8, 0, 0, 8, 0, 16, 0, 0 }, 832 { -8, 0, 0, 0, 8, 16, 0, 0 }, 833 { -4, 4, 0, 0, 0, 0, 16, 0 }, 834 { -4, 0, 4, 0, 0, 0, 16, 0 }, 835 { -4, 0, 0, 4, 0, 0, 16, 0 }, 836 { -4, 0, 0, 0, 4, 0, 16, 0 }, 837 }, 838 { 839 { -2, 8, 0, 0, 0, 10, 0, 0 }, 840 { -1, 3, 8, 0, 0, 6, 0, 0 }, 841 { -1, 2, 3, 8, 0, 4, 0, 0 }, 842 { 0, 1, 2, 3, 8, 2, 0, 0 }, 843 { -1, 4, 0, 0, 0, 3, 10, 0 }, 844 { -1, 3, 4, 0, 0, 4, 6, 0 }, 845 { -1, 2, 3, 4, 0, 4, 4, 0 }, 846 { -1, 2, 2, 3, 4, 3, 3, 0 }, 847 }, 848 { 849 { -12, 14, 0, 0, 0, 14, 0, 0 }, 850 { -10, 0, 14, 0, 0, 12, 0, 0 }, 851 { -9, 0, 0, 14, 0, 11, 0, 0 }, 852 { -8, 0, 0, 0, 
14, 10, 0, 0 }, 853 { -10, 12, 0, 0, 0, 0, 14, 0 }, 854 { -9, 1, 12, 0, 0, 0, 12, 0 }, 855 { -8, 0, 0, 12, 0, 1, 11, 0 }, 856 { -7, 0, 0, 1, 12, 1, 9, 0 }, 857 }, 858 }; 859 860 void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride, 861 TX_SIZE tx_size, const uint8_t *above, 862 const uint8_t *left, int mode) { 863 int r, c; 864 uint8_t buffer[33][33]; 865 const int bw = tx_size_wide[tx_size]; 866 const int bh = tx_size_high[tx_size]; 867 868 assert(bw <= 32 && bh <= 32); 869 870 for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r]; 871 memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t)); 872 873 for (r = 1; r < bh + 1; r += 2) 874 for (c = 1; c < bw + 1; c += 4) { 875 const uint8_t p0 = buffer[r - 1][c - 1]; 876 const uint8_t p1 = buffer[r - 1][c]; 877 const uint8_t p2 = buffer[r - 1][c + 1]; 878 const uint8_t p3 = buffer[r - 1][c + 2]; 879 const uint8_t p4 = buffer[r - 1][c + 3]; 880 const uint8_t p5 = buffer[r][c - 1]; 881 const uint8_t p6 = buffer[r + 1][c - 1]; 882 for (int k = 0; k < 8; ++k) { 883 int r_offset = k >> 2; 884 int c_offset = k & 0x03; 885 int pr = av1_filter_intra_taps[mode][k][0] * p0 + 886 av1_filter_intra_taps[mode][k][1] * p1 + 887 av1_filter_intra_taps[mode][k][2] * p2 + 888 av1_filter_intra_taps[mode][k][3] * p3 + 889 av1_filter_intra_taps[mode][k][4] * p4 + 890 av1_filter_intra_taps[mode][k][5] * p5 + 891 av1_filter_intra_taps[mode][k][6] * p6; 892 // Section 7.11.2.3 specifies the right-hand side of the assignment as 893 // Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ). 894 // Since Clip1() clips a negative value to 0, it is safe to replace 895 // Round2Signed() with Round2(). 
896 buffer[r + r_offset][c + c_offset] = 897 clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS)); 898 } 899 } 900 901 for (r = 0; r < bh; ++r) { 902 memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t)); 903 dst += stride; 904 } 905 } 906 907 #if CONFIG_AV1_HIGHBITDEPTH 908 static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride, 909 TX_SIZE tx_size, 910 const uint16_t *above, 911 const uint16_t *left, int mode, 912 int bd) { 913 int r, c; 914 uint16_t buffer[33][33]; 915 const int bw = tx_size_wide[tx_size]; 916 const int bh = tx_size_high[tx_size]; 917 918 assert(bw <= 32 && bh <= 32); 919 920 for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r]; 921 memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0])); 922 923 for (r = 1; r < bh + 1; r += 2) 924 for (c = 1; c < bw + 1; c += 4) { 925 const uint16_t p0 = buffer[r - 1][c - 1]; 926 const uint16_t p1 = buffer[r - 1][c]; 927 const uint16_t p2 = buffer[r - 1][c + 1]; 928 const uint16_t p3 = buffer[r - 1][c + 2]; 929 const uint16_t p4 = buffer[r - 1][c + 3]; 930 const uint16_t p5 = buffer[r][c - 1]; 931 const uint16_t p6 = buffer[r + 1][c - 1]; 932 for (int k = 0; k < 8; ++k) { 933 int r_offset = k >> 2; 934 int c_offset = k & 0x03; 935 int pr = av1_filter_intra_taps[mode][k][0] * p0 + 936 av1_filter_intra_taps[mode][k][1] * p1 + 937 av1_filter_intra_taps[mode][k][2] * p2 + 938 av1_filter_intra_taps[mode][k][3] * p3 + 939 av1_filter_intra_taps[mode][k][4] * p4 + 940 av1_filter_intra_taps[mode][k][5] * p5 + 941 av1_filter_intra_taps[mode][k][6] * p6; 942 // Section 7.11.2.3 specifies the right-hand side of the assignment as 943 // Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ). 944 // Since Clip1() clips a negative value to 0, it is safe to replace 945 // Round2Signed() with Round2(). 
946 buffer[r + r_offset][c + c_offset] = clip_pixel_highbd( 947 ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd); 948 } 949 } 950 951 for (r = 0; r < bh; ++r) { 952 memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0])); 953 dst += stride; 954 } 955 } 956 #endif // CONFIG_AV1_HIGHBITDEPTH 957 958 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) { 959 if (plane == 0) { 960 const PREDICTION_MODE mode = mbmi->mode; 961 return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED || 962 mode == SMOOTH_H_PRED); 963 } else { 964 // uv_mode is not set for inter blocks, so need to explicitly 965 // detect that case. 966 if (is_inter_block(mbmi)) return 0; 967 968 const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode; 969 return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED || 970 uv_mode == UV_SMOOTH_H_PRED); 971 } 972 } 973 974 static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) { 975 const MB_MODE_INFO *above; 976 const MB_MODE_INFO *left; 977 978 if (plane == 0) { 979 above = xd->above_mbmi; 980 left = xd->left_mbmi; 981 } else { 982 above = xd->chroma_above_mbmi; 983 left = xd->chroma_left_mbmi; 984 } 985 986 return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane)); 987 } 988 989 static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) { 990 const int d = abs(delta); 991 int strength = 0; 992 993 const int blk_wh = bs0 + bs1; 994 if (type == 0) { 995 if (blk_wh <= 8) { 996 if (d >= 56) strength = 1; 997 } else if (blk_wh <= 12) { 998 if (d >= 40) strength = 1; 999 } else if (blk_wh <= 16) { 1000 if (d >= 40) strength = 1; 1001 } else if (blk_wh <= 24) { 1002 if (d >= 8) strength = 1; 1003 if (d >= 16) strength = 2; 1004 if (d >= 32) strength = 3; 1005 } else if (blk_wh <= 32) { 1006 if (d >= 1) strength = 1; 1007 if (d >= 4) strength = 2; 1008 if (d >= 32) strength = 3; 1009 } else { 1010 if (d >= 1) strength = 3; 1011 } 1012 } else { 1013 if (blk_wh <= 8) { 1014 if (d >= 40) strength = 1; 1015 if (d 
>= 64) strength = 2; 1016 } else if (blk_wh <= 16) { 1017 if (d >= 20) strength = 1; 1018 if (d >= 48) strength = 2; 1019 } else if (blk_wh <= 24) { 1020 if (d >= 4) strength = 3; 1021 } else { 1022 if (d >= 1) strength = 3; 1023 } 1024 } 1025 return strength; 1026 } 1027 1028 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) { 1029 if (!strength) return; 1030 1031 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 }, 1032 { 0, 5, 6, 5, 0 }, 1033 { 2, 4, 4, 4, 2 } }; 1034 const int filt = strength - 1; 1035 uint8_t edge[129]; 1036 1037 memcpy(edge, p, sz * sizeof(*p)); 1038 for (int i = 1; i < sz; i++) { 1039 int s = 0; 1040 for (int j = 0; j < INTRA_EDGE_TAPS; j++) { 1041 int k = i - 2 + j; 1042 k = (k < 0) ? 0 : k; 1043 k = (k > sz - 1) ? sz - 1 : k; 1044 s += edge[k] * kernel[filt][j]; 1045 } 1046 s = (s + 8) >> 4; 1047 p[i] = s; 1048 } 1049 } 1050 1051 static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) { 1052 const int kernel[3] = { 5, 6, 5 }; 1053 1054 int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) + 1055 (p_above[0] * kernel[2]); 1056 s = (s + 8) >> 4; 1057 p_above[-1] = s; 1058 p_left[-1] = s; 1059 } 1060 1061 void av1_upsample_intra_edge_c(uint8_t *p, int sz) { 1062 // interpolate half-sample positions 1063 assert(sz <= MAX_UPSAMPLE_SZ); 1064 1065 uint8_t in[MAX_UPSAMPLE_SZ + 3]; 1066 // copy p[-1..(sz-1)] and extend first and last samples 1067 in[0] = p[-1]; 1068 in[1] = p[-1]; 1069 for (int i = 0; i < sz; i++) { 1070 in[i + 2] = p[i]; 1071 } 1072 in[sz + 2] = p[sz - 1]; 1073 1074 // interpolate half-sample edge positions 1075 p[-2] = in[0]; 1076 for (int i = 0; i < sz; i++) { 1077 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3]; 1078 s = clip_pixel((s + 8) >> 4); 1079 p[2 * i - 1] = s; 1080 p[2 * i] = in[i + 2]; 1081 } 1082 } 1083 1084 static void build_directional_and_filter_intra_predictors( 1085 const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, 1086 
PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode, 1087 TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px, 1088 int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) { 1089 int i; 1090 const uint8_t *above_ref = ref - ref_stride; 1091 const uint8_t *left_ref = ref - 1; 1092 DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]); 1093 DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]); 1094 uint8_t *const above_row = above_data + 16; 1095 uint8_t *const left_col = left_data + 16; 1096 const int txwpx = tx_size_wide[tx_size]; 1097 const int txhpx = tx_size_high[tx_size]; 1098 int need_left = extend_modes[mode] & NEED_LEFT; 1099 int need_above = extend_modes[mode] & NEED_ABOVE; 1100 int need_above_left = extend_modes[mode] & NEED_ABOVELEFT; 1101 const int is_dr_mode = av1_is_directional_mode(mode); 1102 const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES; 1103 assert(use_filter_intra || is_dr_mode); 1104 // The left_data, above_data buffers must be zeroed to fix some intermittent 1105 // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4 1106 // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to 1107 // be the potential reason for this issue. 1108 memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS); 1109 memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS); 1110 1111 // The default values if ref pixels are not available: 1112 // 128 127 127 .. 127 127 127 127 127 127 1113 // 129 A B .. Y Z 1114 // 129 C D .. W X 1115 // 129 E F .. U V 1116 // 129 G H .. S T T T T T 1117 // .. 
1118 1119 if (is_dr_mode) { 1120 if (p_angle <= 90) 1121 need_above = 1, need_left = 0, need_above_left = 1; 1122 else if (p_angle < 180) 1123 need_above = 1, need_left = 1, need_above_left = 1; 1124 else 1125 need_above = 0, need_left = 1, need_above_left = 1; 1126 } 1127 if (use_filter_intra) need_left = need_above = need_above_left = 1; 1128 1129 assert(n_top_px >= 0); 1130 assert(n_topright_px >= -1); 1131 assert(n_left_px >= 0); 1132 assert(n_bottomleft_px >= -1); 1133 1134 if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) { 1135 int val; 1136 if (need_left) { 1137 val = (n_top_px > 0) ? above_ref[0] : 129; 1138 } else { 1139 val = (n_left_px > 0) ? left_ref[0] : 127; 1140 } 1141 for (i = 0; i < txhpx; ++i) { 1142 memset(dst, val, txwpx); 1143 dst += dst_stride; 1144 } 1145 return; 1146 } 1147 1148 // NEED_LEFT 1149 if (need_left) { 1150 const int num_left_pixels_needed = 1151 txhpx + (n_bottomleft_px >= 0 ? txwpx : 0); 1152 i = 0; 1153 if (n_left_px > 0) { 1154 for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride]; 1155 if (n_bottomleft_px > 0) { 1156 assert(i == txhpx); 1157 for (; i < txhpx + n_bottomleft_px; i++) 1158 left_col[i] = left_ref[i * ref_stride]; 1159 } 1160 if (i < num_left_pixels_needed) 1161 memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i); 1162 } else if (n_top_px > 0) { 1163 memset(left_col, above_ref[0], num_left_pixels_needed); 1164 } 1165 } 1166 1167 // NEED_ABOVE 1168 if (need_above) { 1169 const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? 
txhpx : 0); 1170 if (n_top_px > 0) { 1171 memcpy(above_row, above_ref, n_top_px); 1172 i = n_top_px; 1173 if (n_topright_px > 0) { 1174 assert(n_top_px == txwpx); 1175 memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px); 1176 i += n_topright_px; 1177 } 1178 if (i < num_top_pixels_needed) 1179 memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i); 1180 } else if (n_left_px > 0) { 1181 memset(above_row, left_ref[0], num_top_pixels_needed); 1182 } 1183 } 1184 1185 if (need_above_left) { 1186 if (n_top_px > 0 && n_left_px > 0) { 1187 above_row[-1] = above_ref[-1]; 1188 } else if (n_top_px > 0) { 1189 above_row[-1] = above_ref[0]; 1190 } else if (n_left_px > 0) { 1191 above_row[-1] = left_ref[0]; 1192 } else { 1193 above_row[-1] = 128; 1194 } 1195 left_col[-1] = above_row[-1]; 1196 } 1197 1198 if (use_filter_intra) { 1199 av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col, 1200 filter_intra_mode); 1201 return; 1202 } 1203 1204 assert(is_dr_mode); 1205 int upsample_above = 0; 1206 int upsample_left = 0; 1207 if (!disable_edge_filter) { 1208 const int need_right = p_angle < 90; 1209 const int need_bottom = p_angle > 180; 1210 if (p_angle != 90 && p_angle != 180) { 1211 assert(need_above_left); 1212 const int ab_le = 1; 1213 if (need_above && need_left && (txwpx + txhpx >= 24)) { 1214 filter_intra_edge_corner(above_row, left_col); 1215 } 1216 if (need_above && n_top_px > 0) { 1217 const int strength = intra_edge_filter_strength( 1218 txwpx, txhpx, p_angle - 90, intra_edge_filter_type); 1219 const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0); 1220 av1_filter_intra_edge(above_row - ab_le, n_px, strength); 1221 } 1222 if (need_left && n_left_px > 0) { 1223 const int strength = intra_edge_filter_strength( 1224 txhpx, txwpx, p_angle - 180, intra_edge_filter_type); 1225 const int n_px = n_left_px + ab_le + (need_bottom ? 
txwpx : 0); 1226 av1_filter_intra_edge(left_col - ab_le, n_px, strength); 1227 } 1228 } 1229 upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, 1230 intra_edge_filter_type); 1231 if (need_above && upsample_above) { 1232 const int n_px = txwpx + (need_right ? txhpx : 0); 1233 av1_upsample_intra_edge(above_row, n_px); 1234 } 1235 upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, 1236 intra_edge_filter_type); 1237 if (need_left && upsample_left) { 1238 const int n_px = txhpx + (need_bottom ? txwpx : 0); 1239 av1_upsample_intra_edge(left_col, n_px); 1240 } 1241 } 1242 dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above, 1243 upsample_left, p_angle); 1244 } 1245 1246 // This function generates the pred data of a given block for non-directional 1247 // intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH). 1248 static void build_non_directional_intra_predictors( 1249 const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, 1250 PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) { 1251 const uint8_t *above_ref = ref - ref_stride; 1252 const uint8_t *left_ref = ref - 1; 1253 const int txwpx = tx_size_wide[tx_size]; 1254 const int txhpx = tx_size_high[tx_size]; 1255 const int need_left = extend_modes[mode] & NEED_LEFT; 1256 const int need_above = extend_modes[mode] & NEED_ABOVE; 1257 const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT; 1258 int i = 0; 1259 assert(n_top_px >= 0); 1260 assert(n_left_px >= 0); 1261 assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED || 1262 mode == SMOOTH_H_PRED || mode == PAETH_PRED); 1263 1264 if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) { 1265 int val = 0; 1266 if (need_left) { 1267 val = (n_top_px > 0) ? above_ref[0] : 129; 1268 } else { 1269 val = (n_left_px > 0) ? 
left_ref[0] : 127; 1270 } 1271 for (i = 0; i < txhpx; ++i) { 1272 memset(dst, val, txwpx); 1273 dst += dst_stride; 1274 } 1275 return; 1276 } 1277 1278 DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]); 1279 DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]); 1280 uint8_t *const above_row = above_data + 16; 1281 uint8_t *const left_col = left_data + 16; 1282 1283 if (need_left) { 1284 memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS); 1285 if (n_left_px > 0) { 1286 for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride]; 1287 if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i); 1288 } else if (n_top_px > 0) { 1289 memset(left_col, above_ref[0], txhpx); 1290 } 1291 } 1292 1293 if (need_above) { 1294 memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS); 1295 if (n_top_px > 0) { 1296 memcpy(above_row, above_ref, n_top_px); 1297 i = n_top_px; 1298 if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i); 1299 } else if (n_left_px > 0) { 1300 memset(above_row, left_ref[0], txwpx); 1301 } 1302 } 1303 1304 if (need_above_left) { 1305 if (n_top_px > 0 && n_left_px > 0) { 1306 above_row[-1] = above_ref[-1]; 1307 } else if (n_top_px > 0) { 1308 above_row[-1] = above_ref[0]; 1309 } else if (n_left_px > 0) { 1310 above_row[-1] = left_ref[0]; 1311 } else { 1312 above_row[-1] = 128; 1313 } 1314 left_col[-1] = above_row[-1]; 1315 } 1316 1317 if (mode == DC_PRED) { 1318 dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row, 1319 left_col); 1320 } else { 1321 pred[mode][tx_size](dst, dst_stride, above_row, left_col); 1322 } 1323 } 1324 1325 #if CONFIG_AV1_HIGHBITDEPTH 1326 void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) { 1327 if (!strength) return; 1328 1329 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 }, 1330 { 0, 5, 6, 5, 0 }, 1331 { 2, 4, 4, 4, 2 } }; 1332 const int filt = strength - 1; 1333 uint16_t edge[129]; 1334 1335 memcpy(edge, p, sz 
* sizeof(*p)); 1336 for (int i = 1; i < sz; i++) { 1337 int s = 0; 1338 for (int j = 0; j < INTRA_EDGE_TAPS; j++) { 1339 int k = i - 2 + j; 1340 k = (k < 0) ? 0 : k; 1341 k = (k > sz - 1) ? sz - 1 : k; 1342 s += edge[k] * kernel[filt][j]; 1343 } 1344 s = (s + 8) >> 4; 1345 p[i] = s; 1346 } 1347 } 1348 1349 static void highbd_filter_intra_edge_corner(uint16_t *p_above, 1350 uint16_t *p_left) { 1351 const int kernel[3] = { 5, 6, 5 }; 1352 1353 int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) + 1354 (p_above[0] * kernel[2]); 1355 s = (s + 8) >> 4; 1356 p_above[-1] = s; 1357 p_left[-1] = s; 1358 } 1359 1360 void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) { 1361 // interpolate half-sample positions 1362 assert(sz <= MAX_UPSAMPLE_SZ); 1363 1364 uint16_t in[MAX_UPSAMPLE_SZ + 3]; 1365 // copy p[-1..(sz-1)] and extend first and last samples 1366 in[0] = p[-1]; 1367 in[1] = p[-1]; 1368 for (int i = 0; i < sz; i++) { 1369 in[i + 2] = p[i]; 1370 } 1371 in[sz + 2] = p[sz - 1]; 1372 1373 // interpolate half-sample edge positions 1374 p[-2] = in[0]; 1375 for (int i = 0; i < sz; i++) { 1376 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3]; 1377 s = (s + 8) >> 4; 1378 s = clip_pixel_highbd(s, bd); 1379 p[2 * i - 1] = s; 1380 p[2 * i] = in[i + 2]; 1381 } 1382 } 1383 1384 static void highbd_build_directional_and_filter_intra_predictors( 1385 const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride, 1386 PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode, 1387 TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px, 1388 int n_left_px, int n_bottomleft_px, int intra_edge_filter_type, 1389 int bit_depth) { 1390 int i; 1391 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); 1392 const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8); 1393 DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]); 1394 DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]); 1395 uint16_t *const 
above_row = above_data + 16; 1396 uint16_t *const left_col = left_data + 16; 1397 const int txwpx = tx_size_wide[tx_size]; 1398 const int txhpx = tx_size_high[tx_size]; 1399 int need_left = extend_modes[mode] & NEED_LEFT; 1400 int need_above = extend_modes[mode] & NEED_ABOVE; 1401 int need_above_left = extend_modes[mode] & NEED_ABOVELEFT; 1402 const uint16_t *above_ref = ref - ref_stride; 1403 const uint16_t *left_ref = ref - 1; 1404 const int is_dr_mode = av1_is_directional_mode(mode); 1405 const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES; 1406 assert(use_filter_intra || is_dr_mode); 1407 const int base = 128 << (bit_depth - 8); 1408 // The left_data, above_data buffers must be zeroed to fix some intermittent 1409 // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4 1410 // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are 1411 // seen to be the potential reason for this issue. 1412 aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS); 1413 aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS); 1414 1415 // The default values if ref pixels are not available: 1416 // base base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1 1417 // base+1 A B .. Y Z 1418 // base+1 C D .. W X 1419 // base+1 E F .. U V 1420 // base+1 G H .. S T T T T T 1421 1422 if (is_dr_mode) { 1423 if (p_angle <= 90) 1424 need_above = 1, need_left = 0, need_above_left = 1; 1425 else if (p_angle < 180) 1426 need_above = 1, need_left = 1, need_above_left = 1; 1427 else 1428 need_above = 0, need_left = 1, need_above_left = 1; 1429 } 1430 if (use_filter_intra) need_left = need_above = need_above_left = 1; 1431 1432 assert(n_top_px >= 0); 1433 assert(n_topright_px >= -1); 1434 assert(n_left_px >= 0); 1435 assert(n_bottomleft_px >= -1); 1436 1437 if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) { 1438 int val; 1439 if (need_left) { 1440 val = (n_top_px > 0) ? 
above_ref[0] : base + 1; 1441 } else { 1442 val = (n_left_px > 0) ? left_ref[0] : base - 1; 1443 } 1444 for (i = 0; i < txhpx; ++i) { 1445 aom_memset16(dst, val, txwpx); 1446 dst += dst_stride; 1447 } 1448 return; 1449 } 1450 1451 // NEED_LEFT 1452 if (need_left) { 1453 const int num_left_pixels_needed = 1454 txhpx + (n_bottomleft_px >= 0 ? txwpx : 0); 1455 i = 0; 1456 if (n_left_px > 0) { 1457 for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride]; 1458 if (n_bottomleft_px > 0) { 1459 assert(i == txhpx); 1460 for (; i < txhpx + n_bottomleft_px; i++) 1461 left_col[i] = left_ref[i * ref_stride]; 1462 } 1463 if (i < num_left_pixels_needed) 1464 aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i); 1465 } else if (n_top_px > 0) { 1466 aom_memset16(left_col, above_ref[0], num_left_pixels_needed); 1467 } 1468 } 1469 1470 // NEED_ABOVE 1471 if (need_above) { 1472 const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0); 1473 if (n_top_px > 0) { 1474 memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0])); 1475 i = n_top_px; 1476 if (n_topright_px > 0) { 1477 assert(n_top_px == txwpx); 1478 memcpy(above_row + txwpx, above_ref + txwpx, 1479 n_topright_px * sizeof(above_ref[0])); 1480 i += n_topright_px; 1481 } 1482 if (i < num_top_pixels_needed) 1483 aom_memset16(&above_row[i], above_row[i - 1], 1484 num_top_pixels_needed - i); 1485 } else if (n_left_px > 0) { 1486 aom_memset16(above_row, left_ref[0], num_top_pixels_needed); 1487 } 1488 } 1489 1490 if (need_above_left) { 1491 if (n_top_px > 0 && n_left_px > 0) { 1492 above_row[-1] = above_ref[-1]; 1493 } else if (n_top_px > 0) { 1494 above_row[-1] = above_ref[0]; 1495 } else if (n_left_px > 0) { 1496 above_row[-1] = left_ref[0]; 1497 } else { 1498 above_row[-1] = base; 1499 } 1500 left_col[-1] = above_row[-1]; 1501 } 1502 1503 if (use_filter_intra) { 1504 highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col, 1505 filter_intra_mode, bit_depth); 
1506 return; 1507 } 1508 1509 assert(is_dr_mode); 1510 int upsample_above = 0; 1511 int upsample_left = 0; 1512 if (!disable_edge_filter) { 1513 const int need_right = p_angle < 90; 1514 const int need_bottom = p_angle > 180; 1515 if (p_angle != 90 && p_angle != 180) { 1516 assert(need_above_left); 1517 const int ab_le = 1; 1518 if (need_above && need_left && (txwpx + txhpx >= 24)) { 1519 highbd_filter_intra_edge_corner(above_row, left_col); 1520 } 1521 if (need_above && n_top_px > 0) { 1522 const int strength = intra_edge_filter_strength( 1523 txwpx, txhpx, p_angle - 90, intra_edge_filter_type); 1524 const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0); 1525 av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength); 1526 } 1527 if (need_left && n_left_px > 0) { 1528 const int strength = intra_edge_filter_strength( 1529 txhpx, txwpx, p_angle - 180, intra_edge_filter_type); 1530 const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0); 1531 av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength); 1532 } 1533 } 1534 upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, 1535 intra_edge_filter_type); 1536 if (need_above && upsample_above) { 1537 const int n_px = txwpx + (need_right ? txhpx : 0); 1538 av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth); 1539 } 1540 upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, 1541 intra_edge_filter_type); 1542 if (need_left && upsample_left) { 1543 const int n_px = txhpx + (need_bottom ? txwpx : 0); 1544 av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth); 1545 } 1546 } 1547 highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col, 1548 upsample_above, upsample_left, p_angle, bit_depth); 1549 } 1550 1551 // For HBD encode/decode, this function generates the pred data of a given 1552 // block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, 1553 // SMOOTH_V and PAETH). 
1554 static void highbd_build_non_directional_intra_predictors( 1555 const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride, 1556 PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px, 1557 int bit_depth) { 1558 int i = 0; 1559 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); 1560 const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8); 1561 const int txwpx = tx_size_wide[tx_size]; 1562 const int txhpx = tx_size_high[tx_size]; 1563 int need_left = extend_modes[mode] & NEED_LEFT; 1564 int need_above = extend_modes[mode] & NEED_ABOVE; 1565 int need_above_left = extend_modes[mode] & NEED_ABOVELEFT; 1566 const uint16_t *above_ref = ref - ref_stride; 1567 const uint16_t *left_ref = ref - 1; 1568 const int base = 128 << (bit_depth - 8); 1569 1570 assert(n_top_px >= 0); 1571 assert(n_left_px >= 0); 1572 assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED || 1573 mode == SMOOTH_H_PRED || mode == PAETH_PRED); 1574 1575 if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) { 1576 int val = 0; 1577 if (need_left) { 1578 val = (n_top_px > 0) ? above_ref[0] : base + 1; 1579 } else { 1580 val = (n_left_px > 0) ? 
left_ref[0] : base - 1; 1581 } 1582 for (i = 0; i < txhpx; ++i) { 1583 aom_memset16(dst, val, txwpx); 1584 dst += dst_stride; 1585 } 1586 return; 1587 } 1588 1589 DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]); 1590 DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]); 1591 uint16_t *const above_row = above_data + 16; 1592 uint16_t *const left_col = left_data + 16; 1593 1594 if (need_left) { 1595 aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS); 1596 if (n_left_px > 0) { 1597 for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride]; 1598 if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i); 1599 } else if (n_top_px > 0) { 1600 aom_memset16(left_col, above_ref[0], txhpx); 1601 } 1602 } 1603 1604 if (need_above) { 1605 aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS); 1606 if (n_top_px > 0) { 1607 memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0])); 1608 i = n_top_px; 1609 if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i)); 1610 } else if (n_left_px > 0) { 1611 aom_memset16(above_row, left_ref[0], txwpx); 1612 } 1613 } 1614 1615 if (need_above_left) { 1616 if (n_top_px > 0 && n_left_px > 0) { 1617 above_row[-1] = above_ref[-1]; 1618 } else if (n_top_px > 0) { 1619 above_row[-1] = above_ref[0]; 1620 } else if (n_left_px > 0) { 1621 above_row[-1] = left_ref[0]; 1622 } else { 1623 above_row[-1] = base; 1624 } 1625 left_col[-1] = above_row[-1]; 1626 } 1627 1628 if (mode == DC_PRED) { 1629 dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size]( 1630 dst, dst_stride, above_row, left_col, bit_depth); 1631 } else { 1632 pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth); 1633 } 1634 } 1635 #endif // CONFIG_AV1_HIGHBITDEPTH 1636 1637 static inline BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x, 1638 int subsampling_y) { 1639 assert(subsampling_x >= 0 && subsampling_x < 2); 1640 assert(subsampling_y >= 0 && 
subsampling_y < 2); 1641 BLOCK_SIZE bs = bsize; 1642 switch (bsize) { 1643 case BLOCK_4X4: 1644 if (subsampling_x == 1 && subsampling_y == 1) 1645 bs = BLOCK_8X8; 1646 else if (subsampling_x == 1) 1647 bs = BLOCK_8X4; 1648 else if (subsampling_y == 1) 1649 bs = BLOCK_4X8; 1650 break; 1651 case BLOCK_4X8: 1652 if (subsampling_x == 1 && subsampling_y == 1) 1653 bs = BLOCK_8X8; 1654 else if (subsampling_x == 1) 1655 bs = BLOCK_8X8; 1656 else if (subsampling_y == 1) 1657 bs = BLOCK_4X8; 1658 break; 1659 case BLOCK_8X4: 1660 if (subsampling_x == 1 && subsampling_y == 1) 1661 bs = BLOCK_8X8; 1662 else if (subsampling_x == 1) 1663 bs = BLOCK_8X4; 1664 else if (subsampling_y == 1) 1665 bs = BLOCK_8X8; 1666 break; 1667 case BLOCK_4X16: 1668 if (subsampling_x == 1 && subsampling_y == 1) 1669 bs = BLOCK_8X16; 1670 else if (subsampling_x == 1) 1671 bs = BLOCK_8X16; 1672 else if (subsampling_y == 1) 1673 bs = BLOCK_4X16; 1674 break; 1675 case BLOCK_16X4: 1676 if (subsampling_x == 1 && subsampling_y == 1) 1677 bs = BLOCK_16X8; 1678 else if (subsampling_x == 1) 1679 bs = BLOCK_16X4; 1680 else if (subsampling_y == 1) 1681 bs = BLOCK_16X8; 1682 break; 1683 default: break; 1684 } 1685 return bs; 1686 } 1687 1688 void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size, 1689 int enable_intra_edge_filter, int wpx, int hpx, 1690 TX_SIZE tx_size, PREDICTION_MODE mode, 1691 int angle_delta, int use_palette, 1692 FILTER_INTRA_MODE filter_intra_mode, 1693 const uint8_t *ref, int ref_stride, uint8_t *dst, 1694 int dst_stride, int col_off, int row_off, 1695 int plane) { 1696 const MB_MODE_INFO *const mbmi = xd->mi[0]; 1697 const int txwpx = tx_size_wide[tx_size]; 1698 const int txhpx = tx_size_high[tx_size]; 1699 const int x = col_off << MI_SIZE_LOG2; 1700 const int y = row_off << MI_SIZE_LOG2; 1701 const int is_hbd = is_cur_buf_hbd(xd); 1702 1703 assert(mode < INTRA_MODES); 1704 1705 if (use_palette) { 1706 int r, c; 1707 const uint8_t *const map = xd->plane[plane != 
0].color_index_map + 1708 xd->color_index_map_offset[plane != 0]; 1709 const uint16_t *const palette = 1710 mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE; 1711 if (is_hbd) { 1712 uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); 1713 for (r = 0; r < txhpx; ++r) { 1714 for (c = 0; c < txwpx; ++c) { 1715 dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]]; 1716 } 1717 } 1718 } else { 1719 for (r = 0; r < txhpx; ++r) { 1720 for (c = 0; c < txwpx; ++c) { 1721 dst[r * dst_stride + c] = 1722 (uint8_t)palette[map[(r + y) * wpx + c + x]]; 1723 } 1724 } 1725 } 1726 return; 1727 } 1728 1729 const struct macroblockd_plane *const pd = &xd->plane[plane]; 1730 const int ss_x = pd->subsampling_x; 1731 const int ss_y = pd->subsampling_y; 1732 const int have_top = 1733 row_off || (ss_y ? xd->chroma_up_available : xd->up_available); 1734 const int have_left = 1735 col_off || (ss_x ? xd->chroma_left_available : xd->left_available); 1736 1737 // Distance between the right edge of this prediction block to 1738 // the frame right edge 1739 const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx; 1740 // Distance between the bottom edge of this prediction block to 1741 // the frame bottom edge 1742 const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx; 1743 const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES; 1744 const int is_dr_mode = av1_is_directional_mode(mode); 1745 1746 // The computations in this function, as well as in build_intra_predictors(), 1747 // are generalized for all intra modes. Some of these operations are not 1748 // required since non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H, 1749 // SMOOTH_V, and PAETH) specifically require left and top neighbors. Hence, a 1750 // separate function build_non_directional_intra_predictors() is introduced 1751 // for these modes to avoid redundant computations while generating pred data. 1752 1753 const int n_top_px = have_top ? 
AOMMIN(txwpx, xr + txwpx) : 0; 1754 const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0; 1755 if (!use_filter_intra && !is_dr_mode) { 1756 #if CONFIG_AV1_HIGHBITDEPTH 1757 if (is_hbd) { 1758 highbd_build_non_directional_intra_predictors( 1759 ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px, 1760 xd->bd); 1761 return; 1762 } 1763 #endif // CONFIG_AV1_HIGHBITDEPTH 1764 build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride, 1765 mode, tx_size, n_top_px, n_left_px); 1766 return; 1767 } 1768 1769 const int txw = tx_size_wide_unit[tx_size]; 1770 const int txh = tx_size_high_unit[tx_size]; 1771 const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); 1772 const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); 1773 const int right_available = 1774 mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end; 1775 const int bottom_available = 1776 (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end); 1777 1778 const PARTITION_TYPE partition = mbmi->partition; 1779 1780 BLOCK_SIZE bsize = mbmi->bsize; 1781 // force 4x4 chroma component block size. 1782 if (ss_x || ss_y) { 1783 bsize = scale_chroma_bsize(bsize, ss_x, ss_y); 1784 } 1785 1786 int p_angle = 0; 1787 int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT; 1788 int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT; 1789 1790 if (use_filter_intra) { 1791 need_top_right = 0; 1792 need_bottom_left = 0; 1793 } 1794 if (is_dr_mode) { 1795 p_angle = mode_to_angle_map[mode] + angle_delta; 1796 need_top_right = p_angle < 90; 1797 need_bottom_left = p_angle > 180; 1798 } 1799 1800 // Possible states for have_top_right(TR) and have_bottom_left(BL) 1801 // -1 : TR and BL are not needed 1802 // 0 : TR and BL are needed but not available 1803 // > 0 : TR and BL are needed and pixels are available 1804 const int have_top_right = 1805 need_top_right ? 
has_top_right(sb_size, bsize, mi_row, mi_col, have_top, 1806 right_available, partition, tx_size, 1807 row_off, col_off, ss_x, ss_y) 1808 : -1; 1809 const int have_bottom_left = 1810 need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col, 1811 bottom_available, have_left, partition, 1812 tx_size, row_off, col_off, ss_x, ss_y) 1813 : -1; 1814 1815 const int disable_edge_filter = !enable_intra_edge_filter; 1816 const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane); 1817 const int n_topright_px = 1818 have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right; 1819 const int n_bottomleft_px = 1820 have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left; 1821 #if CONFIG_AV1_HIGHBITDEPTH 1822 if (is_hbd) { 1823 highbd_build_directional_and_filter_intra_predictors( 1824 ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode, 1825 tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px, 1826 n_bottomleft_px, intra_edge_filter_type, xd->bd); 1827 return; 1828 } 1829 #endif 1830 build_directional_and_filter_intra_predictors( 1831 ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode, 1832 tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px, 1833 n_bottomleft_px, intra_edge_filter_type); 1834 } 1835 1836 void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd, 1837 int plane, int blk_col, int blk_row, 1838 TX_SIZE tx_size) { 1839 const MB_MODE_INFO *const mbmi = xd->mi[0]; 1840 struct macroblockd_plane *const pd = &xd->plane[plane]; 1841 const int dst_stride = pd->dst.stride; 1842 uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2]; 1843 const PREDICTION_MODE mode = 1844 (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode); 1845 const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0; 1846 const FILTER_INTRA_MODE filter_intra_mode = 1847 (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra) 1848 ? 
mbmi->filter_intra_mode_info.filter_intra_mode 1849 : FILTER_INTRA_MODES; 1850 const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP; 1851 const SequenceHeader *seq_params = cm->seq_params; 1852 1853 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 1854 if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) { 1855 #if CONFIG_DEBUG 1856 assert(is_cfl_allowed(xd)); 1857 const BLOCK_SIZE plane_bsize = 1858 get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y); 1859 (void)plane_bsize; 1860 assert(plane_bsize < BLOCK_SIZES_ALL); 1861 if (!xd->lossless[mbmi->segment_id]) { 1862 assert(blk_col == 0); 1863 assert(blk_row == 0); 1864 assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]); 1865 assert(block_size_high[plane_bsize] == tx_size_high[tx_size]); 1866 } 1867 #endif 1868 CFL_CTX *const cfl = &xd->cfl; 1869 CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane); 1870 if (!cfl->dc_pred_is_cached[pred_plane]) { 1871 av1_predict_intra_block(xd, seq_params->sb_size, 1872 seq_params->enable_intra_edge_filter, pd->width, 1873 pd->height, tx_size, mode, angle_delta, 1874 use_palette, filter_intra_mode, dst, dst_stride, 1875 dst, dst_stride, blk_col, blk_row, plane); 1876 if (cfl->use_dc_pred_cache) { 1877 cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]); 1878 cfl->dc_pred_is_cached[pred_plane] = true; 1879 } 1880 } else { 1881 cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane); 1882 } 1883 av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane); 1884 return; 1885 } 1886 #endif // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 1887 av1_predict_intra_block( 1888 xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width, 1889 pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode, 1890 dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane); 1891 } 1892 1893 void av1_init_intra_predictors(void) { 1894 aom_once(init_intra_predictors_internal); 1895 }