tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

reconintra.c (70328B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 #include <math.h>
     14 
     15 #include "config/aom_config.h"
     16 #include "config/aom_dsp_rtcd.h"
     17 #include "config/av1_rtcd.h"
     18 
     19 #include "aom_dsp/aom_dsp_common.h"
     20 #include "aom_mem/aom_mem.h"
     21 #include "aom_ports/aom_once.h"
     22 #include "aom_ports/mem.h"
     23 #include "av1/common/av1_common_int.h"
     24 #include "av1/common/cfl.h"
     25 #include "av1/common/reconintra.h"
     26 
// Bit flags naming the neighbouring edge regions an intra prediction mode
// reads. They are OR-ed together per mode in extend_modes[]. Note that the
// lowest flag is bit 1; bit 0 is not assigned here.
enum {
  NEED_LEFT = 1 << 1,
  NEED_ABOVE = 1 << 2,
  NEED_ABOVERIGHT = 1 << 3,
  NEED_ABOVELEFT = 1 << 4,
  NEED_BOTTOMLEFT = 1 << 5,
};
     34 
// Size constants for intra edge processing.
// NOTE(review): none of these are referenced in the portion of the file
// reviewed here; the names suggest edge-filter count / tap count, maximum
// edge length eligible for upsampling, and neighbour-buffer length — confirm
// against their users.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
     39 
// Per-mode mask of NEED_* flags: which neighbouring edges each intra mode
// requires. There is one entry per intra mode (INTRA_MODES in total), in the
// order given by the trailing comments.
static const uint8_t extend_modes[INTRA_MODES] = {
  NEED_ABOVE | NEED_LEFT,                   // DC
  NEED_ABOVE,                               // V
  NEED_LEFT,                                // H
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
};
     55 
     56 // Tables to store if the top-right reference pixels are available. The flags
     57 // are represented with bits, packed into 8-bit integers. E.g., for the 32x32
     58 // blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
     59 // order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
     60 // i.e. (table[10 / 8] >> (10 % 8)) & 1.
     61 //       . . . .
     62 //       . . . .
     63 //       . . o .
     64 //       . . . .
     65 static uint8_t has_tr_4x4[128] = {
     66  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
     67  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
     68  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
     69  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
     70  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
     71  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
     72  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
     73  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
     74 };
     75 static uint8_t has_tr_4x8[64] = {
     76  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
     77  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
     78  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
     79  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
     80  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
     81 };
     82 static uint8_t has_tr_8x4[64] = {
     83  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
     84  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
     85  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
     86  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
     87 };
     88 static uint8_t has_tr_8x8[32] = {
     89  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
     90  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
     91 };
     92 static uint8_t has_tr_8x16[16] = {
     93  255, 255, 119, 119, 127, 127, 119, 119,
     94  255, 127, 119, 119, 127, 127, 119, 119,
     95 };
     96 static uint8_t has_tr_16x8[16] = {
     97  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
     98 };
     99 static uint8_t has_tr_16x16[8] = {
    100  255, 85, 119, 85, 127, 85, 119, 85,
    101 };
    102 static uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
    103 static uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
    104 static uint8_t has_tr_32x32[2] = { 95, 87 };
    105 static uint8_t has_tr_32x64[1] = { 127 };
    106 static uint8_t has_tr_64x32[1] = { 19 };
    107 static uint8_t has_tr_64x64[1] = { 7 };
    108 static uint8_t has_tr_64x128[1] = { 3 };
    109 static uint8_t has_tr_128x64[1] = { 1 };
    110 static uint8_t has_tr_128x128[1] = { 1 };
    111 static uint8_t has_tr_4x16[32] = {
    112  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
    113  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
    114  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
    115 };
    116 static uint8_t has_tr_16x4[32] = {
    117  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
    118  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
    119 };
    120 static uint8_t has_tr_8x32[8] = {
    121  255, 255, 127, 127, 255, 127, 127, 127,
    122 };
    123 static uint8_t has_tr_32x8[8] = {
    124  15, 0, 5, 0, 7, 0, 5, 0,
    125 };
    126 static uint8_t has_tr_16x64[2] = { 255, 127 };
    127 static uint8_t has_tr_64x16[2] = { 3, 1 };
    128 
// Top-right availability bitmap for each block size. There is one entry per
// block size (BLOCK_SIZES_ALL in total), listed in the order given by the
// inline comments. get_has_tr_table() reads this table for every partition
// type except PARTITION_VERT_A / PARTITION_VERT_B.
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
  // 4X4
  has_tr_4x4,
  // 4X8,       8X4,            8X8
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
  // 8X16,      16X8,           16X16
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
  // 16X32,     32X16,          32X32
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
  // 32X64,     64X32,          64X64
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
  // 64x128,    128x64,         128x128
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
  // 4x16,      16x4,            8x32
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
  // 32x8,      16x64,           64x16
  has_tr_32x8, has_tr_16x64, has_tr_64x16
};
    147 
    148 static uint8_t has_tr_vert_8x8[32] = {
    149  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
    150  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
    151 };
    152 static uint8_t has_tr_vert_16x16[8] = {
    153  255, 0, 119, 0, 127, 0, 119, 0,
    154 };
    155 static uint8_t has_tr_vert_32x32[2] = { 15, 7 };
    156 static uint8_t has_tr_vert_64x64[1] = { 3 };
    157 
// The _vert_* tables are like the ordinary tables above, but describe the
// order in which we visit square blocks when doing a PARTITION_VERT_A or
// PARTITION_VERT_B. This is the same order as normal, except on the last
// split, where we go vertically (TL, BL, TR, BR). We treat the rectangular
// block as a pair of squares, which means that these tables work correctly
// for both mixed vertical partition types.
//
// There are tables for each of the square sizes. Vertical rectangles (like
// BLOCK_16X32) use their respective "non-vert" table. Horizontal rectangles
// and 4X4 have no entry (NULL); 128x128 reuses the ordinary has_tr_128x128.
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
  // 4X4
  NULL,
  // 4X8,      8X4,         8X8
  has_tr_4x8, NULL, has_tr_vert_8x8,
  // 8X16,     16X8,        16X16
  has_tr_8x16, NULL, has_tr_vert_16x16,
  // 16X32,    32X16,       32X32
  has_tr_16x32, NULL, has_tr_vert_32x32,
  // 32X64,    64X32,       64X64
  has_tr_32x64, NULL, has_tr_vert_64x64,
  // 64x128,   128x64,      128x128
  has_tr_64x128, NULL, has_tr_128x128
};
    181 
    182 static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
    183                                       BLOCK_SIZE bsize) {
    184  const uint8_t *ret = NULL;
    185  // If this is a mixed vertical partition, look up bsize in orders_vert.
    186  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
    187    assert(bsize < BLOCK_SIZES);
    188    ret = has_tr_vert_tables[bsize];
    189  } else {
    190    ret = has_tr_tables[bsize];
    191  }
    192  assert(ret);
    193  return ret;
    194 }
    195 
    196 static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
    197                         int mi_col, int top_available, int right_available,
    198                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
    199                         int col_off, int ss_x, int ss_y) {
    200  if (!top_available || !right_available) return 0;
    201 
    202  const int bw_unit = mi_size_wide[bsize];
    203  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
    204  const int top_right_count_unit = tx_size_wide_unit[txsz];
    205 
    206  if (row_off > 0) {  // Just need to check if enough pixels on the right.
    207    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
    208      // Special case: For 128x128 blocks, the transform unit whose
    209      // top-right corner is at the center of the block does in fact have
    210      // pixels available at its top-right corner.
    211      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
    212          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
    213        return 1;
    214      }
    215      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
    216      const int col_off_64 = col_off % plane_bw_unit_64;
    217      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
    218    }
    219    return col_off + top_right_count_unit < plane_bw_unit;
    220  } else {
    221    // All top-right pixels are in the block above, which is already available.
    222    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
    223 
    224    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
    225    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
    226    const int sb_mi_size = mi_size_high[sb_size];
    227    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
    228    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
    229 
    230    // Top row of superblock: so top-right pixels are in the top and/or
    231    // top-right superblocks, both of which are already available.
    232    if (blk_row_in_sb == 0) return 1;
    233 
    234    // Rightmost column of superblock (and not the top row): so top-right pixels
    235    // fall in the right superblock, which is not available yet.
    236    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
    237      return 0;
    238    }
    239 
    240    // General case (neither top row nor rightmost column): check if the
    241    // top-right block is coded before the current block.
    242    const int this_blk_index =
    243        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
    244        blk_col_in_sb + 0;
    245    const int idx1 = this_blk_index / 8;
    246    const int idx2 = this_blk_index % 8;
    247    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
    248    return (has_tr_table[idx1] >> idx2) & 1;
    249  }
    250 }
    251 
    252 // Similar to the has_tr_* tables, but store if the bottom-left reference
    253 // pixels are available.
    254 static uint8_t has_bl_4x4[128] = {
    255  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
    256  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
    257  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
    258  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
    259  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
    260  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
    261  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
    262 };
    263 static uint8_t has_bl_4x8[64] = {
    264  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
    265  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
    266  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
    267  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
    268 };
    269 static uint8_t has_bl_8x4[64] = {
    270  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
    271  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
    272  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
    273  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
    274 };
    275 static uint8_t has_bl_8x8[32] = {
    276  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
    277  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
    278 };
    279 static uint8_t has_bl_8x16[16] = {
    280  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
    281 };
    282 static uint8_t has_bl_16x8[16] = {
    283  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
    284 };
    285 static uint8_t has_bl_16x16[8] = {
    286  84, 16, 84, 0, 84, 16, 84, 0,
    287 };
    288 static uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
    289 static uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
    290 static uint8_t has_bl_32x32[2] = { 4, 4 };
    291 static uint8_t has_bl_32x64[1] = { 0 };
    292 static uint8_t has_bl_64x32[1] = { 34 };
    293 static uint8_t has_bl_64x64[1] = { 0 };
    294 static uint8_t has_bl_64x128[1] = { 0 };
    295 static uint8_t has_bl_128x64[1] = { 0 };
    296 static uint8_t has_bl_128x128[1] = { 0 };
    297 static uint8_t has_bl_4x16[32] = {
    298  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
    299  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
    300 };
    301 static uint8_t has_bl_16x4[32] = {
    302  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
    303  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
    304 };
    305 static uint8_t has_bl_8x32[8] = {
    306  0, 1, 0, 0, 0, 1, 0, 0,
    307 };
    308 static uint8_t has_bl_32x8[8] = {
    309  238, 78, 238, 14, 238, 78, 238, 14,
    310 };
    311 static uint8_t has_bl_16x64[2] = { 0, 0 };
    312 static uint8_t has_bl_64x16[2] = { 42, 42 };
    313 
// Bottom-left availability bitmap for each block size. There is one entry per
// block size (BLOCK_SIZES_ALL in total), listed in the order given by the
// inline comments. get_has_bl_table() reads this table for every partition
// type except PARTITION_VERT_A / PARTITION_VERT_B.
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
  // 4X4
  has_bl_4x4,
  // 4X8,         8X4,         8X8
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
  // 8X16,        16X8,        16X16
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
  // 16X32,       32X16,       32X32
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
  // 32X64,       64X32,       64X64
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
  // 64x128,      128x64,      128x128
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
  // 4x16,        16x4,        8x32
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
  // 32x8,        16x64,       64x16
  has_bl_32x8, has_bl_16x64, has_bl_64x16
};
    332 
    333 static uint8_t has_bl_vert_8x8[32] = {
    334  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
    335  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
    336 };
    337 static uint8_t has_bl_vert_16x16[8] = {
    338  254, 16, 254, 0, 254, 16, 254, 0,
    339 };
    340 static uint8_t has_bl_vert_32x32[2] = { 14, 14 };
    341 static uint8_t has_bl_vert_64x64[1] = { 2 };
    342 
// The _vert_* tables are like the ordinary tables above, but describe the
// order in which we visit square blocks when doing a PARTITION_VERT_A or
// PARTITION_VERT_B. This is the same order as normal, except on the last
// split, where we go vertically (TL, BL, TR, BR). We treat the rectangular
// block as a pair of squares, which means that these tables work correctly
// for both mixed vertical partition types.
//
// There are tables for each of the square sizes. Vertical rectangles (like
// BLOCK_16X32) use their respective "non-vert" table. Horizontal rectangles
// and 4X4 have no entry (NULL); 128x128 reuses the ordinary has_bl_128x128.
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
  // 4X4
  NULL,
  // 4X8,     8X4,         8X8
  has_bl_4x8, NULL, has_bl_vert_8x8,
  // 8X16,    16X8,        16X16
  has_bl_8x16, NULL, has_bl_vert_16x16,
  // 16X32,   32X16,       32X32
  has_bl_16x32, NULL, has_bl_vert_32x32,
  // 32X64,   64X32,       64X64
  has_bl_32x64, NULL, has_bl_vert_64x64,
  // 64x128,  128x64,      128x128
  has_bl_64x128, NULL, has_bl_128x128
};
    366 
    367 static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
    368                                       BLOCK_SIZE bsize) {
    369  const uint8_t *ret = NULL;
    370  // If this is a mixed vertical partition, look up bsize in orders_vert.
    371  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
    372    assert(bsize < BLOCK_SIZES);
    373    ret = has_bl_vert_tables[bsize];
    374  } else {
    375    ret = has_bl_tables[bsize];
    376  }
    377  assert(ret);
    378  return ret;
    379 }
    380 
    381 static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
    382                           int mi_col, int bottom_available, int left_available,
    383                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
    384                           int col_off, int ss_x, int ss_y) {
    385  if (!bottom_available || !left_available) return 0;
    386 
    387  // Special case for 128x* blocks, when col_off is half the block width.
    388  // This is needed because 128x* superblocks are divided into 64x* blocks in
    389  // raster order
    390  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
    391    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
    392    const int col_off_64 = col_off % plane_bw_unit_64;
    393    if (col_off_64 == 0) {
    394      // We are at the left edge of top-right or bottom-right 64x* block.
    395      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
    396      const int row_off_64 = row_off % plane_bh_unit_64;
    397      const int plane_bh_unit =
    398          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
    399      // Check if all bottom-left pixels are in the left 64x* block (which is
    400      // already coded).
    401      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
    402    }
    403  }
    404 
    405  if (col_off > 0) {
    406    // Bottom-left pixels are in the bottom-left block, which is not available.
    407    return 0;
    408  } else {
    409    const int bh_unit = mi_size_high[bsize];
    410    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
    411    const int bottom_left_count_unit = tx_size_high_unit[txsz];
    412 
    413    // All bottom-left pixels are in the left block, which is already available.
    414    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
    415 
    416    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
    417    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
    418    const int sb_mi_size = mi_size_high[sb_size];
    419    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
    420    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
    421 
    422    // Leftmost column of superblock: so bottom-left pixels maybe in the left
    423    // and/or bottom-left superblocks. But only the left superblock is
    424    // available, so check if all required pixels fall in that superblock.
    425    if (blk_col_in_sb == 0) {
    426      const int blk_start_row_off =
    427          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
    428          ss_y;
    429      const int row_off_in_sb = blk_start_row_off + row_off;
    430      const int sb_height_unit = sb_mi_size >> ss_y;
    431      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
    432    }
    433 
    434    // Bottom row of superblock (and not the leftmost column): so bottom-left
    435    // pixels fall in the bottom superblock, which is not available yet.
    436    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
    437 
    438    // General case (neither leftmost column nor bottom row): check if the
    439    // bottom-left block is coded before the current block.
    440    const int this_blk_index =
    441        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
    442        blk_col_in_sb + 0;
    443    const int idx1 = this_blk_index / 8;
    444    const int idx2 = this_blk_index % 8;
    445    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
    446    return (has_bl_table[idx1] >> idx2) & 1;
    447  }
    448 }
    449 
// Signature shared by the 8-bit non-directional predictors: writes a
// predicted block to `dst` (row pitch `stride`) from the `above` and `left`
// reference edges.
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left);

// Dispatch tables filled in by init_intra_predictors_internal().
// pred is indexed [mode][tx_size]. dc_pred is indexed [i][j][tx_size], where
// [0][0] holds dc_128, [0][1] dc_top, [1][0] dc_left and [1][1] dc.
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];

#if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth counterparts: 16-bit samples plus the bit depth `bd`.
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd);
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
#endif
    463 
    464 static void init_intra_predictors_internal(void) {
    465  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
    466 
    467 #if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
    468 #define INIT_RECTANGULAR(p, type)             \
    469  p[TX_4X8] = aom_##type##_predictor_4x8;     \
    470  p[TX_8X4] = aom_##type##_predictor_8x4;     \
    471  p[TX_8X16] = aom_##type##_predictor_8x16;   \
    472  p[TX_16X8] = aom_##type##_predictor_16x8;   \
    473  p[TX_16X32] = aom_##type##_predictor_16x32; \
    474  p[TX_32X16] = aom_##type##_predictor_32x16; \
    475  p[TX_32X64] = aom_##type##_predictor_32x64; \
    476  p[TX_64X32] = aom_##type##_predictor_64x32;
    477 #else
    478 #define INIT_RECTANGULAR(p, type)             \
    479  p[TX_4X8] = aom_##type##_predictor_4x8;     \
    480  p[TX_8X4] = aom_##type##_predictor_8x4;     \
    481  p[TX_8X16] = aom_##type##_predictor_8x16;   \
    482  p[TX_16X8] = aom_##type##_predictor_16x8;   \
    483  p[TX_16X32] = aom_##type##_predictor_16x32; \
    484  p[TX_32X16] = aom_##type##_predictor_32x16; \
    485  p[TX_32X64] = aom_##type##_predictor_32x64; \
    486  p[TX_64X32] = aom_##type##_predictor_64x32; \
    487  p[TX_4X16] = aom_##type##_predictor_4x16;   \
    488  p[TX_16X4] = aom_##type##_predictor_16x4;   \
    489  p[TX_8X32] = aom_##type##_predictor_8x32;   \
    490  p[TX_32X8] = aom_##type##_predictor_32x8;   \
    491  p[TX_16X64] = aom_##type##_predictor_16x64; \
    492  p[TX_64X16] = aom_##type##_predictor_64x16;
    493 #endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
    494 
    495 #define INIT_NO_4X4(p, type)                  \
    496  p[TX_8X8] = aom_##type##_predictor_8x8;     \
    497  p[TX_16X16] = aom_##type##_predictor_16x16; \
    498  p[TX_32X32] = aom_##type##_predictor_32x32; \
    499  p[TX_64X64] = aom_##type##_predictor_64x64; \
    500  INIT_RECTANGULAR(p, type)
    501 
    502 #define INIT_ALL_SIZES(p, type)           \
    503  p[TX_4X4] = aom_##type##_predictor_4x4; \
    504  INIT_NO_4X4(p, type)
    505 
    506  INIT_ALL_SIZES(pred[V_PRED], v)
    507  INIT_ALL_SIZES(pred[H_PRED], h)
    508  INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
    509  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
    510  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
    511  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
    512  INIT_ALL_SIZES(dc_pred[0][0], dc_128)
    513  INIT_ALL_SIZES(dc_pred[0][1], dc_top)
    514  INIT_ALL_SIZES(dc_pred[1][0], dc_left)
    515  INIT_ALL_SIZES(dc_pred[1][1], dc)
    516 #if CONFIG_AV1_HIGHBITDEPTH
    517  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
    518  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
    519  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
    520  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
    521  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
    522  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
    523  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
    524  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
    525  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
    526  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
    527 #endif
    528 #undef intra_pred_allsizes
    529 }
    530 
    531 // Directional prediction, zone 1: 0 < angle < 90
    532 void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
    533                            const uint8_t *above, const uint8_t *left,
    534                            int upsample_above, int dx, int dy) {
    535  int r, c, x, base, shift, val;
    536 
    537  (void)left;
    538  (void)dy;
    539  assert(dy == 1);
    540  assert(dx > 0);
    541 
    542  const int max_base_x = ((bw + bh) - 1) << upsample_above;
    543  const int frac_bits = 6 - upsample_above;
    544  const int base_inc = 1 << upsample_above;
    545  x = dx;
    546  for (r = 0; r < bh; ++r, dst += stride, x += dx) {
    547    base = x >> frac_bits;
    548    shift = ((x << upsample_above) & 0x3F) >> 1;
    549 
    550    if (base >= max_base_x) {
    551      for (int i = r; i < bh; ++i) {
    552        memset(dst, above[max_base_x], bw * sizeof(dst[0]));
    553        dst += stride;
    554      }
    555      return;
    556    }
    557 
    558    for (c = 0; c < bw; ++c, base += base_inc) {
    559      if (base < max_base_x) {
    560        val = above[base] * (32 - shift) + above[base + 1] * shift;
    561        dst[c] = ROUND_POWER_OF_TWO(val, 5);
    562      } else {
    563        dst[c] = above[max_base_x];
    564      }
    565    }
    566  }
    567 }
    568 
    569 // Directional prediction, zone 2: 90 < angle < 180
    570 void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
    571                            const uint8_t *above, const uint8_t *left,
    572                            int upsample_above, int upsample_left, int dx,
    573                            int dy) {
    574  assert(dx > 0);
    575  assert(dy > 0);
    576 
    577  const int min_base_x = -(1 << upsample_above);
    578  const int min_base_y = -(1 << upsample_left);
    579  (void)min_base_y;
    580  const int frac_bits_x = 6 - upsample_above;
    581  const int frac_bits_y = 6 - upsample_left;
    582 
    583  for (int r = 0; r < bh; ++r) {
    584    for (int c = 0; c < bw; ++c) {
    585      int val;
    586      int y = r + 1;
    587      int x = (c << 6) - y * dx;
    588      const int base_x = x >> frac_bits_x;
    589      if (base_x >= min_base_x) {
    590        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
    591        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
    592        val = ROUND_POWER_OF_TWO(val, 5);
    593      } else {
    594        x = c + 1;
    595        y = (r << 6) - x * dy;
    596        const int base_y = y >> frac_bits_y;
    597        assert(base_y >= min_base_y);
    598        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
    599        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
    600        val = ROUND_POWER_OF_TWO(val, 5);
    601      }
    602      dst[c] = val;
    603    }
    604    dst += stride;
    605  }
    606 }
    607 
    608 // Directional prediction, zone 3: 180 < angle < 270
    609 void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
    610                            const uint8_t *above, const uint8_t *left,
    611                            int upsample_left, int dx, int dy) {
    612  int r, c, y, base, shift, val;
    613 
    614  (void)above;
    615  (void)dx;
    616 
    617  assert(dx == 1);
    618  assert(dy > 0);
    619 
    620  const int max_base_y = (bw + bh - 1) << upsample_left;
    621  const int frac_bits = 6 - upsample_left;
    622  const int base_inc = 1 << upsample_left;
    623  y = dy;
    624  for (c = 0; c < bw; ++c, y += dy) {
    625    base = y >> frac_bits;
    626    shift = ((y << upsample_left) & 0x3F) >> 1;
    627 
    628    for (r = 0; r < bh; ++r, base += base_inc) {
    629      if (base < max_base_y) {
    630        val = left[base] * (32 - shift) + left[base + 1] * shift;
    631        dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    632      } else {
    633        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
    634        break;
    635      }
    636    }
    637  }
    638 }
    639 
    640 static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
    641                         const uint8_t *above, const uint8_t *left,
    642                         int upsample_above, int upsample_left, int angle) {
    643  const int dx = av1_get_dx(angle);
    644  const int dy = av1_get_dy(angle);
    645  const int bw = tx_size_wide[tx_size];
    646  const int bh = tx_size_high[tx_size];
    647  assert(angle > 0 && angle < 270);
    648 
    649  if (angle > 0 && angle < 90) {
    650    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
    651                         dy);
    652  } else if (angle > 90 && angle < 180) {
    653    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
    654                         upsample_left, dx, dy);
    655  } else if (angle > 180 && angle < 270) {
    656    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
    657                         dy);
    658  } else if (angle == 90) {
    659    pred[V_PRED][tx_size](dst, stride, above, left);
    660  } else if (angle == 180) {
    661    pred[H_PRED][tx_size](dst, stride, above, left);
    662  }
    663 }
    664 
    665 #if CONFIG_AV1_HIGHBITDEPTH
    666 // Directional prediction, zone 1: 0 < angle < 90
    667 void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
    668                                   int bh, const uint16_t *above,
    669                                   const uint16_t *left, int upsample_above,
    670                                   int dx, int dy, int bd) {
    671  int r, c, x, base, shift, val;
    672 
    673  (void)left;
    674  (void)dy;
    675  (void)bd;
    676  assert(dy == 1);
    677  assert(dx > 0);
    678 
    679  const int max_base_x = ((bw + bh) - 1) << upsample_above;
    680  const int frac_bits = 6 - upsample_above;
    681  const int base_inc = 1 << upsample_above;
    682  x = dx;
    683  for (r = 0; r < bh; ++r, dst += stride, x += dx) {
    684    base = x >> frac_bits;
    685    shift = ((x << upsample_above) & 0x3F) >> 1;
    686 
    687    if (base >= max_base_x) {
    688      for (int i = r; i < bh; ++i) {
    689        aom_memset16(dst, above[max_base_x], bw);
    690        dst += stride;
    691      }
    692      return;
    693    }
    694 
    695    for (c = 0; c < bw; ++c, base += base_inc) {
    696      if (base < max_base_x) {
    697        val = above[base] * (32 - shift) + above[base + 1] * shift;
    698        dst[c] = ROUND_POWER_OF_TWO(val, 5);
    699      } else {
    700        dst[c] = above[max_base_x];
    701      }
    702    }
    703  }
    704 }
    705 
    706 // Directional prediction, zone 2: 90 < angle < 180
    707 void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
    708                                   int bh, const uint16_t *above,
    709                                   const uint16_t *left, int upsample_above,
    710                                   int upsample_left, int dx, int dy, int bd) {
    711  (void)bd;
    712  assert(dx > 0);
    713  assert(dy > 0);
    714 
    715  const int min_base_x = -(1 << upsample_above);
    716  const int min_base_y = -(1 << upsample_left);
    717  (void)min_base_y;
    718  const int frac_bits_x = 6 - upsample_above;
    719  const int frac_bits_y = 6 - upsample_left;
    720 
    721  for (int r = 0; r < bh; ++r) {
    722    for (int c = 0; c < bw; ++c) {
    723      int val;
    724      int y = r + 1;
    725      int x = (c << 6) - y * dx;
    726      const int base_x = x >> frac_bits_x;
    727      if (base_x >= min_base_x) {
    728        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
    729        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
    730        val = ROUND_POWER_OF_TWO(val, 5);
    731      } else {
    732        x = c + 1;
    733        y = (r << 6) - x * dy;
    734        const int base_y = y >> frac_bits_y;
    735        assert(base_y >= min_base_y);
    736        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
    737        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
    738        val = ROUND_POWER_OF_TWO(val, 5);
    739      }
    740      dst[c] = val;
    741    }
    742    dst += stride;
    743  }
    744 }
    745 
    746 // Directional prediction, zone 3: 180 < angle < 270
    747 void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
    748                                   int bh, const uint16_t *above,
    749                                   const uint16_t *left, int upsample_left,
    750                                   int dx, int dy, int bd) {
    751  int r, c, y, base, shift, val;
    752 
    753  (void)above;
    754  (void)dx;
    755  (void)bd;
    756  assert(dx == 1);
    757  assert(dy > 0);
    758 
    759  const int max_base_y = (bw + bh - 1) << upsample_left;
    760  const int frac_bits = 6 - upsample_left;
    761  const int base_inc = 1 << upsample_left;
    762  y = dy;
    763  for (c = 0; c < bw; ++c, y += dy) {
    764    base = y >> frac_bits;
    765    shift = ((y << upsample_left) & 0x3F) >> 1;
    766 
    767    for (r = 0; r < bh; ++r, base += base_inc) {
    768      if (base < max_base_y) {
    769        val = left[base] * (32 - shift) + left[base + 1] * shift;
    770        dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    771      } else {
    772        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
    773        break;
    774      }
    775    }
    776  }
    777 }
    778 
    779 static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
    780                                TX_SIZE tx_size, const uint16_t *above,
    781                                const uint16_t *left, int upsample_above,
    782                                int upsample_left, int angle, int bd) {
    783  const int dx = av1_get_dx(angle);
    784  const int dy = av1_get_dy(angle);
    785  const int bw = tx_size_wide[tx_size];
    786  const int bh = tx_size_high[tx_size];
    787  assert(angle > 0 && angle < 270);
    788 
    789  if (angle > 0 && angle < 90) {
    790    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
    791                                upsample_above, dx, dy, bd);
    792  } else if (angle > 90 && angle < 180) {
    793    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
    794                                upsample_above, upsample_left, dx, dy, bd);
    795  } else if (angle > 180 && angle < 270) {
    796    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
    797                                dx, dy, bd);
    798  } else if (angle == 90) {
    799    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    800  } else if (angle == 180) {
    801    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    802  }
    803 }
    804 #endif  // CONFIG_AV1_HIGHBITDEPTH
    805 
         // Tap coefficients for the filter-intra predictors, indexed as
         // [mode][k][tap]:
         //  - mode: one 8x8 sub-table per filter-intra mode.
         //  - k (0..7): output pixel inside a 4x2 patch; the predictors place it at
         //    row (k >> 2), column (k & 3) of the patch.
         //  - tap (0..6): weight applied to neighbour p0..p6, where p0 is the
         //    above-left sample, p1..p4 are the four samples above the patch and
         //    p5/p6 are the two samples to its left.
         // Column 7 is zero in every row and is never read by the C predictors in
         // this file (presumably padding so each row is 8 bytes -- confirm against
         // the SIMD implementations).
         // Every row sums to 16, which matches the rounding shift by
         // FILTER_INTRA_SCALE_BITS applied by the predictors (assuming that constant
         // is 4 -- confirm in its definition).
     806 DECLARE_ALIGNED(16, const int8_t,
     807                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
     808  {
     809      { -6, 10, 0, 0, 0, 12, 0, 0 },
     810      { -5, 2, 10, 0, 0, 9, 0, 0 },
     811      { -3, 1, 1, 10, 0, 7, 0, 0 },
     812      { -3, 1, 1, 2, 10, 5, 0, 0 },
     813      { -4, 6, 0, 0, 0, 2, 12, 0 },
     814      { -3, 2, 6, 0, 0, 2, 9, 0 },
     815      { -3, 2, 2, 6, 0, 2, 7, 0 },
     816      { -3, 1, 2, 2, 6, 3, 5, 0 },
     817  },
     818  {
     819      { -10, 16, 0, 0, 0, 10, 0, 0 },
     820      { -6, 0, 16, 0, 0, 6, 0, 0 },
     821      { -4, 0, 0, 16, 0, 4, 0, 0 },
     822      { -2, 0, 0, 0, 16, 2, 0, 0 },
     823      { -10, 16, 0, 0, 0, 0, 10, 0 },
     824      { -6, 0, 16, 0, 0, 0, 6, 0 },
     825      { -4, 0, 0, 16, 0, 0, 4, 0 },
     826      { -2, 0, 0, 0, 16, 0, 2, 0 },
     827  },
     828  {
     829      { -8, 8, 0, 0, 0, 16, 0, 0 },
     830      { -8, 0, 8, 0, 0, 16, 0, 0 },
     831      { -8, 0, 0, 8, 0, 16, 0, 0 },
     832      { -8, 0, 0, 0, 8, 16, 0, 0 },
     833      { -4, 4, 0, 0, 0, 0, 16, 0 },
     834      { -4, 0, 4, 0, 0, 0, 16, 0 },
     835      { -4, 0, 0, 4, 0, 0, 16, 0 },
     836      { -4, 0, 0, 0, 4, 0, 16, 0 },
     837  },
     838  {
     839      { -2, 8, 0, 0, 0, 10, 0, 0 },
     840      { -1, 3, 8, 0, 0, 6, 0, 0 },
     841      { -1, 2, 3, 8, 0, 4, 0, 0 },
     842      { 0, 1, 2, 3, 8, 2, 0, 0 },
     843      { -1, 4, 0, 0, 0, 3, 10, 0 },
     844      { -1, 3, 4, 0, 0, 4, 6, 0 },
     845      { -1, 2, 3, 4, 0, 4, 4, 0 },
     846      { -1, 2, 2, 3, 4, 3, 3, 0 },
     847  },
     848  {
     849      { -12, 14, 0, 0, 0, 14, 0, 0 },
     850      { -10, 0, 14, 0, 0, 12, 0, 0 },
     851      { -9, 0, 0, 14, 0, 11, 0, 0 },
     852      { -8, 0, 0, 0, 14, 10, 0, 0 },
     853      { -10, 12, 0, 0, 0, 0, 14, 0 },
     854      { -9, 1, 12, 0, 0, 0, 12, 0 },
     855      { -8, 0, 0, 12, 0, 1, 11, 0 },
     856      { -7, 0, 0, 1, 12, 1, 9, 0 },
     857  },
     858 };
    859 
    860 void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
    861                                  TX_SIZE tx_size, const uint8_t *above,
    862                                  const uint8_t *left, int mode) {
    863  int r, c;
    864  uint8_t buffer[33][33];
    865  const int bw = tx_size_wide[tx_size];
    866  const int bh = tx_size_high[tx_size];
    867 
    868  assert(bw <= 32 && bh <= 32);
    869 
    870  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
    871  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
    872 
    873  for (r = 1; r < bh + 1; r += 2)
    874    for (c = 1; c < bw + 1; c += 4) {
    875      const uint8_t p0 = buffer[r - 1][c - 1];
    876      const uint8_t p1 = buffer[r - 1][c];
    877      const uint8_t p2 = buffer[r - 1][c + 1];
    878      const uint8_t p3 = buffer[r - 1][c + 2];
    879      const uint8_t p4 = buffer[r - 1][c + 3];
    880      const uint8_t p5 = buffer[r][c - 1];
    881      const uint8_t p6 = buffer[r + 1][c - 1];
    882      for (int k = 0; k < 8; ++k) {
    883        int r_offset = k >> 2;
    884        int c_offset = k & 0x03;
    885        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
    886                 av1_filter_intra_taps[mode][k][1] * p1 +
    887                 av1_filter_intra_taps[mode][k][2] * p2 +
    888                 av1_filter_intra_taps[mode][k][3] * p3 +
    889                 av1_filter_intra_taps[mode][k][4] * p4 +
    890                 av1_filter_intra_taps[mode][k][5] * p5 +
    891                 av1_filter_intra_taps[mode][k][6] * p6;
    892        // Section 7.11.2.3 specifies the right-hand side of the assignment as
    893        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
    894        // Since Clip1() clips a negative value to 0, it is safe to replace
    895        // Round2Signed() with Round2().
    896        buffer[r + r_offset][c + c_offset] =
    897            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
    898      }
    899    }
    900 
    901  for (r = 0; r < bh; ++r) {
    902    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
    903    dst += stride;
    904  }
    905 }
    906 
    907 #if CONFIG_AV1_HIGHBITDEPTH
    908 static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
    909                                          TX_SIZE tx_size,
    910                                          const uint16_t *above,
    911                                          const uint16_t *left, int mode,
    912                                          int bd) {
    913  int r, c;
    914  uint16_t buffer[33][33];
    915  const int bw = tx_size_wide[tx_size];
    916  const int bh = tx_size_high[tx_size];
    917 
    918  assert(bw <= 32 && bh <= 32);
    919 
    920  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
    921  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
    922 
    923  for (r = 1; r < bh + 1; r += 2)
    924    for (c = 1; c < bw + 1; c += 4) {
    925      const uint16_t p0 = buffer[r - 1][c - 1];
    926      const uint16_t p1 = buffer[r - 1][c];
    927      const uint16_t p2 = buffer[r - 1][c + 1];
    928      const uint16_t p3 = buffer[r - 1][c + 2];
    929      const uint16_t p4 = buffer[r - 1][c + 3];
    930      const uint16_t p5 = buffer[r][c - 1];
    931      const uint16_t p6 = buffer[r + 1][c - 1];
    932      for (int k = 0; k < 8; ++k) {
    933        int r_offset = k >> 2;
    934        int c_offset = k & 0x03;
    935        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
    936                 av1_filter_intra_taps[mode][k][1] * p1 +
    937                 av1_filter_intra_taps[mode][k][2] * p2 +
    938                 av1_filter_intra_taps[mode][k][3] * p3 +
    939                 av1_filter_intra_taps[mode][k][4] * p4 +
    940                 av1_filter_intra_taps[mode][k][5] * p5 +
    941                 av1_filter_intra_taps[mode][k][6] * p6;
    942        // Section 7.11.2.3 specifies the right-hand side of the assignment as
    943        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
    944        // Since Clip1() clips a negative value to 0, it is safe to replace
    945        // Round2Signed() with Round2().
    946        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
    947            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
    948      }
    949    }
    950 
    951  for (r = 0; r < bh; ++r) {
    952    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
    953    dst += stride;
    954  }
    955 }
    956 #endif  // CONFIG_AV1_HIGHBITDEPTH
    957 
    958 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
    959  if (plane == 0) {
    960    const PREDICTION_MODE mode = mbmi->mode;
    961    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
    962            mode == SMOOTH_H_PRED);
    963  } else {
    964    // uv_mode is not set for inter blocks, so need to explicitly
    965    // detect that case.
    966    if (is_inter_block(mbmi)) return 0;
    967 
    968    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
    969    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
    970            uv_mode == UV_SMOOTH_H_PRED);
    971  }
    972 }
    973 
    974 static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
    975  const MB_MODE_INFO *above;
    976  const MB_MODE_INFO *left;
    977 
    978  if (plane == 0) {
    979    above = xd->above_mbmi;
    980    left = xd->left_mbmi;
    981  } else {
    982    above = xd->chroma_above_mbmi;
    983    left = xd->chroma_left_mbmi;
    984  }
    985 
    986  return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane));
    987 }
    988 
    989 static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
    990  const int d = abs(delta);
    991  int strength = 0;
    992 
    993  const int blk_wh = bs0 + bs1;
    994  if (type == 0) {
    995    if (blk_wh <= 8) {
    996      if (d >= 56) strength = 1;
    997    } else if (blk_wh <= 12) {
    998      if (d >= 40) strength = 1;
    999    } else if (blk_wh <= 16) {
   1000      if (d >= 40) strength = 1;
   1001    } else if (blk_wh <= 24) {
   1002      if (d >= 8) strength = 1;
   1003      if (d >= 16) strength = 2;
   1004      if (d >= 32) strength = 3;
   1005    } else if (blk_wh <= 32) {
   1006      if (d >= 1) strength = 1;
   1007      if (d >= 4) strength = 2;
   1008      if (d >= 32) strength = 3;
   1009    } else {
   1010      if (d >= 1) strength = 3;
   1011    }
   1012  } else {
   1013    if (blk_wh <= 8) {
   1014      if (d >= 40) strength = 1;
   1015      if (d >= 64) strength = 2;
   1016    } else if (blk_wh <= 16) {
   1017      if (d >= 20) strength = 1;
   1018      if (d >= 48) strength = 2;
   1019    } else if (blk_wh <= 24) {
   1020      if (d >= 4) strength = 3;
   1021    } else {
   1022      if (d >= 1) strength = 3;
   1023    }
   1024  }
   1025  return strength;
   1026 }
   1027 
   1028 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
   1029  if (!strength) return;
   1030 
   1031  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
   1032                                                         { 0, 5, 6, 5, 0 },
   1033                                                         { 2, 4, 4, 4, 2 } };
   1034  const int filt = strength - 1;
   1035  uint8_t edge[129];
   1036 
   1037  memcpy(edge, p, sz * sizeof(*p));
   1038  for (int i = 1; i < sz; i++) {
   1039    int s = 0;
   1040    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
   1041      int k = i - 2 + j;
   1042      k = (k < 0) ? 0 : k;
   1043      k = (k > sz - 1) ? sz - 1 : k;
   1044      s += edge[k] * kernel[filt][j];
   1045    }
   1046    s = (s + 8) >> 4;
   1047    p[i] = s;
   1048  }
   1049 }
   1050 
   1051 static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
   1052  const int kernel[3] = { 5, 6, 5 };
   1053 
   1054  int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
   1055          (p_above[0] * kernel[2]);
   1056  s = (s + 8) >> 4;
   1057  p_above[-1] = s;
   1058  p_left[-1] = s;
   1059 }
   1060 
   1061 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
   1062  // interpolate half-sample positions
   1063  assert(sz <= MAX_UPSAMPLE_SZ);
   1064 
   1065  uint8_t in[MAX_UPSAMPLE_SZ + 3];
   1066  // copy p[-1..(sz-1)] and extend first and last samples
   1067  in[0] = p[-1];
   1068  in[1] = p[-1];
   1069  for (int i = 0; i < sz; i++) {
   1070    in[i + 2] = p[i];
   1071  }
   1072  in[sz + 2] = p[sz - 1];
   1073 
   1074  // interpolate half-sample edge positions
   1075  p[-2] = in[0];
   1076  for (int i = 0; i < sz; i++) {
   1077    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
   1078    s = clip_pixel((s + 8) >> 4);
   1079    p[2 * i - 1] = s;
   1080    p[2 * i] = in[i + 2];
   1081  }
   1082 }
   1083 
         // Builds the 8-bit prediction for a directional mode or a filter-intra mode
         // (exactly one of the two must apply, see the assert below).
         //
         // Steps, in order:
         //  1. Decide which edges are needed from p_angle / filter-intra use.
         //  2. If the only needed edge is unavailable, flood dst with one value.
         //  3. Gather the left / above / above-left neighbours of `ref` into local
         //     buffers, replicating the last available sample (or a default value)
         //     where neighbours are missing.
         //  4. Filter-intra: hand the edges to av1_filter_intra_predictor() and stop.
         //  5. Directional: optionally smooth the corner and edges, optionally
         //     upsample them, then call dr_predictor().
         //
         // n_top_px / n_left_px are the numbers of available above / left pixels
         // (asserted >= 0). n_topright_px / n_bottomleft_px may be -1; any value
         // >= 0 makes the corresponding edge get extended by the other block
         // dimension (presumably -1 means "this extension is not needed" -- confirm
         // with the caller).
    1084 static void build_directional_and_filter_intra_predictors(
    1085    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
    1086    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
    1087    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    1088    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
    1089  int i;
    1090  const uint8_t *above_ref = ref - ref_stride;
    1091  const uint8_t *left_ref = ref - 1;
    1092  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
    1093  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
          // The usable edges start 16 samples into the buffers so that negative
          // indices (e.g. [-1] for the corner, [-2] after upsampling) stay in bounds.
    1094  uint8_t *const above_row = above_data + 16;
    1095  uint8_t *const left_col = left_data + 16;
    1096  const int txwpx = tx_size_wide[tx_size];
    1097  const int txhpx = tx_size_high[tx_size];
    1098  int need_left = extend_modes[mode] & NEED_LEFT;
    1099  int need_above = extend_modes[mode] & NEED_ABOVE;
    1100  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
    1101  const int is_dr_mode = av1_is_directional_mode(mode);
    1102  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
    1103  assert(use_filter_intra || is_dr_mode);
    1104  // The left_data, above_data buffers must be pre-filled (with the defaults
    1105  // below, not zero) to fix some intermittent valgrind errors. Uninitialized
    1106  // reads in intra pred modules (e.g. width = 4 path in
    1107  // av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to be the
          // potential reason for this issue.
    1108  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
    1109  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
    1110 
    1111  // The default values if ref pixels are not available:
    1112  // 128 127 127 .. 127 127 127 127 127 127
    1113  // 129  A   B  ..  Y   Z
    1114  // 129  C   D  ..  W   X
    1115  // 129  E   F  ..  U   V
    1116  // 129  G   H  ..  S   T   T   T   T   T
    1117  // ..
    1118 
          // For directional modes the needed edges follow from the angle alone and
          // override the per-mode table lookup above.
    1119  if (is_dr_mode) {
    1120    if (p_angle <= 90)
    1121      need_above = 1, need_left = 0, need_above_left = 1;
    1122    else if (p_angle < 180)
    1123      need_above = 1, need_left = 1, need_above_left = 1;
    1124    else
    1125      need_above = 0, need_left = 1, need_above_left = 1;
    1126  }
    1127  if (use_filter_intra) need_left = need_above = need_above_left = 1;
    1128 
    1129  assert(n_top_px >= 0);
    1130  assert(n_topright_px >= -1);
    1131  assert(n_left_px >= 0);
    1132  assert(n_bottomleft_px >= -1);
    1133 
          // Only one edge is needed and it has no pixels: the whole block becomes a
          // single value taken from the other edge, or a default if that is missing too.
    1134  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    1135    int val;
    1136    if (need_left) {
    1137      val = (n_top_px > 0) ? above_ref[0] : 129;
    1138    } else {
    1139      val = (n_left_px > 0) ? left_ref[0] : 127;
    1140    }
    1141    for (i = 0; i < txhpx; ++i) {
    1142      memset(dst, val, txwpx);
    1143      dst += dst_stride;
    1144    }
    1145    return;
    1146  }
    1147 
    1148  // NEED_LEFT
    1149  if (need_left) {
    1150    const int num_left_pixels_needed =
    1151        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
    1152    i = 0;
    1153    if (n_left_px > 0) {
    1154      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
    1155      if (n_bottomleft_px > 0) {
    1156        assert(i == txhpx);
    1157        for (; i < txhpx + n_bottomleft_px; i++)
    1158          left_col[i] = left_ref[i * ref_stride];
    1159      }
              // Pad the remainder by replicating the last copied sample.
    1160      if (i < num_left_pixels_needed)
    1161        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    1162    } else if (n_top_px > 0) {
    1163      memset(left_col, above_ref[0], num_left_pixels_needed);
    1164    }
    1165  }
    1166 
    1167  // NEED_ABOVE
    1168  if (need_above) {
    1169    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
    1170    if (n_top_px > 0) {
    1171      memcpy(above_row, above_ref, n_top_px);
    1172      i = n_top_px;
    1173      if (n_topright_px > 0) {
    1174        assert(n_top_px == txwpx);
    1175        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
    1176        i += n_topright_px;
    1177      }
              // Pad the remainder by replicating the last copied sample.
    1178      if (i < num_top_pixels_needed)
    1179        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    1180    } else if (n_left_px > 0) {
    1181      memset(above_row, left_ref[0], num_top_pixels_needed);
    1182    }
    1183  }
    1184 
          // The corner sample is stored at index -1 of both edges.
    1185  if (need_above_left) {
    1186    if (n_top_px > 0 && n_left_px > 0) {
    1187      above_row[-1] = above_ref[-1];
    1188    } else if (n_top_px > 0) {
    1189      above_row[-1] = above_ref[0];
    1190    } else if (n_left_px > 0) {
    1191      above_row[-1] = left_ref[0];
    1192    } else {
    1193      above_row[-1] = 128;
    1194    }
    1195    left_col[-1] = above_row[-1];
    1196  }
    1197 
    1198  if (use_filter_intra) {
    1199    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
    1200                               filter_intra_mode);
    1201    return;
    1202  }
    1203 
    1204  assert(is_dr_mode);
    1205  int upsample_above = 0;
    1206  int upsample_left = 0;
    1207  if (!disable_edge_filter) {
    1208    const int need_right = p_angle < 90;
    1209    const int need_bottom = p_angle > 180;
            // Edge smoothing is skipped for the exactly vertical / horizontal angles.
    1210    if (p_angle != 90 && p_angle != 180) {
    1211      assert(need_above_left);
              // ab_le = 1: the filtered span starts at the corner sample (index -1).
    1212      const int ab_le = 1;
    1213      if (need_above && need_left && (txwpx + txhpx >= 24)) {
    1214        filter_intra_edge_corner(above_row, left_col);
    1215      }
    1216      if (need_above && n_top_px > 0) {
    1217        const int strength = intra_edge_filter_strength(
    1218            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
    1219        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
    1220        av1_filter_intra_edge(above_row - ab_le, n_px, strength);
    1221      }
    1222      if (need_left && n_left_px > 0) {
    1223        const int strength = intra_edge_filter_strength(
    1224            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
    1225        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
    1226        av1_filter_intra_edge(left_col - ab_le, n_px, strength);
    1227      }
    1228    }
            // Upsampling runs after smoothing and rewrites the edge in place; the
            // flags are forwarded so dr_predictor() can account for the denser edge.
    1229    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
    1230                                                 intra_edge_filter_type);
    1231    if (need_above && upsample_above) {
    1232      const int n_px = txwpx + (need_right ? txhpx : 0);
    1233      av1_upsample_intra_edge(above_row, n_px);
    1234    }
    1235    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
    1236                                                intra_edge_filter_type);
    1237    if (need_left && upsample_left) {
    1238      const int n_px = txhpx + (need_bottom ? txwpx : 0);
    1239      av1_upsample_intra_edge(left_col, n_px);
    1240    }
    1241  }
    1242  dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
    1243               upsample_left, p_angle);
    1244 }
   1245 
   1246 // This function generates the pred data of a given block for non-directional
   1247 // intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH).
   1248 static void build_non_directional_intra_predictors(
   1249    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
   1250    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) {
   1251  const uint8_t *above_ref = ref - ref_stride;
   1252  const uint8_t *left_ref = ref - 1;
   1253  const int txwpx = tx_size_wide[tx_size];
   1254  const int txhpx = tx_size_high[tx_size];
   1255  const int need_left = extend_modes[mode] & NEED_LEFT;
   1256  const int need_above = extend_modes[mode] & NEED_ABOVE;
   1257  const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
   1258  int i = 0;
   1259  assert(n_top_px >= 0);
   1260  assert(n_left_px >= 0);
   1261  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
   1262         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
   1263 
   1264  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
   1265    int val = 0;
   1266    if (need_left) {
   1267      val = (n_top_px > 0) ? above_ref[0] : 129;
   1268    } else {
   1269      val = (n_left_px > 0) ? left_ref[0] : 127;
   1270    }
   1271    for (i = 0; i < txhpx; ++i) {
   1272      memset(dst, val, txwpx);
   1273      dst += dst_stride;
   1274    }
   1275    return;
   1276  }
   1277 
   1278  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
   1279  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
   1280  uint8_t *const above_row = above_data + 16;
   1281  uint8_t *const left_col = left_data + 16;
   1282 
   1283  if (need_left) {
   1284    memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
   1285    if (n_left_px > 0) {
   1286      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
   1287      if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i);
   1288    } else if (n_top_px > 0) {
   1289      memset(left_col, above_ref[0], txhpx);
   1290    }
   1291  }
   1292 
   1293  if (need_above) {
   1294    memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
   1295    if (n_top_px > 0) {
   1296      memcpy(above_row, above_ref, n_top_px);
   1297      i = n_top_px;
   1298      if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i);
   1299    } else if (n_left_px > 0) {
   1300      memset(above_row, left_ref[0], txwpx);
   1301    }
   1302  }
   1303 
   1304  if (need_above_left) {
   1305    if (n_top_px > 0 && n_left_px > 0) {
   1306      above_row[-1] = above_ref[-1];
   1307    } else if (n_top_px > 0) {
   1308      above_row[-1] = above_ref[0];
   1309    } else if (n_left_px > 0) {
   1310      above_row[-1] = left_ref[0];
   1311    } else {
   1312      above_row[-1] = 128;
   1313    }
   1314    left_col[-1] = above_row[-1];
   1315  }
   1316 
   1317  if (mode == DC_PRED) {
   1318    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
   1319                                                  left_col);
   1320  } else {
   1321    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
   1322  }
   1323 }
   1324 
   1325 #if CONFIG_AV1_HIGHBITDEPTH
   1326 void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) {
   1327  if (!strength) return;
   1328 
   1329  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
   1330                                                         { 0, 5, 6, 5, 0 },
   1331                                                         { 2, 4, 4, 4, 2 } };
   1332  const int filt = strength - 1;
   1333  uint16_t edge[129];
   1334 
   1335  memcpy(edge, p, sz * sizeof(*p));
   1336  for (int i = 1; i < sz; i++) {
   1337    int s = 0;
   1338    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
   1339      int k = i - 2 + j;
   1340      k = (k < 0) ? 0 : k;
   1341      k = (k > sz - 1) ? sz - 1 : k;
   1342      s += edge[k] * kernel[filt][j];
   1343    }
   1344    s = (s + 8) >> 4;
   1345    p[i] = s;
   1346  }
   1347 }
   1348 
   1349 static void highbd_filter_intra_edge_corner(uint16_t *p_above,
   1350                                            uint16_t *p_left) {
   1351  const int kernel[3] = { 5, 6, 5 };
   1352 
   1353  int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
   1354          (p_above[0] * kernel[2]);
   1355  s = (s + 8) >> 4;
   1356  p_above[-1] = s;
   1357  p_left[-1] = s;
   1358 }
   1359 
   1360 void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) {
   1361  // interpolate half-sample positions
   1362  assert(sz <= MAX_UPSAMPLE_SZ);
   1363 
   1364  uint16_t in[MAX_UPSAMPLE_SZ + 3];
   1365  // copy p[-1..(sz-1)] and extend first and last samples
   1366  in[0] = p[-1];
   1367  in[1] = p[-1];
   1368  for (int i = 0; i < sz; i++) {
   1369    in[i + 2] = p[i];
   1370  }
   1371  in[sz + 2] = p[sz - 1];
   1372 
   1373  // interpolate half-sample edge positions
   1374  p[-2] = in[0];
   1375  for (int i = 0; i < sz; i++) {
   1376    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
   1377    s = (s + 8) >> 4;
   1378    s = clip_pixel_highbd(s, bd);
   1379    p[2 * i - 1] = s;
   1380    p[2 * i] = in[i + 2];
   1381  }
   1382 }
   1383 
   1384 static void highbd_build_directional_and_filter_intra_predictors(
   1385    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
   1386    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
   1387    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
   1388    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
   1389    int bit_depth) {
   1390  int i;
   1391  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   1392  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
   1393  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
   1394  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
   1395  uint16_t *const above_row = above_data + 16;
   1396  uint16_t *const left_col = left_data + 16;
   1397  const int txwpx = tx_size_wide[tx_size];
   1398  const int txhpx = tx_size_high[tx_size];
   1399  int need_left = extend_modes[mode] & NEED_LEFT;
   1400  int need_above = extend_modes[mode] & NEED_ABOVE;
   1401  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
   1402  const uint16_t *above_ref = ref - ref_stride;
   1403  const uint16_t *left_ref = ref - 1;
   1404  const int is_dr_mode = av1_is_directional_mode(mode);
   1405  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
   1406  assert(use_filter_intra || is_dr_mode);
   1407  const int base = 128 << (bit_depth - 8);
   1408  // The left_data, above_data buffers must be zeroed to fix some intermittent
   1409  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
   1410  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
   1411  // seen to be the potential reason for this issue.
   1412  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
   1413  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
   1414 
   1415  // The default values if ref pixels are not available:
   1416  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
   1417  // base+1   A      B  ..     Y      Z
   1418  // base+1   C      D  ..     W      X
   1419  // base+1   E      F  ..     U      V
   1420  // base+1   G      H  ..     S      T      T      T      T      T
   1421 
   1422  if (is_dr_mode) {
   1423    if (p_angle <= 90)
   1424      need_above = 1, need_left = 0, need_above_left = 1;
   1425    else if (p_angle < 180)
   1426      need_above = 1, need_left = 1, need_above_left = 1;
   1427    else
   1428      need_above = 0, need_left = 1, need_above_left = 1;
   1429  }
   1430  if (use_filter_intra) need_left = need_above = need_above_left = 1;
   1431 
   1432  assert(n_top_px >= 0);
   1433  assert(n_topright_px >= -1);
   1434  assert(n_left_px >= 0);
   1435  assert(n_bottomleft_px >= -1);
   1436 
   1437  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
   1438    int val;
   1439    if (need_left) {
   1440      val = (n_top_px > 0) ? above_ref[0] : base + 1;
   1441    } else {
   1442      val = (n_left_px > 0) ? left_ref[0] : base - 1;
   1443    }
   1444    for (i = 0; i < txhpx; ++i) {
   1445      aom_memset16(dst, val, txwpx);
   1446      dst += dst_stride;
   1447    }
   1448    return;
   1449  }
   1450 
   1451  // NEED_LEFT
   1452  if (need_left) {
   1453    const int num_left_pixels_needed =
   1454        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
   1455    i = 0;
   1456    if (n_left_px > 0) {
   1457      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
   1458      if (n_bottomleft_px > 0) {
   1459        assert(i == txhpx);
   1460        for (; i < txhpx + n_bottomleft_px; i++)
   1461          left_col[i] = left_ref[i * ref_stride];
   1462      }
   1463      if (i < num_left_pixels_needed)
   1464        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
   1465    } else if (n_top_px > 0) {
   1466      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
   1467    }
   1468  }
   1469 
   1470  // NEED_ABOVE
   1471  if (need_above) {
   1472    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
   1473    if (n_top_px > 0) {
   1474      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
   1475      i = n_top_px;
   1476      if (n_topright_px > 0) {
   1477        assert(n_top_px == txwpx);
   1478        memcpy(above_row + txwpx, above_ref + txwpx,
   1479               n_topright_px * sizeof(above_ref[0]));
   1480        i += n_topright_px;
   1481      }
   1482      if (i < num_top_pixels_needed)
   1483        aom_memset16(&above_row[i], above_row[i - 1],
   1484                     num_top_pixels_needed - i);
   1485    } else if (n_left_px > 0) {
   1486      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
   1487    }
   1488  }
   1489 
   1490  if (need_above_left) {
   1491    if (n_top_px > 0 && n_left_px > 0) {
   1492      above_row[-1] = above_ref[-1];
   1493    } else if (n_top_px > 0) {
   1494      above_row[-1] = above_ref[0];
   1495    } else if (n_left_px > 0) {
   1496      above_row[-1] = left_ref[0];
   1497    } else {
   1498      above_row[-1] = base;
   1499    }
   1500    left_col[-1] = above_row[-1];
   1501  }
   1502 
   1503  if (use_filter_intra) {
   1504    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
   1505                                  filter_intra_mode, bit_depth);
   1506    return;
   1507  }
   1508 
   1509  assert(is_dr_mode);
   1510  int upsample_above = 0;
   1511  int upsample_left = 0;
   1512  if (!disable_edge_filter) {
   1513    const int need_right = p_angle < 90;
   1514    const int need_bottom = p_angle > 180;
   1515    if (p_angle != 90 && p_angle != 180) {
   1516      assert(need_above_left);
   1517      const int ab_le = 1;
   1518      if (need_above && need_left && (txwpx + txhpx >= 24)) {
   1519        highbd_filter_intra_edge_corner(above_row, left_col);
   1520      }
   1521      if (need_above && n_top_px > 0) {
   1522        const int strength = intra_edge_filter_strength(
   1523            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
   1524        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
   1525        av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength);
   1526      }
   1527      if (need_left && n_left_px > 0) {
   1528        const int strength = intra_edge_filter_strength(
   1529            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
   1530        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
   1531        av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength);
   1532      }
   1533    }
   1534    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
   1535                                                 intra_edge_filter_type);
   1536    if (need_above && upsample_above) {
   1537      const int n_px = txwpx + (need_right ? txhpx : 0);
   1538      av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth);
   1539    }
   1540    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
   1541                                                intra_edge_filter_type);
   1542    if (need_left && upsample_left) {
   1543      const int n_px = txhpx + (need_bottom ? txwpx : 0);
   1544      av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth);
   1545    }
   1546  }
   1547  highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
   1548                      upsample_above, upsample_left, p_angle, bit_depth);
   1549 }
   1550 
   1551 // For HBD encode/decode, this function generates the pred data of a given
   1552 // block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H,
   1553 // SMOOTH_V and PAETH).
   1554 static void highbd_build_non_directional_intra_predictors(
   1555    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
   1556    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px,
   1557    int bit_depth) {
   1558  int i = 0;
   1559  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   1560  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
   1561  const int txwpx = tx_size_wide[tx_size];
   1562  const int txhpx = tx_size_high[tx_size];
   1563  int need_left = extend_modes[mode] & NEED_LEFT;
   1564  int need_above = extend_modes[mode] & NEED_ABOVE;
   1565  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
   1566  const uint16_t *above_ref = ref - ref_stride;
   1567  const uint16_t *left_ref = ref - 1;
   1568  const int base = 128 << (bit_depth - 8);
   1569 
   1570  assert(n_top_px >= 0);
   1571  assert(n_left_px >= 0);
   1572  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
   1573         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
   1574 
   1575  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
   1576    int val = 0;
   1577    if (need_left) {
   1578      val = (n_top_px > 0) ? above_ref[0] : base + 1;
   1579    } else {
   1580      val = (n_left_px > 0) ? left_ref[0] : base - 1;
   1581    }
   1582    for (i = 0; i < txhpx; ++i) {
   1583      aom_memset16(dst, val, txwpx);
   1584      dst += dst_stride;
   1585    }
   1586    return;
   1587  }
   1588 
   1589  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
   1590  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
   1591  uint16_t *const above_row = above_data + 16;
   1592  uint16_t *const left_col = left_data + 16;
   1593 
   1594  if (need_left) {
   1595    aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
   1596    if (n_left_px > 0) {
   1597      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
   1598      if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i);
   1599    } else if (n_top_px > 0) {
   1600      aom_memset16(left_col, above_ref[0], txhpx);
   1601    }
   1602  }
   1603 
   1604  if (need_above) {
   1605    aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
   1606    if (n_top_px > 0) {
   1607      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
   1608      i = n_top_px;
   1609      if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i));
   1610    } else if (n_left_px > 0) {
   1611      aom_memset16(above_row, left_ref[0], txwpx);
   1612    }
   1613  }
   1614 
   1615  if (need_above_left) {
   1616    if (n_top_px > 0 && n_left_px > 0) {
   1617      above_row[-1] = above_ref[-1];
   1618    } else if (n_top_px > 0) {
   1619      above_row[-1] = above_ref[0];
   1620    } else if (n_left_px > 0) {
   1621      above_row[-1] = left_ref[0];
   1622    } else {
   1623      above_row[-1] = base;
   1624    }
   1625    left_col[-1] = above_row[-1];
   1626  }
   1627 
   1628  if (mode == DC_PRED) {
   1629    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
   1630        dst, dst_stride, above_row, left_col, bit_depth);
   1631  } else {
   1632    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
   1633  }
   1634 }
   1635 #endif  // CONFIG_AV1_HIGHBITDEPTH
   1636 
   1637 static inline BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
   1638                                            int subsampling_y) {
   1639  assert(subsampling_x >= 0 && subsampling_x < 2);
   1640  assert(subsampling_y >= 0 && subsampling_y < 2);
   1641  BLOCK_SIZE bs = bsize;
   1642  switch (bsize) {
   1643    case BLOCK_4X4:
   1644      if (subsampling_x == 1 && subsampling_y == 1)
   1645        bs = BLOCK_8X8;
   1646      else if (subsampling_x == 1)
   1647        bs = BLOCK_8X4;
   1648      else if (subsampling_y == 1)
   1649        bs = BLOCK_4X8;
   1650      break;
   1651    case BLOCK_4X8:
   1652      if (subsampling_x == 1 && subsampling_y == 1)
   1653        bs = BLOCK_8X8;
   1654      else if (subsampling_x == 1)
   1655        bs = BLOCK_8X8;
   1656      else if (subsampling_y == 1)
   1657        bs = BLOCK_4X8;
   1658      break;
   1659    case BLOCK_8X4:
   1660      if (subsampling_x == 1 && subsampling_y == 1)
   1661        bs = BLOCK_8X8;
   1662      else if (subsampling_x == 1)
   1663        bs = BLOCK_8X4;
   1664      else if (subsampling_y == 1)
   1665        bs = BLOCK_8X8;
   1666      break;
   1667    case BLOCK_4X16:
   1668      if (subsampling_x == 1 && subsampling_y == 1)
   1669        bs = BLOCK_8X16;
   1670      else if (subsampling_x == 1)
   1671        bs = BLOCK_8X16;
   1672      else if (subsampling_y == 1)
   1673        bs = BLOCK_4X16;
   1674      break;
   1675    case BLOCK_16X4:
   1676      if (subsampling_x == 1 && subsampling_y == 1)
   1677        bs = BLOCK_16X8;
   1678      else if (subsampling_x == 1)
   1679        bs = BLOCK_16X4;
   1680      else if (subsampling_y == 1)
   1681        bs = BLOCK_16X8;
   1682      break;
   1683    default: break;
   1684  }
   1685  return bs;
   1686 }
   1687 
   1688 void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
   1689                             int enable_intra_edge_filter, int wpx, int hpx,
   1690                             TX_SIZE tx_size, PREDICTION_MODE mode,
   1691                             int angle_delta, int use_palette,
   1692                             FILTER_INTRA_MODE filter_intra_mode,
   1693                             const uint8_t *ref, int ref_stride, uint8_t *dst,
   1694                             int dst_stride, int col_off, int row_off,
   1695                             int plane) {
   1696  const MB_MODE_INFO *const mbmi = xd->mi[0];
   1697  const int txwpx = tx_size_wide[tx_size];
   1698  const int txhpx = tx_size_high[tx_size];
   1699  const int x = col_off << MI_SIZE_LOG2;
   1700  const int y = row_off << MI_SIZE_LOG2;
   1701  const int is_hbd = is_cur_buf_hbd(xd);
   1702 
   1703  assert(mode < INTRA_MODES);
   1704 
   1705  if (use_palette) {
   1706    int r, c;
   1707    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
   1708                               xd->color_index_map_offset[plane != 0];
   1709    const uint16_t *const palette =
   1710        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
   1711    if (is_hbd) {
   1712      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
   1713      for (r = 0; r < txhpx; ++r) {
   1714        for (c = 0; c < txwpx; ++c) {
   1715          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
   1716        }
   1717      }
   1718    } else {
   1719      for (r = 0; r < txhpx; ++r) {
   1720        for (c = 0; c < txwpx; ++c) {
   1721          dst[r * dst_stride + c] =
   1722              (uint8_t)palette[map[(r + y) * wpx + c + x]];
   1723        }
   1724      }
   1725    }
   1726    return;
   1727  }
   1728 
   1729  const struct macroblockd_plane *const pd = &xd->plane[plane];
   1730  const int ss_x = pd->subsampling_x;
   1731  const int ss_y = pd->subsampling_y;
   1732  const int have_top =
   1733      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
   1734  const int have_left =
   1735      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
   1736 
   1737  // Distance between the right edge of this prediction block to
   1738  // the frame right edge
   1739  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
   1740  // Distance between the bottom edge of this prediction block to
   1741  // the frame bottom edge
   1742  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
   1743  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
   1744  const int is_dr_mode = av1_is_directional_mode(mode);
   1745 
   1746  // The computations in this function, as well as in build_intra_predictors(),
   1747  // are generalized for all intra modes. Some of these operations are not
   1748  // required since non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H,
   1749  // SMOOTH_V, and PAETH) specifically require left and top neighbors. Hence, a
   1750  // separate function build_non_directional_intra_predictors() is introduced
   1751  // for these modes to avoid redundant computations while generating pred data.
   1752 
   1753  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
   1754  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
   1755  if (!use_filter_intra && !is_dr_mode) {
   1756 #if CONFIG_AV1_HIGHBITDEPTH
   1757    if (is_hbd) {
   1758      highbd_build_non_directional_intra_predictors(
   1759          ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px,
   1760          xd->bd);
   1761      return;
   1762    }
   1763 #endif  // CONFIG_AV1_HIGHBITDEPTH
   1764    build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride,
   1765                                           mode, tx_size, n_top_px, n_left_px);
   1766    return;
   1767  }
   1768 
   1769  const int txw = tx_size_wide_unit[tx_size];
   1770  const int txh = tx_size_high_unit[tx_size];
   1771  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
   1772  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
   1773  const int right_available =
   1774      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
   1775  const int bottom_available =
   1776      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
   1777 
   1778  const PARTITION_TYPE partition = mbmi->partition;
   1779 
   1780  BLOCK_SIZE bsize = mbmi->bsize;
   1781  // force 4x4 chroma component block size.
   1782  if (ss_x || ss_y) {
   1783    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
   1784  }
   1785 
   1786  int p_angle = 0;
   1787  int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
   1788  int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
   1789 
   1790  if (use_filter_intra) {
   1791    need_top_right = 0;
   1792    need_bottom_left = 0;
   1793  }
   1794  if (is_dr_mode) {
   1795    p_angle = mode_to_angle_map[mode] + angle_delta;
   1796    need_top_right = p_angle < 90;
   1797    need_bottom_left = p_angle > 180;
   1798  }
   1799 
   1800  // Possible states for have_top_right(TR) and have_bottom_left(BL)
   1801  // -1 : TR and BL are not needed
   1802  //  0 : TR and BL are needed but not available
   1803  // > 0 : TR and BL are needed and pixels are available
   1804  const int have_top_right =
   1805      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
   1806                                     right_available, partition, tx_size,
   1807                                     row_off, col_off, ss_x, ss_y)
   1808                     : -1;
   1809  const int have_bottom_left =
   1810      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
   1811                                         bottom_available, have_left, partition,
   1812                                         tx_size, row_off, col_off, ss_x, ss_y)
   1813                       : -1;
   1814 
   1815  const int disable_edge_filter = !enable_intra_edge_filter;
   1816  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
   1817  const int n_topright_px =
   1818      have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right;
   1819  const int n_bottomleft_px =
   1820      have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left;
   1821 #if CONFIG_AV1_HIGHBITDEPTH
   1822  if (is_hbd) {
   1823    highbd_build_directional_and_filter_intra_predictors(
   1824        ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
   1825        tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
   1826        n_bottomleft_px, intra_edge_filter_type, xd->bd);
   1827    return;
   1828  }
   1829 #endif
   1830  build_directional_and_filter_intra_predictors(
   1831      ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
   1832      tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
   1833      n_bottomleft_px, intra_edge_filter_type);
   1834 }
   1835 
   1836 void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
   1837                                    int plane, int blk_col, int blk_row,
   1838                                    TX_SIZE tx_size) {
   1839  const MB_MODE_INFO *const mbmi = xd->mi[0];
   1840  struct macroblockd_plane *const pd = &xd->plane[plane];
   1841  const int dst_stride = pd->dst.stride;
   1842  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
   1843  const PREDICTION_MODE mode =
   1844      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
   1845  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
   1846  const FILTER_INTRA_MODE filter_intra_mode =
   1847      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
   1848          ? mbmi->filter_intra_mode_info.filter_intra_mode
   1849          : FILTER_INTRA_MODES;
   1850  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
   1851  const SequenceHeader *seq_params = cm->seq_params;
   1852 
   1853 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
   1854  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
   1855 #if CONFIG_DEBUG
   1856    assert(is_cfl_allowed(xd));
   1857    const BLOCK_SIZE plane_bsize =
   1858        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
   1859    (void)plane_bsize;
   1860    assert(plane_bsize < BLOCK_SIZES_ALL);
   1861    if (!xd->lossless[mbmi->segment_id]) {
   1862      assert(blk_col == 0);
   1863      assert(blk_row == 0);
   1864      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
   1865      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
   1866    }
   1867 #endif
   1868    CFL_CTX *const cfl = &xd->cfl;
   1869    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
   1870    if (!cfl->dc_pred_is_cached[pred_plane]) {
   1871      av1_predict_intra_block(xd, seq_params->sb_size,
   1872                              seq_params->enable_intra_edge_filter, pd->width,
   1873                              pd->height, tx_size, mode, angle_delta,
   1874                              use_palette, filter_intra_mode, dst, dst_stride,
   1875                              dst, dst_stride, blk_col, blk_row, plane);
   1876      if (cfl->use_dc_pred_cache) {
   1877        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
   1878        cfl->dc_pred_is_cached[pred_plane] = true;
   1879      }
   1880    } else {
   1881      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
   1882    }
   1883    av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
   1884    return;
   1885  }
   1886 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
   1887  av1_predict_intra_block(
   1888      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
   1889      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
   1890      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
   1891 }
   1892 
   1893 void av1_init_intra_predictors(void) {
   1894  aom_once(init_intra_predictors_internal);
   1895 }