cfl.h
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_COMMON_CFL_H_
#define AOM_AV1_COMMON_CFL_H_

#include "av1/common/av1_common_int.h"
#include "av1/common/blockd.h"

// Can we use CfL for the current block?
static inline CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->bsize;
  assert(bsize < BLOCK_SIZES_ALL);
  if (xd->lossless[mbmi->segment_id]) {
    // In lossless, CfL is available when the partition size is equal to the
    // transform size.
    const int ssx = xd->plane[AOM_PLANE_U].subsampling_x;
    const int ssy = xd->plane[AOM_PLANE_U].subsampling_y;
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
    return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4);
  }
  // Spec: CfL is available to luma partitions less than or equal to 32x32.
  return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 &&
                            block_size_high[bsize] <= 32);
}

// Do we need to save the luma pixels from the current block,
// for a possible future CfL prediction?
static inline CFL_ALLOWED_TYPE store_cfl_required(const AV1_COMMON *cm,
                                                  const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];

  if (cm->seq_params->monochrome) return CFL_DISALLOWED;

  if (!xd->is_chroma_ref) {
    // For non-chroma-reference blocks, we should always store the luma pixels
    // in case the corresponding chroma-reference block uses CfL. Note that
    // this can only happen for block sizes which are <8 on their shortest
    // side, as otherwise they would be chroma-reference blocks themselves.
    return CFL_ALLOWED;
  }

  // If this block has chroma information, we know whether we're
  // actually going to perform a CfL prediction.
  return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) &&
                            mbmi->uv_mode == UV_CFL_PRED);
}
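// get_scaled_luma_q0() multiplies a Q3 scaling factor (alpha_q3) by a Q3 luma
// sample (pred_buf_q3); the product is in Q6 and is rounded back to Q0.
// Worked example (illustrative values): alpha_q3 = 12 (1.5 in Q3) and
// pred_buf_q3 = 16 (2.0 in Q3) give scaled_luma_q6 = 192, so
// ROUND_POWER_OF_TWO_SIGNED(192, 6) = (192 + 32) >> 6 = 3, i.e. 1.5 * 2.0.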
static inline int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
  int scaled_luma_q6 = alpha_q3 * pred_buf_q3;
  return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
}

static inline CFL_PRED_TYPE get_cfl_pred_type(int plane) {
  assert(plane > 0);
  return (CFL_PRED_TYPE)(plane - 1);
}

static inline void clear_cfl_dc_pred_cache_flags(CFL_CTX *cfl) {
  cfl->use_dc_pred_cache = false;
  cfl->dc_pred_is_cached[CFL_PRED_U] = false;
  cfl->dc_pred_is_cached[CFL_PRED_V] = false;
}

void av1_cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                           TX_SIZE tx_size, int plane);

void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size);

void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
                  BLOCK_SIZE bsize);

void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
                       CFL_PRED_TYPE pred_plane, int width);

void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                      TX_SIZE tx_size, CFL_PRED_TYPE pred_plane);

// Allows the CFL_SUBSAMPLE wrappers to switch the pointer type depending on
// the bitdepth.
#define CFL_lbd_TYPE uint8_t *cfl_type
#define CFL_hbd_TYPE uint16_t *cfl_type

// Declare a size-specific wrapper for the size-generic function. The compiler
// will inline the size-generic function here, so the size becomes a
// compile-time constant, enabling loop unrolling and other
// constant-propagation optimizations.
#define CFL_SUBSAMPLE(arch, sub, bd, width, height)                       \
  void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch(          \
      const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3);      \
  void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch(          \
      const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) {     \
    cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride,    \
                                               output_q3, width, height); \
  }
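// For example (illustrative; "sse2" stands in for an actual arch suffix),
// CFL_SUBSAMPLE(sse2, 420, lbd, 4, 4) expands to a forward declaration
// followed by roughly:
//
//   void cfl_subsample_lbd_420_4x4_sse2(const uint8_t *cfl_type,
//                                       int input_stride,
//                                       uint16_t *output_q3) {
//     cfl_luma_subsampling_420_lbd_sse2(cfl_type, input_stride, output_q3,
//                                       4, 4);
//   }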
// Declare size-specific wrappers for all valid CfL sizes.
#define CFL_SUBSAMPLE_FUNCTIONS(arch, sub, bd)                            \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 4)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 8)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 16)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 32)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 8)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 4)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 16)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 8)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 32)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 16)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 16)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 4)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 32)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 8)                                     \
  cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_##arch( \
      TX_SIZE tx_size) {                                                  \
    CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd)                           \
    return subfn_##sub[tx_size];                                          \
  }

// Declare an architecture-specific array of function pointers for the
// size-specific wrappers.
#define CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd)                           \
  static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = {          \
    cfl_subsample_##bd##_##sub##_4x4_##arch,   /* 4x4 */                      \
    cfl_subsample_##bd##_##sub##_8x8_##arch,   /* 8x8 */                      \
    cfl_subsample_##bd##_##sub##_16x16_##arch, /* 16x16 */                    \
    cfl_subsample_##bd##_##sub##_32x32_##arch, /* 32x32 */                    \
    NULL,                                      /* 64x64 (invalid CFL size) */ \
    cfl_subsample_##bd##_##sub##_4x8_##arch,   /* 4x8 */                      \
    cfl_subsample_##bd##_##sub##_8x4_##arch,   /* 8x4 */                      \
    cfl_subsample_##bd##_##sub##_8x16_##arch,  /* 8x16 */                     \
    cfl_subsample_##bd##_##sub##_16x8_##arch,  /* 16x8 */                     \
    cfl_subsample_##bd##_##sub##_16x32_##arch, /* 16x32 */                    \
    cfl_subsample_##bd##_##sub##_32x16_##arch, /* 32x16 */                    \
    NULL,                                      /* 32x64 (invalid CFL size) */ \
    NULL,                                      /* 64x32 (invalid CFL size) */ \
    cfl_subsample_##bd##_##sub##_4x16_##arch,  /* 4x16 */                     \
    cfl_subsample_##bd##_##sub##_16x4_##arch,  /* 16x4 */                     \
    cfl_subsample_##bd##_##sub##_8x32_##arch,  /* 8x32 */                     \
    cfl_subsample_##bd##_##sub##_32x8_##arch,  /* 32x8 */                     \
    NULL,                                      /* 16x64 (invalid CFL size) */ \
    NULL,                                      /* 64x16 (invalid CFL size) */ \
  };

// The RTCD script does not support passing in an array, so we wrap it in this
// function.
#if CONFIG_AV1_HIGHBITDEPTH
#define CFL_GET_SUBSAMPLE_FUNCTION(arch)  \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, hbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, hbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, hbd)
#else
#define CFL_GET_SUBSAMPLE_FUNCTION(arch)  \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd)
#endif

// Declare a size-specific wrapper for the size-generic function. The compiler
// will inline the size-generic function here, so the size becomes a
// compile-time constant, enabling loop unrolling and other
// constant-propagation optimizations.
#define CFL_SUB_AVG_X(arch, width, height, round_offset, num_pel_log2)       \
  void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \
                                                        int16_t *dst);       \
  void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \
                                                        int16_t *dst) {      \
    subtract_average_##arch(src, dst, width, height, round_offset,           \
                            num_pel_log2);                                   \
  }
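// The last two arguments of CFL_SUB_AVG_X describe the averaging divisor:
// num_pel_log2 is log2(width * height) and round_offset is the rounding bias
// (width * height) / 2. For example, the 8x16 instantiation below passes 64
// and 7, since 8 * 16 = 128 = 2^7 and 128 / 2 = 64.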
// Declare size-specific wrappers for all valid CfL sizes.
#define CFL_SUB_AVG_FN(arch)                                              \
  CFL_SUB_AVG_X(arch, 4, 4, 8, 4)                                         \
  CFL_SUB_AVG_X(arch, 4, 8, 16, 5)                                        \
  CFL_SUB_AVG_X(arch, 4, 16, 32, 6)                                       \
  CFL_SUB_AVG_X(arch, 8, 4, 16, 5)                                        \
  CFL_SUB_AVG_X(arch, 8, 8, 32, 6)                                        \
  CFL_SUB_AVG_X(arch, 8, 16, 64, 7)                                       \
  CFL_SUB_AVG_X(arch, 8, 32, 128, 8)                                      \
  CFL_SUB_AVG_X(arch, 16, 4, 32, 6)                                       \
  CFL_SUB_AVG_X(arch, 16, 8, 64, 7)                                       \
  CFL_SUB_AVG_X(arch, 16, 16, 128, 8)                                     \
  CFL_SUB_AVG_X(arch, 16, 32, 256, 9)                                     \
  CFL_SUB_AVG_X(arch, 32, 8, 128, 8)                                      \
  CFL_SUB_AVG_X(arch, 32, 16, 256, 9)                                     \
  CFL_SUB_AVG_X(arch, 32, 32, 512, 10)                                    \
  cfl_subtract_average_fn cfl_get_subtract_average_fn_##arch(             \
      TX_SIZE tx_size) {                                                  \
    static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = {        \
      cfl_subtract_average_4x4_##arch,   /* 4x4 */                        \
      cfl_subtract_average_8x8_##arch,   /* 8x8 */                        \
      cfl_subtract_average_16x16_##arch, /* 16x16 */                      \
      cfl_subtract_average_32x32_##arch, /* 32x32 */                      \
      NULL,                              /* 64x64 (invalid CFL size) */   \
      cfl_subtract_average_4x8_##arch,   /* 4x8 */                        \
      cfl_subtract_average_8x4_##arch,   /* 8x4 */                        \
      cfl_subtract_average_8x16_##arch,  /* 8x16 */                       \
      cfl_subtract_average_16x8_##arch,  /* 16x8 */                       \
      cfl_subtract_average_16x32_##arch, /* 16x32 */                      \
      cfl_subtract_average_32x16_##arch, /* 32x16 */                      \
      NULL,                              /* 32x64 (invalid CFL size) */   \
      NULL,                              /* 64x32 (invalid CFL size) */   \
      cfl_subtract_average_4x16_##arch,  /* 4x16 */                       \
      cfl_subtract_average_16x4_##arch,  /* 16x4 */                       \
      cfl_subtract_average_8x32_##arch,  /* 8x32 */                       \
      cfl_subtract_average_32x8_##arch,  /* 32x8 */                       \
      NULL,                              /* 16x64 (invalid CFL size) */   \
      NULL,                              /* 64x16 (invalid CFL size) */   \
    };                                                                    \
    /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \
    /* index the function pointer array out of bounds. */                 \
    return sub_avg[tx_size % TX_SIZES_ALL];                               \
  }

#define CFL_PREDICT_lbd(arch, width, height)                              \
  void cfl_predict_lbd_##width##x##height##_##arch(                       \
      const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride,           \
      int alpha_q3);                                                      \
  void cfl_predict_lbd_##width##x##height##_##arch(                       \
      const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride,           \
      int alpha_q3) {                                                     \
    cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \
                           height);                                       \
  }

#if CONFIG_AV1_HIGHBITDEPTH
#define CFL_PREDICT_hbd(arch, width, height)                           \
  void cfl_predict_hbd_##width##x##height##_##arch(                    \
      const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride,       \
      int alpha_q3, int bd);                                            \
  void cfl_predict_hbd_##width##x##height##_##arch(                    \
      const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride,       \
      int alpha_q3, int bd) {                                           \
    cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, \
                           width, height);                              \
  }
#endif

// This wrapper exists because clang-format does not like calling macros with
// lowercase letters.
#define CFL_PREDICT_X(arch, width, height, bd) \
  CFL_PREDICT_##bd(arch, width, height)

#define CFL_PREDICT_FN(arch, bd)                                            \
  CFL_PREDICT_X(arch, 4, 4, bd)                                             \
  CFL_PREDICT_X(arch, 4, 8, bd)                                             \
  CFL_PREDICT_X(arch, 4, 16, bd)                                            \
  CFL_PREDICT_X(arch, 8, 4, bd)                                             \
  CFL_PREDICT_X(arch, 8, 8, bd)                                             \
  CFL_PREDICT_X(arch, 8, 16, bd)                                            \
  CFL_PREDICT_X(arch, 8, 32, bd)                                            \
  CFL_PREDICT_X(arch, 16, 4, bd)                                            \
  CFL_PREDICT_X(arch, 16, 8, bd)                                            \
  CFL_PREDICT_X(arch, 16, 16, bd)                                           \
  CFL_PREDICT_X(arch, 16, 32, bd)                                           \
  CFL_PREDICT_X(arch, 32, 8, bd)                                            \
  CFL_PREDICT_X(arch, 32, 16, bd)                                           \
  CFL_PREDICT_X(arch, 32, 32, bd)                                           \
  cfl_predict_##bd##_fn cfl_get_predict_##bd##_fn_##arch(TX_SIZE tx_size) { \
    static const cfl_predict_##bd##_fn pred[TX_SIZES_ALL] = {               \
      cfl_predict_##bd##_4x4_##arch,   /* 4x4 */                            \
      cfl_predict_##bd##_8x8_##arch,   /* 8x8 */                            \
      cfl_predict_##bd##_16x16_##arch, /* 16x16 */                          \
      cfl_predict_##bd##_32x32_##arch, /* 32x32 */                          \
      NULL,                            /* 64x64 (invalid CFL size) */       \
      cfl_predict_##bd##_4x8_##arch,   /* 4x8 */                            \
      cfl_predict_##bd##_8x4_##arch,   /* 8x4 */                            \
      cfl_predict_##bd##_8x16_##arch,  /* 8x16 */                           \
      cfl_predict_##bd##_16x8_##arch,  /* 16x8 */                           \
      cfl_predict_##bd##_16x32_##arch, /* 16x32 */                          \
      cfl_predict_##bd##_32x16_##arch, /* 32x16 */                          \
      NULL,                            /* 32x64 (invalid CFL size) */       \
      NULL,                            /* 64x32 (invalid CFL size) */       \
      cfl_predict_##bd##_4x16_##arch,  /* 4x16 */                           \
      cfl_predict_##bd##_16x4_##arch,  /* 16x4 */                           \
      cfl_predict_##bd##_8x32_##arch,  /* 8x32 */                           \
      cfl_predict_##bd##_32x8_##arch,  /* 32x8 */                           \
      NULL,                            /* 16x64 (invalid CFL size) */       \
      NULL,                            /* 64x16 (invalid CFL size) */       \
    };                                                                      \
    /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */   \
    /* index the function pointer array out of bounds. */                   \
    return pred[tx_size % TX_SIZES_ALL];                                    \
  }

#endif  // AOM_AV1_COMMON_CFL_H_
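// Usage sketch (illustrative): each CFL_PREDICT_FN(arch, bd) instantiation
// defines a getter that maps a TX_SIZE to the matching size-specialized
// wrapper. Assuming some translation unit instantiates CFL_PREDICT_FN(c, lbd)
// and the caller has prepared pred_buf_q3, dst, dst_stride and alpha_q3:
//
//   cfl_predict_lbd_fn predict = cfl_get_predict_lbd_fn_c(TX_8X8);
//   predict(pred_buf_q3, dst, dst_stride, alpha_q3);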