av1_inv_txfm2d.c
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "av1/common/enums.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"

void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                 int stride, int bd) {
  /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
     0.5 shifts per pixel. */
  int i;
  tran_low_t output[16];
  tran_low_t a1, b1, c1, d1, e1;
  const tran_low_t *ip = input;
  tran_low_t *op = output;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  for (i = 0; i < 4; i++) {
    a1 = ip[4 * 0] >> UNIT_QUANT_SHIFT;
    c1 = ip[4 * 1] >> UNIT_QUANT_SHIFT;
    d1 = ip[4 * 2] >> UNIT_QUANT_SHIFT;
    b1 = ip[4 * 3] >> UNIT_QUANT_SHIFT;
    a1 += c1;
    d1 -= b1;
    e1 = (a1 - d1) >> 1;
    b1 = e1 - b1;
    c1 = e1 - c1;
    a1 -= b1;
    d1 += c1;

    op[4 * 0] = a1;
    op[4 * 1] = b1;
    op[4 * 2] = c1;
    op[4 * 3] = d1;
    ip++;
    op++;
  }

  ip = output;
  for (i = 0; i < 4; i++) {
    a1 = ip[0];
    c1 = ip[1];
    d1 = ip[2];
    b1 = ip[3];
    a1 += c1;
    d1 -= b1;
    e1 = (a1 - d1) >> 1;
    b1 = e1 - b1;
    c1 = e1 - c1;
    a1 -= b1;
    d1 += c1;

    range_check_value(a1, bd + 1);
    range_check_value(b1, bd + 1);
    range_check_value(c1, bd + 1);
    range_check_value(d1, bd + 1);

    dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd);
    dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd);
    dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd);
    dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd);

    ip += 4;
    dest++;
  }
}

void av1_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
                                int dest_stride, int bd) {
  int i;
  tran_low_t a1, e1;
  tran_low_t tmp[4];
  const tran_low_t *ip = in;
  tran_low_t *op = tmp;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
  (void)bd;

  a1 = ip[0 * 4] >> UNIT_QUANT_SHIFT;
  e1 = a1 >> 1;
  a1 -= e1;
  op[0] = a1;
  op[1] = op[2] = op[3] = e1;

  ip = tmp;
  for (i = 0; i < 4; i++) {
    e1 = ip[0] >> 1;
    a1 = ip[0] - e1;
    dest[dest_stride * 0] =
        highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd);
    dest[dest_stride * 1] =
        highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd);
    dest[dest_stride * 2] =
        highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd);
    dest[dest_stride * 3] =
        highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd);
    ip++;
    dest++;
  }
}

static inline TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
  switch (txfm_type) {
    case TXFM_TYPE_DCT4: return av1_idct4;
    case TXFM_TYPE_DCT8: return av1_idct8;
    case TXFM_TYPE_DCT16: return av1_idct16;
    case TXFM_TYPE_DCT32: return av1_idct32;
    case TXFM_TYPE_DCT64: return av1_idct64;
    case TXFM_TYPE_ADST4: return av1_iadst4;
    case TXFM_TYPE_ADST8: return av1_iadst8;
    case TXFM_TYPE_ADST16: return av1_iadst16;
    case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c;
    case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c;
    case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;
    case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c;
    default: assert(0); return NULL;
  }
}
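/* Illustrative sketch (not part of the library): the 2-D inverse transform
 * below is separable, so the TxfmFunc returned by the dispatch above is run
 * once per row and once per column. The input values and the flat 16-bit
 * stage ranges here are assumptions chosen for the example.
 */
#if 0
static void example_run_1d_idct4(void) {
  const int32_t in[4] = { 64, 0, 0, 0 };  // hypothetical dequantized column
  int32_t out[4];
  // INV_COS_BIT is the fixed cosine precision shared by all AV1 inverse
  // transforms; 16 bits per stage is a safe range for this tiny input.
  const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16, 16, 16, 16, 16, 16,
                                                   16, 16, 16, 16, 16, 16 };
  const TxfmFunc f = inv_txfm_type_to_func(TXFM_TYPE_DCT4);
  f(in, out, INV_COS_BIT, stage_range);
}
#endif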
static const int8_t inv_shift_4x4[2] = { 0, -4 };
static const int8_t inv_shift_8x8[2] = { -1, -4 };
static const int8_t inv_shift_16x16[2] = { -2, -4 };
static const int8_t inv_shift_32x32[2] = { -2, -4 };
static const int8_t inv_shift_64x64[2] = { -2, -4 };
static const int8_t inv_shift_4x8[2] = { 0, -4 };
static const int8_t inv_shift_8x4[2] = { 0, -4 };
static const int8_t inv_shift_8x16[2] = { -1, -4 };
static const int8_t inv_shift_16x8[2] = { -1, -4 };
static const int8_t inv_shift_16x32[2] = { -1, -4 };
static const int8_t inv_shift_32x16[2] = { -1, -4 };
static const int8_t inv_shift_32x64[2] = { -1, -4 };
static const int8_t inv_shift_64x32[2] = { -1, -4 };
static const int8_t inv_shift_4x16[2] = { -1, -4 };
static const int8_t inv_shift_16x4[2] = { -1, -4 };
static const int8_t inv_shift_8x32[2] = { -2, -4 };
static const int8_t inv_shift_32x8[2] = { -2, -4 };
static const int8_t inv_shift_16x64[2] = { -2, -4 };
static const int8_t inv_shift_64x16[2] = { -2, -4 };

const int8_t *av1_inv_txfm_shift_ls[TX_SIZES_ALL] = {
  inv_shift_4x4,   inv_shift_8x8,   inv_shift_16x16, inv_shift_32x32,
  inv_shift_64x64, inv_shift_4x8,   inv_shift_8x4,   inv_shift_8x16,
  inv_shift_16x8,  inv_shift_16x32, inv_shift_32x16, inv_shift_32x64,
  inv_shift_64x32, inv_shift_4x16,  inv_shift_16x4,  inv_shift_8x32,
  inv_shift_32x8,  inv_shift_16x64, inv_shift_64x16,
};

static const int8_t iadst4_range[7] = { 0, 1, 0, 0, 0, 0, 0 };

void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
                          TXFM_2D_FLIP_CFG *cfg) {
  assert(cfg != NULL);
  cfg->tx_size = tx_size;
  av1_zero(cfg->stage_range_col);
  av1_zero(cfg->stage_range_row);
  set_flip_cfg(tx_type, cfg);
  const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type];
  const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type];
  cfg->shift = av1_inv_txfm_shift_ls[tx_size];
  const int txw_idx = get_txw_idx(tx_size);
  const int txh_idx = get_txh_idx(tx_size);
  cfg->cos_bit_col = INV_COS_BIT;
  cfg->cos_bit_row = INV_COS_BIT;
  cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col];
  if (cfg->txfm_type_col == TXFM_TYPE_ADST4) {
    memcpy(cfg->stage_range_col, iadst4_range, sizeof(iadst4_range));
  }
  cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row];
  if (cfg->txfm_type_row == TXFM_TYPE_ADST4) {
    memcpy(cfg->stage_range_row, iadst4_range, sizeof(iadst4_range));
  }
  cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
  cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
}
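/* Illustrative sketch (not part of the library): fetching the configuration
 * for a rectangular transform and checking the fields filled in above.
 * DCT_ADST and TX_4X8 are arbitrary example choices.
 */
#if 0
static void example_inspect_cfg(void) {
  TXFM_2D_FLIP_CFG cfg;
  av1_get_inv_txfm_cfg(DCT_ADST, TX_4X8, &cfg);
  // For TX_4X8, cfg.shift points at inv_shift_4x8, i.e. { 0, -4 }: no
  // rounding after the row pass, a 4-bit round-shift after the column pass.
  assert(cfg.shift[0] == 0 && cfg.shift[1] == -4);
  // DCT_ADST mixes the two kernels across the two passes and, unlike the
  // FLIPADST types, requests no spatial flipping.
  assert(cfg.ud_flip == 0 && cfg.lr_flip == 0);
}
#endif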
void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
                             const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
                             int bd) {
  const int fwd_shift = inv_start_range[tx_size];
  const int8_t *shift = cfg->shift;
  int8_t opt_range_row, opt_range_col;
  if (bd == 8) {
    opt_range_row = 16;
    opt_range_col = 16;
  } else if (bd == 10) {
    opt_range_row = 18;
    opt_range_col = 16;
  } else {
    assert(bd == 12);
    opt_range_row = 20;
    opt_range_col = 18;
  }
  // The i < MAX_TXFM_STAGE_NUM check mutes an array-bounds warning.
  for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) {
    int real_range_row = cfg->stage_range_row[i] + fwd_shift + bd + 1;
    (void)real_range_row;
    if (cfg->txfm_type_row == TXFM_TYPE_ADST4 && i == 1) {
      // The adst4 may use 1 extra bit on top of opt_range_row at stage 1,
      // so opt_range_row >= real_range_row will not hold.
      stage_range_row[i] = opt_range_row;
    } else {
      assert(opt_range_row >= real_range_row);
      stage_range_row[i] = opt_range_row;
    }
  }
  // The i < MAX_TXFM_STAGE_NUM check mutes an array-bounds warning.
  for (int i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) {
    int real_range_col =
        cfg->stage_range_col[i] + fwd_shift + shift[0] + bd + 1;
    (void)real_range_col;
    if (cfg->txfm_type_col == TXFM_TYPE_ADST4 && i == 1) {
      // The adst4 may use 1 extra bit on top of opt_range_col at stage 1,
      // so opt_range_col >= real_range_col will not hold.
      stage_range_col[i] = opt_range_col;
    } else {
      assert(opt_range_col >= real_range_col);
      stage_range_col[i] = opt_range_col;
    }
  }
}
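/* Worked example of the ranges chosen above (a sketch, not library code):
 * at bd == 10 every row stage is clamped to 18 bits and every column stage
 * to 16 bits, except that stage 1 of an ADST4 may exceed the optimized
 * bound by one bit, which is why the assert is skipped for that stage.
 */
#if 0
static void example_stage_ranges(void) {
  TXFM_2D_FLIP_CFG cfg;
  int8_t range_col[MAX_TXFM_STAGE_NUM], range_row[MAX_TXFM_STAGE_NUM];
  av1_get_inv_txfm_cfg(DCT_DCT, TX_8X8, &cfg);
  av1_gen_inv_stage_range(range_col, range_row, &cfg, TX_8X8, /*bd=*/10);
  assert(range_row[0] == 18 && range_col[0] == 16);
}
#endif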
static inline void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
                                    int stride, TXFM_2D_FLIP_CFG *cfg,
                                    int32_t *txfm_buf, TX_SIZE tx_size,
                                    int bd) {
  // Note when assigning txfm_size_col, we use the txfm_size from the
  // row configuration and vice versa. This is intentionally done to
  // accurately perform rectangular transforms. When the transform is
  // rectangular, the number of columns will be the same as the
  // txfm_size stored in the row cfg struct. It will make no difference
  // for square transforms.
  const int txfm_size_col = tx_size_wide[cfg->tx_size];
  const int txfm_size_row = tx_size_high[cfg->tx_size];
  // Take the shift from the larger dimension in the rectangular case.
  const int8_t *shift = cfg->shift;
  const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
  int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
  int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
  assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
  assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
  av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd);

  const int8_t cos_bit_col = cfg->cos_bit_col;
  const int8_t cos_bit_row = cfg->cos_bit_row;
  const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col);
  const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row);

  // txfm_buf's length is txfm_size_row * txfm_size_col + 2 *
  // AOMMAX(txfm_size_row, txfm_size_col)
  // it is used for intermediate data buffering
  const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
  int32_t *temp_in = txfm_buf;
  int32_t *temp_out = temp_in + buf_offset;
  int32_t *buf = temp_out + buf_offset;
  int32_t *buf_ptr = buf;
  int c, r;

  // Rows
  for (r = 0; r < txfm_size_row; ++r) {
    if (abs(rect_type) == 1) {
      for (c = 0; c < txfm_size_col; ++c) {
        temp_in[c] = round_shift(
            (int64_t)input[c * txfm_size_row + r] * NewInvSqrt2, NewSqrt2Bits);
      }
      clamp_buf(temp_in, txfm_size_col, bd + 8);
      txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
    } else {
      for (c = 0; c < txfm_size_col; ++c) {
        temp_in[c] = input[c * txfm_size_row + r];
      }
      clamp_buf(temp_in, txfm_size_col, bd + 8);
      txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
    }
    av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
    buf_ptr += txfm_size_col;
  }

  // Columns
  for (c = 0; c < txfm_size_col; ++c) {
    if (cfg->lr_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r)
        temp_in[r] = buf[r * txfm_size_col + c];
    } else {
      // flip left right
      for (r = 0; r < txfm_size_row; ++r)
        temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
    }
    clamp_buf(temp_in, txfm_size_row, AOMMAX(bd + 6, 16));
    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
    av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
    if (cfg->ud_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r) {
        output[r * stride + c] =
            highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
      }
    } else {
      // flip upside down
      for (r = 0; r < txfm_size_row; ++r) {
        output[r * stride + c] = highbd_clip_pixel_add(
            output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
      }
    }
  }
}
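/* A sketch (not library code) of how txfm_buf is partitioned by
 * inv_txfm2d_add_c() above:
 *
 *   [ temp_in: max(W, H) | temp_out: max(W, H) | buf: W * H row results ]
 *
 * so every wrapper below sizes its scratch as W * H + 2 * max(W, H).
 * EXAMPLE_INV_TXFM_BUF_LEN is a hypothetical helper name.
 */
#if 0
#define EXAMPLE_INV_TXFM_BUF_LEN(w, h) ((w) * (h) + 2 * AOMMAX((w), (h)))
// e.g. TX_8X16: EXAMPLE_INV_TXFM_BUF_LEN(8, 16) == 8 * 16 + 16 + 16 == 160.
#endif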
static inline void inv_txfm2d_add_facade(const int32_t *input,
                                         uint16_t *output, int stride,
                                         int32_t *txfm_buf, TX_TYPE tx_type,
                                         TX_SIZE tx_size, int bd) {
  TXFM_2D_FLIP_CFG cfg;
  av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg);
  // Forward shift sum uses larger square size, to be consistent with what
  // av1_gen_inv_stage_range() does for inverse shifts.
  inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, tx_size, bd);
}

void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X8, bd);
}

void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd);
}

void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X16, bd);
}

void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd);
}

void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X32, bd);
}

void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd);
}

void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 4 + 4]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X4, bd);
}

void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 8 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X8, bd);
}

void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X16, bd);
}

void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd);
}
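/* Illustrative usage sketch (not part of the library): adding a 16x16
 * inverse transform to a 10-bit reconstruction buffer with the wrapper
 * above. `coeffs`, `recon`, and `recon_stride` are hypothetical
 * caller-side names.
 */
#if 0
static void example_add_16x16(const int32_t *coeffs, uint16_t *recon,
                              int recon_stride) {
  // `coeffs` holds 16*16 dequantized coefficients in column-major order
  // (the row pass reads input[c * txfm_size_row + r]); the result is added
  // onto the prediction already present in `recon` and clipped to 10 bits.
  av1_inv_txfm2d_add_16x16_c(coeffs, recon, recon_stride, DCT_DCT, 10);
}
#endif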
void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // TODO(urvang): Can the same array be reused, instead of using a new array?
  // Remap 32x32 input into a modified 64x64 by:
  // - Copying over these values in top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 64];
  for (int col = 0; col < 32; ++col) {
    memcpy(mod_input + col * 64, input + col * 32, 32 * sizeof(*mod_input));
    memset(mod_input + col * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X64,
                        bd);
}

void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x32 input into a modified 64x32 by:
  // - Copying over these values in top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[32 * 64];
  memcpy(mod_input, input, 32 * 32 * sizeof(*mod_input));
  memset(mod_input + 32 * 32, 0, 32 * 32 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32,
                        bd);
}

void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x32 input into a modified 32x64 input by:
  // - Copying over these values in top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 32];
  for (int col = 0; col < 32; ++col) {
    memcpy(mod_input + col * 64, input + col * 32, 32 * sizeof(*mod_input));
    memset(mod_input + col * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_32X64,
                        bd);
}

void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 16x32 input into a modified 16x64 input by:
  // - Copying over these values in top-left 16x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 16];
  for (int col = 0; col < 16; ++col) {
    memcpy(mod_input + col * 64, input + col * 32, 32 * sizeof(*mod_input));
    memset(mod_input + col * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_16X64,
                        bd);
}

void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x16 input into a modified 64x16 by:
  // - Copying over these values in top-left 32x16 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[16 * 64];
  memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input));
  memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16,
                        bd);
}
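/* The 64-point wrappers above share one idea: the bitstream codes at most
 * the lowest 32 frequencies in a 64-point dimension, so the coefficient
 * block is zero-extended before the full-size transform runs. A generic
 * sketch of the per-column zero-extension used when the column height grows
 * (64x64, 32x64, 16x64); when only the width grows (64x32, 64x16) the copy
 * is a single contiguous memcpy/memset pair instead. Hypothetical helper,
 * assuming the column-major layout read by inv_txfm2d_add_c():
 */
#if 0
static void example_zero_extend_cols(int32_t *dst, const int32_t *src,
                                     int cols, int src_rows, int dst_rows) {
  for (int c = 0; c < cols; ++c) {
    memcpy(dst + c * dst_rows, src + c * src_rows,
           src_rows * sizeof(*dst));  // keep the coded low-frequency rows
    memset(dst + c * dst_rows + src_rows, 0,
           (dst_rows - src_rows) * sizeof(*dst));  // zero the extension
  }
}
#endif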
void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X16, bd);
}

void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd);
}

void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X32, bd);
}

void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd);
}