// idct.c (13543B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <math.h> 13 14 #include "config/aom_dsp_rtcd.h" 15 #include "config/av1_rtcd.h" 16 17 #include "aom_ports/mem.h" 18 #include "av1/common/av1_inv_txfm1d_cfg.h" 19 #include "av1/common/av1_txfm.h" 20 #include "av1/common/blockd.h" 21 #include "av1/common/enums.h" 22 #include "av1/common/idct.h" 23 24 int av1_get_tx_scale(const TX_SIZE tx_size) { 25 const int pels = tx_size_2d[tx_size]; 26 // Largest possible pels is 4096 (64x64). 27 return (pels > 256) + (pels > 1024); 28 } 29 30 // NOTE: The implementation of all inverses need to be aware of the fact 31 // that input and output could be the same buffer. 
32 33 // idct 34 void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, 35 int eob, int bd) { 36 if (eob > 1) 37 av1_highbd_iwht4x4_16_add(input, dest, stride, bd); 38 else 39 av1_highbd_iwht4x4_1_add(input, dest, stride, bd); 40 } 41 42 static void highbd_inv_txfm_add_4x4_c(const tran_low_t *input, uint8_t *dest, 43 int stride, const TxfmParam *txfm_param) { 44 assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); 45 int eob = txfm_param->eob; 46 int bd = txfm_param->bd; 47 int lossless = txfm_param->lossless; 48 const int32_t *src = cast_to_int32(input); 49 const TX_TYPE tx_type = txfm_param->tx_type; 50 if (lossless) { 51 assert(tx_type == DCT_DCT); 52 av1_highbd_iwht4x4_add(input, dest, stride, eob, bd); 53 return; 54 } 55 56 av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd); 57 } 58 59 static void highbd_inv_txfm_add_4x8_c(const tran_low_t *input, uint8_t *dest, 60 int stride, const TxfmParam *txfm_param) { 61 assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); 62 const int32_t *src = cast_to_int32(input); 63 av1_inv_txfm2d_add_4x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, 64 txfm_param->tx_type, txfm_param->bd); 65 } 66 67 static void highbd_inv_txfm_add_8x4_c(const tran_low_t *input, uint8_t *dest, 68 int stride, const TxfmParam *txfm_param) { 69 assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); 70 const int32_t *src = cast_to_int32(input); 71 av1_inv_txfm2d_add_8x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, 72 txfm_param->tx_type, txfm_param->bd); 73 } 74 75 static void highbd_inv_txfm_add_16x32_c(const tran_low_t *input, uint8_t *dest, 76 int stride, 77 const TxfmParam *txfm_param) { 78 const int32_t *src = cast_to_int32(input); 79 av1_inv_txfm2d_add_16x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, 80 txfm_param->tx_type, txfm_param->bd); 81 } 82 83 static void highbd_inv_txfm_add_32x16_c(const tran_low_t *input, uint8_t *dest, 84 int stride, 85 
const TxfmParam *txfm_param) { 86 const int32_t *src = cast_to_int32(input); 87 av1_inv_txfm2d_add_32x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, 88 txfm_param->tx_type, txfm_param->bd); 89 } 90 91 static void highbd_inv_txfm_add_16x4_c(const tran_low_t *input, uint8_t *dest, 92 int stride, 93 const TxfmParam *txfm_param) { 94 const int32_t *src = cast_to_int32(input); 95 av1_inv_txfm2d_add_16x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, 96 txfm_param->tx_type, txfm_param->bd); 97 } 98 99 static void highbd_inv_txfm_add_4x16_c(const tran_low_t *input, uint8_t *dest, 100 int stride, 101 const TxfmParam *txfm_param) { 102 const int32_t *src = cast_to_int32(input); 103 av1_inv_txfm2d_add_4x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, 104 txfm_param->tx_type, txfm_param->bd); 105 } 106 107 static void highbd_inv_txfm_add_32x8_c(const tran_low_t *input, uint8_t *dest, 108 int stride, 109 const TxfmParam *txfm_param) { 110 const int32_t *src = cast_to_int32(input); 111 av1_inv_txfm2d_add_32x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, 112 txfm_param->tx_type, txfm_param->bd); 113 } 114 115 static void highbd_inv_txfm_add_8x32_c(const tran_low_t *input, uint8_t *dest, 116 int stride, 117 const TxfmParam *txfm_param) { 118 const int32_t *src = cast_to_int32(input); 119 av1_inv_txfm2d_add_8x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, 120 txfm_param->tx_type, txfm_param->bd); 121 } 122 123 static void highbd_inv_txfm_add_32x64_c(const tran_low_t *input, uint8_t *dest, 124 int stride, 125 const TxfmParam *txfm_param) { 126 const int32_t *src = cast_to_int32(input); 127 av1_inv_txfm2d_add_32x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, 128 txfm_param->tx_type, txfm_param->bd); 129 } 130 131 static void highbd_inv_txfm_add_64x32_c(const tran_low_t *input, uint8_t *dest, 132 int stride, 133 const TxfmParam *txfm_param) { 134 const int32_t *src = cast_to_int32(input); 135 av1_inv_txfm2d_add_64x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, 136 txfm_param->tx_type, txfm_param->bd); 
137 } 138 139 static void highbd_inv_txfm_add_16x64_c(const tran_low_t *input, uint8_t *dest, 140 int stride, 141 const TxfmParam *txfm_param) { 142 const int32_t *src = cast_to_int32(input); 143 av1_inv_txfm2d_add_16x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, 144 txfm_param->tx_type, txfm_param->bd); 145 } 146 147 static void highbd_inv_txfm_add_64x16_c(const tran_low_t *input, uint8_t *dest, 148 int stride, 149 const TxfmParam *txfm_param) { 150 const int32_t *src = cast_to_int32(input); 151 av1_inv_txfm2d_add_64x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, 152 txfm_param->tx_type, txfm_param->bd); 153 } 154 155 static void highbd_inv_txfm_add_8x8_c(const tran_low_t *input, uint8_t *dest, 156 int stride, const TxfmParam *txfm_param) { 157 int bd = txfm_param->bd; 158 const TX_TYPE tx_type = txfm_param->tx_type; 159 const int32_t *src = cast_to_int32(input); 160 161 av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd); 162 } 163 164 static void highbd_inv_txfm_add_16x16_c(const tran_low_t *input, uint8_t *dest, 165 int stride, 166 const TxfmParam *txfm_param) { 167 int bd = txfm_param->bd; 168 const TX_TYPE tx_type = txfm_param->tx_type; 169 const int32_t *src = cast_to_int32(input); 170 171 av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, 172 bd); 173 } 174 175 static void highbd_inv_txfm_add_8x16_c(const tran_low_t *input, uint8_t *dest, 176 int stride, 177 const TxfmParam *txfm_param) { 178 const int32_t *src = cast_to_int32(input); 179 av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, 180 txfm_param->tx_type, txfm_param->bd); 181 } 182 183 static void highbd_inv_txfm_add_16x8_c(const tran_low_t *input, uint8_t *dest, 184 int stride, 185 const TxfmParam *txfm_param) { 186 const int32_t *src = cast_to_int32(input); 187 av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, 188 txfm_param->tx_type, txfm_param->bd); 189 } 190 191 static void highbd_inv_txfm_add_32x32_c(const 
tran_low_t *input, uint8_t *dest, 192 int stride, 193 const TxfmParam *txfm_param) { 194 const int bd = txfm_param->bd; 195 const TX_TYPE tx_type = txfm_param->tx_type; 196 const int32_t *src = cast_to_int32(input); 197 198 av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, 199 bd); 200 } 201 202 static void highbd_inv_txfm_add_64x64_c(const tran_low_t *input, uint8_t *dest, 203 int stride, 204 const TxfmParam *txfm_param) { 205 const int bd = txfm_param->bd; 206 const TX_TYPE tx_type = txfm_param->tx_type; 207 const int32_t *src = cast_to_int32(input); 208 assert(tx_type == DCT_DCT); 209 av1_inv_txfm2d_add_64x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, 210 bd); 211 } 212 213 static void init_txfm_param(const MACROBLOCKD *xd, int plane, TX_SIZE tx_size, 214 TX_TYPE tx_type, int eob, int reduced_tx_set, 215 TxfmParam *txfm_param) { 216 (void)plane; 217 txfm_param->tx_type = tx_type; 218 txfm_param->tx_size = tx_size; 219 txfm_param->eob = eob; 220 txfm_param->lossless = xd->lossless[xd->mi[0]->segment_id]; 221 txfm_param->bd = xd->bd; 222 txfm_param->is_hbd = is_cur_buf_hbd(xd); 223 txfm_param->tx_set_type = av1_get_ext_tx_set_type( 224 txfm_param->tx_size, is_inter_block(xd->mi[0]), reduced_tx_set); 225 } 226 227 void av1_highbd_inv_txfm_add_c(const tran_low_t *input, uint8_t *dest, 228 int stride, const TxfmParam *txfm_param) { 229 assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); 230 const TX_SIZE tx_size = txfm_param->tx_size; 231 switch (tx_size) { 232 case TX_32X32: 233 highbd_inv_txfm_add_32x32_c(input, dest, stride, txfm_param); 234 break; 235 case TX_16X16: 236 highbd_inv_txfm_add_16x16_c(input, dest, stride, txfm_param); 237 break; 238 case TX_8X8: 239 highbd_inv_txfm_add_8x8_c(input, dest, stride, txfm_param); 240 break; 241 case TX_4X8: 242 highbd_inv_txfm_add_4x8_c(input, dest, stride, txfm_param); 243 break; 244 case TX_8X4: 245 highbd_inv_txfm_add_8x4_c(input, dest, stride, txfm_param); 246 
break; 247 case TX_8X16: 248 highbd_inv_txfm_add_8x16_c(input, dest, stride, txfm_param); 249 break; 250 case TX_16X8: 251 highbd_inv_txfm_add_16x8_c(input, dest, stride, txfm_param); 252 break; 253 case TX_16X32: 254 highbd_inv_txfm_add_16x32_c(input, dest, stride, txfm_param); 255 break; 256 case TX_32X16: 257 highbd_inv_txfm_add_32x16_c(input, dest, stride, txfm_param); 258 break; 259 case TX_64X64: 260 highbd_inv_txfm_add_64x64_c(input, dest, stride, txfm_param); 261 break; 262 case TX_32X64: 263 highbd_inv_txfm_add_32x64_c(input, dest, stride, txfm_param); 264 break; 265 case TX_64X32: 266 highbd_inv_txfm_add_64x32_c(input, dest, stride, txfm_param); 267 break; 268 case TX_16X64: 269 highbd_inv_txfm_add_16x64_c(input, dest, stride, txfm_param); 270 break; 271 case TX_64X16: 272 highbd_inv_txfm_add_64x16_c(input, dest, stride, txfm_param); 273 break; 274 case TX_4X4: 275 // this is like av1_short_idct4x4 but has a special case around eob<=1 276 // which is significant (not just an optimization) for the lossless 277 // case. 
278 highbd_inv_txfm_add_4x4_c(input, dest, stride, txfm_param); 279 break; 280 case TX_16X4: 281 highbd_inv_txfm_add_16x4_c(input, dest, stride, txfm_param); 282 break; 283 case TX_4X16: 284 highbd_inv_txfm_add_4x16_c(input, dest, stride, txfm_param); 285 break; 286 case TX_8X32: 287 highbd_inv_txfm_add_8x32_c(input, dest, stride, txfm_param); 288 break; 289 case TX_32X8: 290 highbd_inv_txfm_add_32x8_c(input, dest, stride, txfm_param); 291 break; 292 default: assert(0 && "Invalid transform size"); break; 293 } 294 } 295 296 void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, 297 const TxfmParam *txfm_param) { 298 const TX_SIZE tx_size = txfm_param->tx_size; 299 DECLARE_ALIGNED(32, uint16_t, tmp[MAX_TX_SQUARE]); 300 int tmp_stride = MAX_TX_SIZE; 301 int w = tx_size_wide[tx_size]; 302 int h = tx_size_high[tx_size]; 303 for (int r = 0; r < h; ++r) { 304 for (int c = 0; c < w; ++c) { 305 tmp[r * tmp_stride + c] = dst[r * stride + c]; 306 } 307 } 308 309 av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, 310 txfm_param); 311 312 for (int r = 0; r < h; ++r) { 313 for (int c = 0; c < w; ++c) { 314 dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c]; 315 } 316 } 317 } 318 319 void av1_inverse_transform_block(const MACROBLOCKD *xd, 320 const tran_low_t *dqcoeff, int plane, 321 TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst, 322 int stride, int eob, int reduced_tx_set) { 323 if (!eob) return; 324 325 assert(eob <= av1_get_max_eob(tx_size)); 326 327 TxfmParam txfm_param; 328 init_txfm_param(xd, plane, tx_size, tx_type, eob, reduced_tx_set, 329 &txfm_param); 330 assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]); 331 332 if (txfm_param.is_hbd) { 333 av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); 334 } else { 335 av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); 336 } 337 }