av1_fwd_txfm1d.c (63521B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <stdlib.h> 13 #include "av1/encoder/av1_fwd_txfm1d.h" 14 #include "av1/common/av1_txfm.h" 15 16 void av1_fdct4(const int32_t *input, int32_t *output, int8_t cos_bit, 17 const int8_t *stage_range) { 18 const int32_t size = 4; 19 const int32_t *cospi; 20 21 int32_t stage = 0; 22 int32_t *bf0, *bf1; 23 int32_t step[4]; 24 25 // stage 0; 26 av1_range_check_buf(stage, input, input, size, stage_range[stage]); 27 28 // stage 1; 29 stage++; 30 bf1 = output; 31 bf1[0] = input[0] + input[3]; 32 bf1[1] = input[1] + input[2]; 33 bf1[2] = -input[2] + input[1]; 34 bf1[3] = -input[3] + input[0]; 35 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 36 37 // stage 2 38 stage++; 39 cospi = cospi_arr(cos_bit); 40 bf0 = output; 41 bf1 = step; 42 bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); 43 bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit); 44 bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit); 45 bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit); 46 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 47 48 // stage 3 49 stage++; 50 bf0 = step; 51 bf1 = output; 52 bf1[0] = bf0[0]; 53 bf1[1] = bf0[2]; 54 bf1[2] = bf0[1]; 55 bf1[3] = bf0[3]; 56 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 57 } 58 59 void av1_fdct8(const int32_t *input, int32_t *output, int8_t cos_bit, 60 const int8_t *stage_range) { 61 const int32_t size = 
8; 62 const int32_t *cospi; 63 64 int32_t stage = 0; 65 int32_t *bf0, *bf1; 66 int32_t step[8]; 67 68 // stage 0; 69 av1_range_check_buf(stage, input, input, size, stage_range[stage]); 70 71 // stage 1; 72 stage++; 73 bf1 = output; 74 bf1[0] = input[0] + input[7]; 75 bf1[1] = input[1] + input[6]; 76 bf1[2] = input[2] + input[5]; 77 bf1[3] = input[3] + input[4]; 78 bf1[4] = -input[4] + input[3]; 79 bf1[5] = -input[5] + input[2]; 80 bf1[6] = -input[6] + input[1]; 81 bf1[7] = -input[7] + input[0]; 82 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 83 84 // stage 2 85 stage++; 86 cospi = cospi_arr(cos_bit); 87 bf0 = output; 88 bf1 = step; 89 bf1[0] = bf0[0] + bf0[3]; 90 bf1[1] = bf0[1] + bf0[2]; 91 bf1[2] = -bf0[2] + bf0[1]; 92 bf1[3] = -bf0[3] + bf0[0]; 93 bf1[4] = bf0[4]; 94 bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); 95 bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit); 96 bf1[7] = bf0[7]; 97 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 98 99 // stage 3 100 stage++; 101 cospi = cospi_arr(cos_bit); 102 bf0 = step; 103 bf1 = output; 104 bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); 105 bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit); 106 bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit); 107 bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit); 108 bf1[4] = bf0[4] + bf0[5]; 109 bf1[5] = -bf0[5] + bf0[4]; 110 bf1[6] = -bf0[6] + bf0[7]; 111 bf1[7] = bf0[7] + bf0[6]; 112 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 113 114 // stage 4 115 stage++; 116 cospi = cospi_arr(cos_bit); 117 bf0 = output; 118 bf1 = step; 119 bf1[0] = bf0[0]; 120 bf1[1] = bf0[1]; 121 bf1[2] = bf0[2]; 122 bf1[3] = bf0[3]; 123 bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit); 124 bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit); 125 bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit); 126 bf1[7] 
= half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit); 127 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 128 129 // stage 5 130 stage++; 131 bf0 = step; 132 bf1 = output; 133 bf1[0] = bf0[0]; 134 bf1[1] = bf0[4]; 135 bf1[2] = bf0[2]; 136 bf1[3] = bf0[6]; 137 bf1[4] = bf0[1]; 138 bf1[5] = bf0[5]; 139 bf1[6] = bf0[3]; 140 bf1[7] = bf0[7]; 141 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 142 } 143 144 void av1_fdct16(const int32_t *input, int32_t *output, int8_t cos_bit, 145 const int8_t *stage_range) { 146 const int32_t size = 16; 147 const int32_t *cospi; 148 149 int32_t stage = 0; 150 int32_t *bf0, *bf1; 151 int32_t step[16]; 152 153 // stage 0; 154 av1_range_check_buf(stage, input, input, size, stage_range[stage]); 155 156 // stage 1; 157 stage++; 158 bf1 = output; 159 bf1[0] = input[0] + input[15]; 160 bf1[1] = input[1] + input[14]; 161 bf1[2] = input[2] + input[13]; 162 bf1[3] = input[3] + input[12]; 163 bf1[4] = input[4] + input[11]; 164 bf1[5] = input[5] + input[10]; 165 bf1[6] = input[6] + input[9]; 166 bf1[7] = input[7] + input[8]; 167 bf1[8] = -input[8] + input[7]; 168 bf1[9] = -input[9] + input[6]; 169 bf1[10] = -input[10] + input[5]; 170 bf1[11] = -input[11] + input[4]; 171 bf1[12] = -input[12] + input[3]; 172 bf1[13] = -input[13] + input[2]; 173 bf1[14] = -input[14] + input[1]; 174 bf1[15] = -input[15] + input[0]; 175 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 176 177 // stage 2 178 stage++; 179 cospi = cospi_arr(cos_bit); 180 bf0 = output; 181 bf1 = step; 182 bf1[0] = bf0[0] + bf0[7]; 183 bf1[1] = bf0[1] + bf0[6]; 184 bf1[2] = bf0[2] + bf0[5]; 185 bf1[3] = bf0[3] + bf0[4]; 186 bf1[4] = -bf0[4] + bf0[3]; 187 bf1[5] = -bf0[5] + bf0[2]; 188 bf1[6] = -bf0[6] + bf0[1]; 189 bf1[7] = -bf0[7] + bf0[0]; 190 bf1[8] = bf0[8]; 191 bf1[9] = bf0[9]; 192 bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); 193 bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); 194 
bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit); 195 bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit); 196 bf1[14] = bf0[14]; 197 bf1[15] = bf0[15]; 198 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 199 200 // stage 3 201 stage++; 202 cospi = cospi_arr(cos_bit); 203 bf0 = step; 204 bf1 = output; 205 bf1[0] = bf0[0] + bf0[3]; 206 bf1[1] = bf0[1] + bf0[2]; 207 bf1[2] = -bf0[2] + bf0[1]; 208 bf1[3] = -bf0[3] + bf0[0]; 209 bf1[4] = bf0[4]; 210 bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); 211 bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit); 212 bf1[7] = bf0[7]; 213 bf1[8] = bf0[8] + bf0[11]; 214 bf1[9] = bf0[9] + bf0[10]; 215 bf1[10] = -bf0[10] + bf0[9]; 216 bf1[11] = -bf0[11] + bf0[8]; 217 bf1[12] = -bf0[12] + bf0[15]; 218 bf1[13] = -bf0[13] + bf0[14]; 219 bf1[14] = bf0[14] + bf0[13]; 220 bf1[15] = bf0[15] + bf0[12]; 221 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 222 223 // stage 4 224 stage++; 225 cospi = cospi_arr(cos_bit); 226 bf0 = output; 227 bf1 = step; 228 bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); 229 bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit); 230 bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit); 231 bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit); 232 bf1[4] = bf0[4] + bf0[5]; 233 bf1[5] = -bf0[5] + bf0[4]; 234 bf1[6] = -bf0[6] + bf0[7]; 235 bf1[7] = bf0[7] + bf0[6]; 236 bf1[8] = bf0[8]; 237 bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); 238 bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); 239 bf1[11] = bf0[11]; 240 bf1[12] = bf0[12]; 241 bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit); 242 bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit); 243 bf1[15] = bf0[15]; 244 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 245 246 // stage 5 247 stage++; 248 cospi = 
cospi_arr(cos_bit); 249 bf0 = step; 250 bf1 = output; 251 bf1[0] = bf0[0]; 252 bf1[1] = bf0[1]; 253 bf1[2] = bf0[2]; 254 bf1[3] = bf0[3]; 255 bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit); 256 bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit); 257 bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit); 258 bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit); 259 bf1[8] = bf0[8] + bf0[9]; 260 bf1[9] = -bf0[9] + bf0[8]; 261 bf1[10] = -bf0[10] + bf0[11]; 262 bf1[11] = bf0[11] + bf0[10]; 263 bf1[12] = bf0[12] + bf0[13]; 264 bf1[13] = -bf0[13] + bf0[12]; 265 bf1[14] = -bf0[14] + bf0[15]; 266 bf1[15] = bf0[15] + bf0[14]; 267 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 268 269 // stage 6 270 stage++; 271 cospi = cospi_arr(cos_bit); 272 bf0 = output; 273 bf1 = step; 274 bf1[0] = bf0[0]; 275 bf1[1] = bf0[1]; 276 bf1[2] = bf0[2]; 277 bf1[3] = bf0[3]; 278 bf1[4] = bf0[4]; 279 bf1[5] = bf0[5]; 280 bf1[6] = bf0[6]; 281 bf1[7] = bf0[7]; 282 bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit); 283 bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit); 284 bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit); 285 bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit); 286 bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit); 287 bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit); 288 bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit); 289 bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit); 290 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 291 292 // stage 7 293 stage++; 294 bf0 = step; 295 bf1 = output; 296 bf1[0] = bf0[0]; 297 bf1[1] = bf0[8]; 298 bf1[2] = bf0[4]; 299 bf1[3] = bf0[12]; 300 bf1[4] = bf0[2]; 301 bf1[5] = bf0[10]; 302 bf1[6] = bf0[6]; 303 bf1[7] = bf0[14]; 304 bf1[8] = bf0[1]; 305 bf1[9] = bf0[9]; 306 bf1[10] = bf0[5]; 307 bf1[11] = bf0[13]; 308 
bf1[12] = bf0[3]; 309 bf1[13] = bf0[11]; 310 bf1[14] = bf0[7]; 311 bf1[15] = bf0[15]; 312 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 313 } 314 315 void av1_fdct32(const int32_t *input, int32_t *output, int8_t cos_bit, 316 const int8_t *stage_range) { 317 const int32_t size = 32; 318 const int32_t *cospi; 319 320 int32_t stage = 0; 321 int32_t *bf0, *bf1; 322 int32_t step[32]; 323 324 // stage 0; 325 av1_range_check_buf(stage, input, input, size, stage_range[stage]); 326 327 // stage 1; 328 stage++; 329 bf1 = output; 330 bf1[0] = input[0] + input[31]; 331 bf1[1] = input[1] + input[30]; 332 bf1[2] = input[2] + input[29]; 333 bf1[3] = input[3] + input[28]; 334 bf1[4] = input[4] + input[27]; 335 bf1[5] = input[5] + input[26]; 336 bf1[6] = input[6] + input[25]; 337 bf1[7] = input[7] + input[24]; 338 bf1[8] = input[8] + input[23]; 339 bf1[9] = input[9] + input[22]; 340 bf1[10] = input[10] + input[21]; 341 bf1[11] = input[11] + input[20]; 342 bf1[12] = input[12] + input[19]; 343 bf1[13] = input[13] + input[18]; 344 bf1[14] = input[14] + input[17]; 345 bf1[15] = input[15] + input[16]; 346 bf1[16] = -input[16] + input[15]; 347 bf1[17] = -input[17] + input[14]; 348 bf1[18] = -input[18] + input[13]; 349 bf1[19] = -input[19] + input[12]; 350 bf1[20] = -input[20] + input[11]; 351 bf1[21] = -input[21] + input[10]; 352 bf1[22] = -input[22] + input[9]; 353 bf1[23] = -input[23] + input[8]; 354 bf1[24] = -input[24] + input[7]; 355 bf1[25] = -input[25] + input[6]; 356 bf1[26] = -input[26] + input[5]; 357 bf1[27] = -input[27] + input[4]; 358 bf1[28] = -input[28] + input[3]; 359 bf1[29] = -input[29] + input[2]; 360 bf1[30] = -input[30] + input[1]; 361 bf1[31] = -input[31] + input[0]; 362 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 363 364 // stage 2 365 stage++; 366 cospi = cospi_arr(cos_bit); 367 bf0 = output; 368 bf1 = step; 369 bf1[0] = bf0[0] + bf0[15]; 370 bf1[1] = bf0[1] + bf0[14]; 371 bf1[2] = bf0[2] + bf0[13]; 372 bf1[3] = bf0[3] + 
bf0[12]; 373 bf1[4] = bf0[4] + bf0[11]; 374 bf1[5] = bf0[5] + bf0[10]; 375 bf1[6] = bf0[6] + bf0[9]; 376 bf1[7] = bf0[7] + bf0[8]; 377 bf1[8] = -bf0[8] + bf0[7]; 378 bf1[9] = -bf0[9] + bf0[6]; 379 bf1[10] = -bf0[10] + bf0[5]; 380 bf1[11] = -bf0[11] + bf0[4]; 381 bf1[12] = -bf0[12] + bf0[3]; 382 bf1[13] = -bf0[13] + bf0[2]; 383 bf1[14] = -bf0[14] + bf0[1]; 384 bf1[15] = -bf0[15] + bf0[0]; 385 bf1[16] = bf0[16]; 386 bf1[17] = bf0[17]; 387 bf1[18] = bf0[18]; 388 bf1[19] = bf0[19]; 389 bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); 390 bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); 391 bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); 392 bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); 393 bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit); 394 bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit); 395 bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit); 396 bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit); 397 bf1[28] = bf0[28]; 398 bf1[29] = bf0[29]; 399 bf1[30] = bf0[30]; 400 bf1[31] = bf0[31]; 401 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 402 403 // stage 3 404 stage++; 405 cospi = cospi_arr(cos_bit); 406 bf0 = step; 407 bf1 = output; 408 bf1[0] = bf0[0] + bf0[7]; 409 bf1[1] = bf0[1] + bf0[6]; 410 bf1[2] = bf0[2] + bf0[5]; 411 bf1[3] = bf0[3] + bf0[4]; 412 bf1[4] = -bf0[4] + bf0[3]; 413 bf1[5] = -bf0[5] + bf0[2]; 414 bf1[6] = -bf0[6] + bf0[1]; 415 bf1[7] = -bf0[7] + bf0[0]; 416 bf1[8] = bf0[8]; 417 bf1[9] = bf0[9]; 418 bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); 419 bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); 420 bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit); 421 bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit); 422 bf1[14] = bf0[14]; 423 bf1[15] = bf0[15]; 424 bf1[16] = bf0[16] + bf0[23]; 425 
bf1[17] = bf0[17] + bf0[22]; 426 bf1[18] = bf0[18] + bf0[21]; 427 bf1[19] = bf0[19] + bf0[20]; 428 bf1[20] = -bf0[20] + bf0[19]; 429 bf1[21] = -bf0[21] + bf0[18]; 430 bf1[22] = -bf0[22] + bf0[17]; 431 bf1[23] = -bf0[23] + bf0[16]; 432 bf1[24] = -bf0[24] + bf0[31]; 433 bf1[25] = -bf0[25] + bf0[30]; 434 bf1[26] = -bf0[26] + bf0[29]; 435 bf1[27] = -bf0[27] + bf0[28]; 436 bf1[28] = bf0[28] + bf0[27]; 437 bf1[29] = bf0[29] + bf0[26]; 438 bf1[30] = bf0[30] + bf0[25]; 439 bf1[31] = bf0[31] + bf0[24]; 440 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 441 442 // stage 4 443 stage++; 444 cospi = cospi_arr(cos_bit); 445 bf0 = output; 446 bf1 = step; 447 bf1[0] = bf0[0] + bf0[3]; 448 bf1[1] = bf0[1] + bf0[2]; 449 bf1[2] = -bf0[2] + bf0[1]; 450 bf1[3] = -bf0[3] + bf0[0]; 451 bf1[4] = bf0[4]; 452 bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); 453 bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit); 454 bf1[7] = bf0[7]; 455 bf1[8] = bf0[8] + bf0[11]; 456 bf1[9] = bf0[9] + bf0[10]; 457 bf1[10] = -bf0[10] + bf0[9]; 458 bf1[11] = -bf0[11] + bf0[8]; 459 bf1[12] = -bf0[12] + bf0[15]; 460 bf1[13] = -bf0[13] + bf0[14]; 461 bf1[14] = bf0[14] + bf0[13]; 462 bf1[15] = bf0[15] + bf0[12]; 463 bf1[16] = bf0[16]; 464 bf1[17] = bf0[17]; 465 bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit); 466 bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit); 467 bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit); 468 bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit); 469 bf1[22] = bf0[22]; 470 bf1[23] = bf0[23]; 471 bf1[24] = bf0[24]; 472 bf1[25] = bf0[25]; 473 bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit); 474 bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit); 475 bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit); 476 bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit); 477 bf1[30] = bf0[30]; 478 bf1[31] 
= bf0[31]; 479 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 480 481 // stage 5 482 stage++; 483 cospi = cospi_arr(cos_bit); 484 bf0 = step; 485 bf1 = output; 486 bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); 487 bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit); 488 bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit); 489 bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit); 490 bf1[4] = bf0[4] + bf0[5]; 491 bf1[5] = -bf0[5] + bf0[4]; 492 bf1[6] = -bf0[6] + bf0[7]; 493 bf1[7] = bf0[7] + bf0[6]; 494 bf1[8] = bf0[8]; 495 bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); 496 bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); 497 bf1[11] = bf0[11]; 498 bf1[12] = bf0[12]; 499 bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit); 500 bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit); 501 bf1[15] = bf0[15]; 502 bf1[16] = bf0[16] + bf0[19]; 503 bf1[17] = bf0[17] + bf0[18]; 504 bf1[18] = -bf0[18] + bf0[17]; 505 bf1[19] = -bf0[19] + bf0[16]; 506 bf1[20] = -bf0[20] + bf0[23]; 507 bf1[21] = -bf0[21] + bf0[22]; 508 bf1[22] = bf0[22] + bf0[21]; 509 bf1[23] = bf0[23] + bf0[20]; 510 bf1[24] = bf0[24] + bf0[27]; 511 bf1[25] = bf0[25] + bf0[26]; 512 bf1[26] = -bf0[26] + bf0[25]; 513 bf1[27] = -bf0[27] + bf0[24]; 514 bf1[28] = -bf0[28] + bf0[31]; 515 bf1[29] = -bf0[29] + bf0[30]; 516 bf1[30] = bf0[30] + bf0[29]; 517 bf1[31] = bf0[31] + bf0[28]; 518 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 519 520 // stage 6 521 stage++; 522 cospi = cospi_arr(cos_bit); 523 bf0 = output; 524 bf1 = step; 525 bf1[0] = bf0[0]; 526 bf1[1] = bf0[1]; 527 bf1[2] = bf0[2]; 528 bf1[3] = bf0[3]; 529 bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit); 530 bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit); 531 bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit); 532 bf1[7] = half_btf(cospi[56], 
bf0[7], -cospi[8], bf0[4], cos_bit); 533 bf1[8] = bf0[8] + bf0[9]; 534 bf1[9] = -bf0[9] + bf0[8]; 535 bf1[10] = -bf0[10] + bf0[11]; 536 bf1[11] = bf0[11] + bf0[10]; 537 bf1[12] = bf0[12] + bf0[13]; 538 bf1[13] = -bf0[13] + bf0[12]; 539 bf1[14] = -bf0[14] + bf0[15]; 540 bf1[15] = bf0[15] + bf0[14]; 541 bf1[16] = bf0[16]; 542 bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit); 543 bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit); 544 bf1[19] = bf0[19]; 545 bf1[20] = bf0[20]; 546 bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit); 547 bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit); 548 bf1[23] = bf0[23]; 549 bf1[24] = bf0[24]; 550 bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit); 551 bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit); 552 bf1[27] = bf0[27]; 553 bf1[28] = bf0[28]; 554 bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit); 555 bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit); 556 bf1[31] = bf0[31]; 557 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 558 559 // stage 7 560 stage++; 561 cospi = cospi_arr(cos_bit); 562 bf0 = step; 563 bf1 = output; 564 bf1[0] = bf0[0]; 565 bf1[1] = bf0[1]; 566 bf1[2] = bf0[2]; 567 bf1[3] = bf0[3]; 568 bf1[4] = bf0[4]; 569 bf1[5] = bf0[5]; 570 bf1[6] = bf0[6]; 571 bf1[7] = bf0[7]; 572 bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit); 573 bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit); 574 bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit); 575 bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit); 576 bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit); 577 bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit); 578 bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit); 579 bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit); 580 bf1[16] 
= bf0[16] + bf0[17]; 581 bf1[17] = -bf0[17] + bf0[16]; 582 bf1[18] = -bf0[18] + bf0[19]; 583 bf1[19] = bf0[19] + bf0[18]; 584 bf1[20] = bf0[20] + bf0[21]; 585 bf1[21] = -bf0[21] + bf0[20]; 586 bf1[22] = -bf0[22] + bf0[23]; 587 bf1[23] = bf0[23] + bf0[22]; 588 bf1[24] = bf0[24] + bf0[25]; 589 bf1[25] = -bf0[25] + bf0[24]; 590 bf1[26] = -bf0[26] + bf0[27]; 591 bf1[27] = bf0[27] + bf0[26]; 592 bf1[28] = bf0[28] + bf0[29]; 593 bf1[29] = -bf0[29] + bf0[28]; 594 bf1[30] = -bf0[30] + bf0[31]; 595 bf1[31] = bf0[31] + bf0[30]; 596 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 597 598 // stage 8 599 stage++; 600 cospi = cospi_arr(cos_bit); 601 bf0 = output; 602 bf1 = step; 603 bf1[0] = bf0[0]; 604 bf1[1] = bf0[1]; 605 bf1[2] = bf0[2]; 606 bf1[3] = bf0[3]; 607 bf1[4] = bf0[4]; 608 bf1[5] = bf0[5]; 609 bf1[6] = bf0[6]; 610 bf1[7] = bf0[7]; 611 bf1[8] = bf0[8]; 612 bf1[9] = bf0[9]; 613 bf1[10] = bf0[10]; 614 bf1[11] = bf0[11]; 615 bf1[12] = bf0[12]; 616 bf1[13] = bf0[13]; 617 bf1[14] = bf0[14]; 618 bf1[15] = bf0[15]; 619 bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit); 620 bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit); 621 bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit); 622 bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit); 623 bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit); 624 bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit); 625 bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit); 626 bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit); 627 bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit); 628 bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit); 629 bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit); 630 bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit); 631 bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit); 632 
bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit); 633 bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit); 634 bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit); 635 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 636 637 // stage 9 638 stage++; 639 bf0 = step; 640 bf1 = output; 641 bf1[0] = bf0[0]; 642 bf1[1] = bf0[16]; 643 bf1[2] = bf0[8]; 644 bf1[3] = bf0[24]; 645 bf1[4] = bf0[4]; 646 bf1[5] = bf0[20]; 647 bf1[6] = bf0[12]; 648 bf1[7] = bf0[28]; 649 bf1[8] = bf0[2]; 650 bf1[9] = bf0[18]; 651 bf1[10] = bf0[10]; 652 bf1[11] = bf0[26]; 653 bf1[12] = bf0[6]; 654 bf1[13] = bf0[22]; 655 bf1[14] = bf0[14]; 656 bf1[15] = bf0[30]; 657 bf1[16] = bf0[1]; 658 bf1[17] = bf0[17]; 659 bf1[18] = bf0[9]; 660 bf1[19] = bf0[25]; 661 bf1[20] = bf0[5]; 662 bf1[21] = bf0[21]; 663 bf1[22] = bf0[13]; 664 bf1[23] = bf0[29]; 665 bf1[24] = bf0[3]; 666 bf1[25] = bf0[19]; 667 bf1[26] = bf0[11]; 668 bf1[27] = bf0[27]; 669 bf1[28] = bf0[7]; 670 bf1[29] = bf0[23]; 671 bf1[30] = bf0[15]; 672 bf1[31] = bf0[31]; 673 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 674 } 675 676 void av1_fadst4(const int32_t *input, int32_t *output, int8_t cos_bit, 677 const int8_t *stage_range) { 678 int bit = cos_bit; 679 const int32_t *sinpi = sinpi_arr(bit); 680 int32_t x0, x1, x2, x3; 681 int32_t s0, s1, s2, s3, s4, s5, s6, s7; 682 683 // stage 0 684 av1_range_check_buf(0, input, input, 4, stage_range[0]); 685 x0 = input[0]; 686 x1 = input[1]; 687 x2 = input[2]; 688 x3 = input[3]; 689 690 if (!(x0 | x1 | x2 | x3)) { 691 output[0] = output[1] = output[2] = output[3] = 0; 692 return; 693 } 694 695 // stage 1 696 s0 = range_check_value(sinpi[1] * x0, bit + stage_range[1]); 697 s1 = range_check_value(sinpi[4] * x0, bit + stage_range[1]); 698 s2 = range_check_value(sinpi[2] * x1, bit + stage_range[1]); 699 s3 = range_check_value(sinpi[1] * x1, bit + stage_range[1]); 700 s4 = range_check_value(sinpi[3] * x2, bit + 
stage_range[1]); 701 s5 = range_check_value(sinpi[4] * x3, bit + stage_range[1]); 702 s6 = range_check_value(sinpi[2] * x3, bit + stage_range[1]); 703 s7 = range_check_value(x0 + x1, stage_range[1]); 704 705 // stage 2 706 s7 = range_check_value(s7 - x3, stage_range[2]); 707 708 // stage 3 709 x0 = range_check_value(s0 + s2, bit + stage_range[3]); 710 x1 = range_check_value(sinpi[3] * s7, bit + stage_range[3]); 711 x2 = range_check_value(s1 - s3, bit + stage_range[3]); 712 x3 = range_check_value(s4, bit + stage_range[3]); 713 714 // stage 4 715 x0 = range_check_value(x0 + s5, bit + stage_range[4]); 716 x2 = range_check_value(x2 + s6, bit + stage_range[4]); 717 718 // stage 5 719 s0 = range_check_value(x0 + x3, bit + stage_range[5]); 720 s1 = range_check_value(x1, bit + stage_range[5]); 721 s2 = range_check_value(x2 - x3, bit + stage_range[5]); 722 s3 = range_check_value(x2 - x0, bit + stage_range[5]); 723 724 // stage 6 725 s3 = range_check_value(s3 + x3, bit + stage_range[6]); 726 727 // 1-D transform scaling factor is sqrt(2). 
728 output[0] = round_shift(s0, bit); 729 output[1] = round_shift(s1, bit); 730 output[2] = round_shift(s2, bit); 731 output[3] = round_shift(s3, bit); 732 av1_range_check_buf(6, input, output, 4, stage_range[6]); 733 } 734 735 void av1_fadst8(const int32_t *input, int32_t *output, int8_t cos_bit, 736 const int8_t *stage_range) { 737 const int32_t size = 8; 738 const int32_t *cospi; 739 740 int32_t stage = 0; 741 int32_t *bf0, *bf1; 742 int32_t step[8]; 743 744 // stage 0; 745 av1_range_check_buf(stage, input, input, size, stage_range[stage]); 746 747 // stage 1; 748 stage++; 749 assert(output != input); 750 bf1 = output; 751 bf1[0] = input[0]; 752 bf1[1] = -input[7]; 753 bf1[2] = -input[3]; 754 bf1[3] = input[4]; 755 bf1[4] = -input[1]; 756 bf1[5] = input[6]; 757 bf1[6] = input[2]; 758 bf1[7] = -input[5]; 759 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 760 761 // stage 2 762 stage++; 763 cospi = cospi_arr(cos_bit); 764 bf0 = output; 765 bf1 = step; 766 bf1[0] = bf0[0]; 767 bf1[1] = bf0[1]; 768 bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit); 769 bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit); 770 bf1[4] = bf0[4]; 771 bf1[5] = bf0[5]; 772 bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit); 773 bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit); 774 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 775 776 // stage 3 777 stage++; 778 bf0 = step; 779 bf1 = output; 780 bf1[0] = bf0[0] + bf0[2]; 781 bf1[1] = bf0[1] + bf0[3]; 782 bf1[2] = bf0[0] - bf0[2]; 783 bf1[3] = bf0[1] - bf0[3]; 784 bf1[4] = bf0[4] + bf0[6]; 785 bf1[5] = bf0[5] + bf0[7]; 786 bf1[6] = bf0[4] - bf0[6]; 787 bf1[7] = bf0[5] - bf0[7]; 788 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 789 790 // stage 4 791 stage++; 792 cospi = cospi_arr(cos_bit); 793 bf0 = output; 794 bf1 = step; 795 bf1[0] = bf0[0]; 796 bf1[1] = bf0[1]; 797 bf1[2] = bf0[2]; 798 bf1[3] = bf0[3]; 799 bf1[4] = 
half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit); 800 bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit); 801 bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit); 802 bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit); 803 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 804 805 // stage 5 806 stage++; 807 bf0 = step; 808 bf1 = output; 809 bf1[0] = bf0[0] + bf0[4]; 810 bf1[1] = bf0[1] + bf0[5]; 811 bf1[2] = bf0[2] + bf0[6]; 812 bf1[3] = bf0[3] + bf0[7]; 813 bf1[4] = bf0[0] - bf0[4]; 814 bf1[5] = bf0[1] - bf0[5]; 815 bf1[6] = bf0[2] - bf0[6]; 816 bf1[7] = bf0[3] - bf0[7]; 817 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 818 819 // stage 6 820 stage++; 821 cospi = cospi_arr(cos_bit); 822 bf0 = output; 823 bf1 = step; 824 bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit); 825 bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit); 826 bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit); 827 bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit); 828 bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit); 829 bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit); 830 bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit); 831 bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit); 832 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 833 834 // stage 7 835 stage++; 836 bf0 = step; 837 bf1 = output; 838 bf1[0] = bf0[1]; 839 bf1[1] = bf0[6]; 840 bf1[2] = bf0[3]; 841 bf1[3] = bf0[4]; 842 bf1[4] = bf0[5]; 843 bf1[5] = bf0[2]; 844 bf1[6] = bf0[7]; 845 bf1[7] = bf0[0]; 846 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 847 } 848 849 void av1_fadst16(const int32_t *input, int32_t *output, int8_t cos_bit, 850 const int8_t *stage_range) { 851 const int32_t size = 16; 852 const int32_t *cospi; 853 854 int32_t stage = 0; 855 int32_t *bf0, *bf1; 856 int32_t step[16]; 857 
858 // stage 0; 859 av1_range_check_buf(stage, input, input, size, stage_range[stage]); 860 861 // stage 1; 862 stage++; 863 assert(output != input); 864 bf1 = output; 865 bf1[0] = input[0]; 866 bf1[1] = -input[15]; 867 bf1[2] = -input[7]; 868 bf1[3] = input[8]; 869 bf1[4] = -input[3]; 870 bf1[5] = input[12]; 871 bf1[6] = input[4]; 872 bf1[7] = -input[11]; 873 bf1[8] = -input[1]; 874 bf1[9] = input[14]; 875 bf1[10] = input[6]; 876 bf1[11] = -input[9]; 877 bf1[12] = input[2]; 878 bf1[13] = -input[13]; 879 bf1[14] = -input[5]; 880 bf1[15] = input[10]; 881 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 882 883 // stage 2 884 stage++; 885 cospi = cospi_arr(cos_bit); 886 bf0 = output; 887 bf1 = step; 888 bf1[0] = bf0[0]; 889 bf1[1] = bf0[1]; 890 bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit); 891 bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit); 892 bf1[4] = bf0[4]; 893 bf1[5] = bf0[5]; 894 bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit); 895 bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit); 896 bf1[8] = bf0[8]; 897 bf1[9] = bf0[9]; 898 bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit); 899 bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit); 900 bf1[12] = bf0[12]; 901 bf1[13] = bf0[13]; 902 bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit); 903 bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit); 904 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 905 906 // stage 3 907 stage++; 908 bf0 = step; 909 bf1 = output; 910 bf1[0] = bf0[0] + bf0[2]; 911 bf1[1] = bf0[1] + bf0[3]; 912 bf1[2] = bf0[0] - bf0[2]; 913 bf1[3] = bf0[1] - bf0[3]; 914 bf1[4] = bf0[4] + bf0[6]; 915 bf1[5] = bf0[5] + bf0[7]; 916 bf1[6] = bf0[4] - bf0[6]; 917 bf1[7] = bf0[5] - bf0[7]; 918 bf1[8] = bf0[8] + bf0[10]; 919 bf1[9] = bf0[9] + bf0[11]; 920 bf1[10] = bf0[8] - bf0[10]; 921 bf1[11] = bf0[9] - bf0[11]; 922 bf1[12] = bf0[12] + 
bf0[14]; 923 bf1[13] = bf0[13] + bf0[15]; 924 bf1[14] = bf0[12] - bf0[14]; 925 bf1[15] = bf0[13] - bf0[15]; 926 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 927 928 // stage 4 929 stage++; 930 cospi = cospi_arr(cos_bit); 931 bf0 = output; 932 bf1 = step; 933 bf1[0] = bf0[0]; 934 bf1[1] = bf0[1]; 935 bf1[2] = bf0[2]; 936 bf1[3] = bf0[3]; 937 bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit); 938 bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit); 939 bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit); 940 bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit); 941 bf1[8] = bf0[8]; 942 bf1[9] = bf0[9]; 943 bf1[10] = bf0[10]; 944 bf1[11] = bf0[11]; 945 bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit); 946 bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit); 947 bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit); 948 bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit); 949 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 950 951 // stage 5 952 stage++; 953 bf0 = step; 954 bf1 = output; 955 bf1[0] = bf0[0] + bf0[4]; 956 bf1[1] = bf0[1] + bf0[5]; 957 bf1[2] = bf0[2] + bf0[6]; 958 bf1[3] = bf0[3] + bf0[7]; 959 bf1[4] = bf0[0] - bf0[4]; 960 bf1[5] = bf0[1] - bf0[5]; 961 bf1[6] = bf0[2] - bf0[6]; 962 bf1[7] = bf0[3] - bf0[7]; 963 bf1[8] = bf0[8] + bf0[12]; 964 bf1[9] = bf0[9] + bf0[13]; 965 bf1[10] = bf0[10] + bf0[14]; 966 bf1[11] = bf0[11] + bf0[15]; 967 bf1[12] = bf0[8] - bf0[12]; 968 bf1[13] = bf0[9] - bf0[13]; 969 bf1[14] = bf0[10] - bf0[14]; 970 bf1[15] = bf0[11] - bf0[15]; 971 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 972 973 // stage 6 974 stage++; 975 cospi = cospi_arr(cos_bit); 976 bf0 = output; 977 bf1 = step; 978 bf1[0] = bf0[0]; 979 bf1[1] = bf0[1]; 980 bf1[2] = bf0[2]; 981 bf1[3] = bf0[3]; 982 bf1[4] = bf0[4]; 983 bf1[5] = bf0[5]; 984 bf1[6] = bf0[6]; 985 bf1[7] = bf0[7]; 986 
bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit); 987 bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit); 988 bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit); 989 bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit); 990 bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit); 991 bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit); 992 bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit); 993 bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit); 994 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 995 996 // stage 7 997 stage++; 998 bf0 = step; 999 bf1 = output; 1000 bf1[0] = bf0[0] + bf0[8]; 1001 bf1[1] = bf0[1] + bf0[9]; 1002 bf1[2] = bf0[2] + bf0[10]; 1003 bf1[3] = bf0[3] + bf0[11]; 1004 bf1[4] = bf0[4] + bf0[12]; 1005 bf1[5] = bf0[5] + bf0[13]; 1006 bf1[6] = bf0[6] + bf0[14]; 1007 bf1[7] = bf0[7] + bf0[15]; 1008 bf1[8] = bf0[0] - bf0[8]; 1009 bf1[9] = bf0[1] - bf0[9]; 1010 bf1[10] = bf0[2] - bf0[10]; 1011 bf1[11] = bf0[3] - bf0[11]; 1012 bf1[12] = bf0[4] - bf0[12]; 1013 bf1[13] = bf0[5] - bf0[13]; 1014 bf1[14] = bf0[6] - bf0[14]; 1015 bf1[15] = bf0[7] - bf0[15]; 1016 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 1017 1018 // stage 8 1019 stage++; 1020 cospi = cospi_arr(cos_bit); 1021 bf0 = output; 1022 bf1 = step; 1023 bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit); 1024 bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit); 1025 bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit); 1026 bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit); 1027 bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit); 1028 bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit); 1029 bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit); 1030 bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit); 1031 bf1[8] = 
half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit);
  bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit);
  bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit);
  bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit);
  bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit);
  bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit);
  bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit);
  bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 9
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[1];
  bf1[1] = bf0[14];
  bf1[2] = bf0[3];
  bf1[3] = bf0[12];
  bf1[4] = bf0[5];
  bf1[5] = bf0[10];
  bf1[6] = bf0[7];
  bf1[7] = bf0[8];
  bf1[8] = bf0[9];
  bf1[9] = bf0[6];
  bf1[10] = bf0[11];
  bf1[11] = bf0[4];
  bf1[12] = bf0[13];
  bf1[13] = bf0[2];
  bf1[14] = bf0[15];
  bf1[15] = bf0[0];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
}

// 4-point identity transform.
//
// Writes output[i] = round_shift((int64_t)input[i] * NewSqrt2, NewSqrt2Bits)
// for i in [0, 4). The product is formed in 64 bits before round_shift is
// applied.
// NOTE(review): NewSqrt2 / round_shift are defined elsewhere; presumably this
// is a fixed-point multiply by sqrt(2) -- confirm in av1_txfm.h.
//
//   input       - 4 source values.
//   output      - 4 destination values.
//   cos_bit     - unused.
//   stage_range - only stage_range[0] is read.
void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                      const int8_t *stage_range) {
  (void)cos_bit;  // Unused; the cast silences the unused-parameter warning.
  for (int i = 0; i < 4; ++i)
    output[i] = round_shift((int64_t)input[i] * NewSqrt2, NewSqrt2Bits);
  // Debug-only guard on the declared stage-0 range (evaluated after the
  // scaling loop has already run).
  assert(stage_range[0] + NewSqrt2Bits <= 32);
  // NOTE(review): av1_range_check_buf is defined elsewhere; presumably it
  // verifies that `output` fits in stage_range[0] bits -- confirm.
  av1_range_check_buf(0, input, output, 4, stage_range[0]);
}

// 8-point identity transform: output[i] = input[i] * 2 for i in [0, 8).
//
//   input       - 8 source values.
//   output      - 8 destination values.
//   cos_bit     - unused.
//   stage_range - only stage_range[0] is read.
void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                      const int8_t *stage_range) {
  (void)cos_bit;  // Unused; the cast silences the unused-parameter warning.
  for (int i = 0; i < 8; ++i) output[i] = input[i] * 2;
  av1_range_check_buf(0, input, output, 8, stage_range[0]);
}

// 16-point identity transform.
//
// Writes output[i] = round_shift((int64_t)input[i] * 2 * NewSqrt2,
// NewSqrt2Bits) for i in [0, 16). The product is formed in 64 bits.
// NOTE(review): presumably a fixed-point multiply by 2*sqrt(2) -- confirm
// against the definitions of NewSqrt2 / NewSqrt2Bits.
//
//   input       - 16 source values.
//   output      - 16 destination values.
//   cos_bit     - unused.
//   stage_range - only stage_range[0] is read.
void av1_fidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range) {
  (void)cos_bit;  // Unused; the cast silences the unused-parameter warning.
  for (int i = 0; i < 16; ++i)
    output[i] = round_shift((int64_t)input[i] * 2 * NewSqrt2, NewSqrt2Bits);
  // Debug-only guard on the declared stage-0 range (evaluated after the
  // scaling loop has already run).
  assert(stage_range[0] + NewSqrt2Bits <= 32);
  av1_range_check_buf(0, input, output, 16, stage_range[0]);
}

// 32-point identity transform: output[i] = input[i] * 4 for i in [0, 32).
//
//   input       - 32 source values.
//   output      - 32 destination values.
//   cos_bit     - unused.
//   stage_range - only stage_range[0] is read.
void av1_fidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range) {
  (void)cos_bit;  // Unused; the cast silences the unused-parameter warning.
  for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
  av1_range_check_buf(0, input, output, 32, stage_range[0]);
}

// 64-point forward DCT (per the function name), written as a fully unrolled
// 11-stage network.
//
// Each stage reads one buffer (bf0) and writes the other (bf1); the two
// buffers are `output` and the local `step[64]`, swapped every stage. Stage 1
// reads `input` and writes `output`; stage 11 writes the final result to
// `output`. After every stage the freshly written buffer is passed to
// av1_range_check_buf together with stage_range[stage].
//
//   input       - 64 source values (indices 0..63 are read).
//   output      - 64 destination values; also used as scratch between stages.
//   cos_bit     - forwarded to cospi_arr() and to every half_btf() call.
//   stage_range - indexed with stage = 0..11, so 12 entries are read.
//
// `input` and `output` must not alias: stage 1 stores output[0] before it
// reads input[0] again to form output[63].
//
// NOTE(review): half_btf(w0, a, w1, b, cos_bit) and cospi_arr() are defined
// elsewhere; presumably half_btf is a rounded (w0 * a + w1 * b) >> cos_bit and
// cospi_arr returns a cosine table for that precision -- confirm in
// av1_txfm.h. cos_bit never changes inside this function, so every
// cospi_arr(cos_bit) call below is made with the same argument.
void av1_fdct64(const int32_t *input, int32_t *output, int8_t cos_bit,
                const int8_t *stage_range) {
  const int32_t size = 64;
  const int32_t *cospi;

  int32_t stage = 0;
  int32_t *bf0, *bf1;
  int32_t step[64];

  // stage 0;
  // Only checks the untouched input against stage_range[0].
  av1_range_check_buf(stage, input, input, size, stage_range[stage]);

  // stage 1;
  // input -> output. Lanes 0..31: input[i] + input[63 - i].
  // Lanes 32..63: input[63 - i] - input[i].
  stage++;
  bf1 = output;
  bf1[0] = input[0] + input[63];
  bf1[1] = input[1] + input[62];
  bf1[2] = input[2] + input[61];
  bf1[3] = input[3] + input[60];
  bf1[4] = input[4] + input[59];
  bf1[5] = input[5] + input[58];
  bf1[6] = input[6] + input[57];
  bf1[7] = input[7] + input[56];
  bf1[8] = input[8] + input[55];
  bf1[9] = input[9] + input[54];
  bf1[10] = input[10] + input[53];
  bf1[11] = input[11] + input[52];
  bf1[12] = input[12] + input[51];
  bf1[13] = input[13] + input[50];
  bf1[14] = input[14] + input[49];
  bf1[15] = input[15] + input[48];
  bf1[16] = input[16] + input[47];
  bf1[17] = input[17] + input[46];
  bf1[18] = input[18] + input[45];
  bf1[19] = input[19] + input[44];
  bf1[20] = input[20] + input[43];
  bf1[21] = input[21] + input[42];
  bf1[22] = input[22] + input[41];
  bf1[23] = input[23] + input[40];
  bf1[24] = input[24] + input[39];
  bf1[25] = input[25] + input[38];
  bf1[26] = input[26] + input[37];
  bf1[27] = input[27] + input[36];
  bf1[28] = input[28] + input[35];
  bf1[29] = input[29] + input[34];
  bf1[30] = input[30] + input[33];
  bf1[31] = input[31] + input[32];
  bf1[32] = -input[32] + input[31];
  bf1[33] = -input[33] + input[30];
  bf1[34] = -input[34] + input[29];
  bf1[35] = -input[35] + input[28];
  bf1[36] = -input[36] + input[27];
  bf1[37] = -input[37] + input[26];
  bf1[38] = -input[38] + input[25];
  bf1[39] = -input[39] + input[24];
  bf1[40] = -input[40] + input[23];
  bf1[41] = -input[41] + input[22];
  bf1[42] = -input[42] + input[21];
  bf1[43] = -input[43] + input[20];
  bf1[44] = -input[44] + input[19];
  bf1[45] = -input[45] + input[18];
  bf1[46] = -input[46] + input[17];
  bf1[47] = -input[47] + input[16];
  bf1[48] = -input[48] + input[15];
  bf1[49] = -input[49] + input[14];
  bf1[50] = -input[50] + input[13];
  bf1[51] = -input[51] + input[12];
  bf1[52] = -input[52] + input[11];
  bf1[53] = -input[53] + input[10];
  bf1[54] = -input[54] + input[9];
  bf1[55] = -input[55] + input[8];
  bf1[56] = -input[56] + input[7];
  bf1[57] = -input[57] + input[6];
  bf1[58] = -input[58] + input[5];
  bf1[59] = -input[59] + input[4];
  bf1[60] = -input[60] + input[3];
  bf1[61] = -input[61] + input[2];
  bf1[62] = -input[62] + input[1];
  bf1[63] = -input[63] + input[0];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 2
  // output -> step. Lanes 0..31: mirrored add/sub of lanes i and 31 - i.
  // Lanes 40..55: half_btf with cospi[32] on lanes i and 95 - i.
  // Lanes 32..39 and 56..63 are copied.
  stage++;
  cospi = cospi_arr(cos_bit);
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0] + bf0[31];
  bf1[1] = bf0[1] + bf0[30];
  bf1[2] = bf0[2] + bf0[29];
  bf1[3] = bf0[3] + bf0[28];
  bf1[4] = bf0[4] + bf0[27];
  bf1[5] = bf0[5] + bf0[26];
  bf1[6] = bf0[6] + bf0[25];
  bf1[7] = bf0[7] + bf0[24];
  bf1[8] = bf0[8] + bf0[23];
  bf1[9] = bf0[9] + bf0[22];
  bf1[10] = bf0[10] + bf0[21];
  bf1[11] = bf0[11] + bf0[20];
  bf1[12] = bf0[12] + bf0[19];
  bf1[13] = bf0[13] + bf0[18];
  bf1[14] = bf0[14] + bf0[17];
  bf1[15] = bf0[15] + bf0[16];
  bf1[16] = -bf0[16] + bf0[15];
  bf1[17] = -bf0[17] + bf0[14];
  bf1[18] = -bf0[18] + bf0[13];
  bf1[19] = -bf0[19] + bf0[12];
  bf1[20] = -bf0[20] + bf0[11];
  bf1[21] = -bf0[21] + bf0[10];
  bf1[22] = -bf0[22] + bf0[9];
  bf1[23] = -bf0[23] + bf0[8];
  bf1[24] = -bf0[24] + bf0[7];
  bf1[25] = -bf0[25] + bf0[6];
  bf1[26] = -bf0[26] + bf0[5];
  bf1[27] = -bf0[27] + bf0[4];
  bf1[28] = -bf0[28] + bf0[3];
  bf1[29] = -bf0[29] + bf0[2];
  bf1[30] = -bf0[30] + bf0[1];
  bf1[31] = -bf0[31] + bf0[0];
  bf1[32] = bf0[32];
  bf1[33] = bf0[33];
  bf1[34] = bf0[34];
  bf1[35] = bf0[35];
  bf1[36] = bf0[36];
  bf1[37] = bf0[37];
  bf1[38] = bf0[38];
  bf1[39] = bf0[39];
  bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
  bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
  bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
  bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
  bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
  bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
  bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
  bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
  bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit);
  bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit);
  bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit);
  bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit);
  bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit);
  bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit);
  bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit);
  bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit);
  bf1[56] = bf0[56];
  bf1[57] = bf0[57];
  bf1[58] = bf0[58];
  bf1[59] = bf0[59];
  bf1[60] = bf0[60];
  bf1[61] = bf0[61];
  bf1[62] = bf0[62];
  bf1[63] = bf0[63];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 3
  // step -> output. Lanes 0..15 and 32..63: mirrored add/sub.
  // Lanes 20..27: half_btf with cospi[32] on lanes i and 47 - i.
  // Lanes 16..19 and 28..31 are copied.
  stage++;
  cospi = cospi_arr(cos_bit);
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[0] + bf0[15];
  bf1[1] = bf0[1] + bf0[14];
  bf1[2] = bf0[2] + bf0[13];
  bf1[3] = bf0[3] + bf0[12];
  bf1[4] = bf0[4] + bf0[11];
  bf1[5] = bf0[5] + bf0[10];
  bf1[6] = bf0[6] + bf0[9];
  bf1[7] = bf0[7] + bf0[8];
  bf1[8] = -bf0[8] + bf0[7];
  bf1[9] = -bf0[9] + bf0[6];
  bf1[10] = -bf0[10] + bf0[5];
  bf1[11] = -bf0[11] + bf0[4];
  bf1[12] = -bf0[12] + bf0[3];
  bf1[13] = -bf0[13] + bf0[2];
  bf1[14] = -bf0[14] + bf0[1];
  bf1[15] = -bf0[15] + bf0[0];
  bf1[16] = bf0[16];
  bf1[17] = bf0[17];
  bf1[18] = bf0[18];
  bf1[19] = bf0[19];
  bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
  bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
  bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
  bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
  bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit);
  bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit);
  bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit);
  bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit);
  bf1[28] = bf0[28];
  bf1[29] = bf0[29];
  bf1[30] = bf0[30];
  bf1[31] = bf0[31];
  bf1[32] = bf0[32] + bf0[47];
  bf1[33] = bf0[33] + bf0[46];
  bf1[34] = bf0[34] + bf0[45];
  bf1[35] = bf0[35] + bf0[44];
  bf1[36] = bf0[36] + bf0[43];
  bf1[37] = bf0[37] + bf0[42];
  bf1[38] = bf0[38] + bf0[41];
  bf1[39] = bf0[39] + bf0[40];
  bf1[40] = -bf0[40] + bf0[39];
  bf1[41] = -bf0[41] + bf0[38];
  bf1[42] = -bf0[42] + bf0[37];
  bf1[43] = -bf0[43] + bf0[36];
  bf1[44] = -bf0[44] + bf0[35];
  bf1[45] = -bf0[45] + bf0[34];
  bf1[46] = -bf0[46] + bf0[33];
  bf1[47] = -bf0[47] + bf0[32];
  bf1[48] = -bf0[48] + bf0[63];
  bf1[49] = -bf0[49] + bf0[62];
  bf1[50] = -bf0[50] + bf0[61];
  bf1[51] = -bf0[51] + bf0[60];
  bf1[52] = -bf0[52] + bf0[59];
  bf1[53] = -bf0[53] + bf0[58];
  bf1[54] = -bf0[54] + bf0[57];
  bf1[55] = -bf0[55] + bf0[56];
  bf1[56] = bf0[56] + bf0[55];
  bf1[57] = bf0[57] + bf0[54];
  bf1[58] = bf0[58] + bf0[53];
  bf1[59] = bf0[59] + bf0[52];
  bf1[60] = bf0[60] + bf0[51];
  bf1[61] = bf0[61] + bf0[50];
  bf1[62] = bf0[62] + bf0[49];
  bf1[63] = bf0[63] + bf0[48];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 4
  // output -> step. Lanes 0..7 and 16..31: mirrored add/sub.
  // Lanes 10..13: half_btf with cospi[32].
  // Lanes 36..43 and 52..59: half_btf with cospi[16] / cospi[48] on lanes i
  // and 95 - i. All remaining lanes are copied.
  stage++;
  cospi = cospi_arr(cos_bit);
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0] + bf0[7];
  bf1[1] = bf0[1] + bf0[6];
  bf1[2] = bf0[2] + bf0[5];
  bf1[3] = bf0[3] + bf0[4];
  bf1[4] = -bf0[4] + bf0[3];
  bf1[5] = -bf0[5] + bf0[2];
  bf1[6] = -bf0[6] + bf0[1];
  bf1[7] = -bf0[7] + bf0[0];
  bf1[8] = bf0[8];
  bf1[9] = bf0[9];
  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
  bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
  bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
  bf1[14] = bf0[14];
  bf1[15] = bf0[15];
  bf1[16] = bf0[16] + bf0[23];
  bf1[17] = bf0[17] + bf0[22];
  bf1[18] = bf0[18] + bf0[21];
  bf1[19] = bf0[19] + bf0[20];
  bf1[20] = -bf0[20] + bf0[19];
  bf1[21] = -bf0[21] + bf0[18];
  bf1[22] = -bf0[22] + bf0[17];
  bf1[23] = -bf0[23] + bf0[16];
  bf1[24] = -bf0[24] + bf0[31];
  bf1[25] = -bf0[25] + bf0[30];
  bf1[26] = -bf0[26] + bf0[29];
  bf1[27] = -bf0[27] + bf0[28];
  bf1[28] = bf0[28] + bf0[27];
  bf1[29] = bf0[29] + bf0[26];
  bf1[30] = bf0[30] + bf0[25];
  bf1[31] = bf0[31] + bf0[24];
  bf1[32] = bf0[32];
  bf1[33] = bf0[33];
  bf1[34] = bf0[34];
  bf1[35] = bf0[35];
  bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit);
  bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit);
  bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit);
  bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit);
  bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit);
  bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit);
  bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit);
  bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit);
  bf1[44] = bf0[44];
  bf1[45] = bf0[45];
  bf1[46] = bf0[46];
  bf1[47] = bf0[47];
  bf1[48] = bf0[48];
  bf1[49] = bf0[49];
  bf1[50] = bf0[50];
  bf1[51] = bf0[51];
  bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit);
  bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit);
  bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit);
  bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit);
  bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit);
  bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit);
  bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit);
  bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit);
  bf1[60] = bf0[60];
  bf1[61] = bf0[61];
  bf1[62] = bf0[62];
  bf1[63] = bf0[63];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 5
  // step -> output. Lanes 0..3, 8..15 and 32..63: mirrored add/sub.
  // Lanes 5..6: half_btf with cospi[32].
  // Lanes 18..21 and 26..29: half_btf with cospi[16] / cospi[48].
  // All remaining lanes are copied.
  stage++;
  cospi = cospi_arr(cos_bit);
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[0] + bf0[3];
  bf1[1] = bf0[1] + bf0[2];
  bf1[2] = -bf0[2] + bf0[1];
  bf1[3] = -bf0[3] + bf0[0];
  bf1[4] = bf0[4];
  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
  bf1[7] = bf0[7];
  bf1[8] = bf0[8] + bf0[11];
  bf1[9] = bf0[9] + bf0[10];
  bf1[10] = -bf0[10] + bf0[9];
  bf1[11] = -bf0[11] + bf0[8];
  bf1[12] = -bf0[12] + bf0[15];
  bf1[13] = -bf0[13] + bf0[14];
  bf1[14] = bf0[14] + bf0[13];
  bf1[15] = bf0[15] + bf0[12];
  bf1[16] = bf0[16];
  bf1[17] = bf0[17];
  bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
  bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
  bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
  bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
  bf1[22] = bf0[22];
  bf1[23] = bf0[23];
  bf1[24] = bf0[24];
  bf1[25] = bf0[25];
  bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit);
  bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit);
  bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit);
  bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit);
  bf1[30] = bf0[30];
  bf1[31] = bf0[31];
  bf1[32] = bf0[32] + bf0[39];
  bf1[33] = bf0[33] + bf0[38];
  bf1[34] = bf0[34] + bf0[37];
  bf1[35] = bf0[35] + bf0[36];
  bf1[36] = -bf0[36] + bf0[35];
  bf1[37] = -bf0[37] + bf0[34];
  bf1[38] = -bf0[38] + bf0[33];
  bf1[39] = -bf0[39] + bf0[32];
  bf1[40] = -bf0[40] + bf0[47];
  bf1[41] = -bf0[41] + bf0[46];
  bf1[42] = -bf0[42] + bf0[45];
  bf1[43] = -bf0[43] + bf0[44];
  bf1[44] = bf0[44] + bf0[43];
  bf1[45] = bf0[45] + bf0[42];
  bf1[46] = bf0[46] + bf0[41];
  bf1[47] = bf0[47] + bf0[40];
  bf1[48] = bf0[48] + bf0[55];
  bf1[49] = bf0[49] + bf0[54];
  bf1[50] = bf0[50] + bf0[53];
  bf1[51] = bf0[51] + bf0[52];
  bf1[52] = -bf0[52] + bf0[51];
  bf1[53] = -bf0[53] + bf0[50];
  bf1[54] = -bf0[54] + bf0[49];
  bf1[55] = -bf0[55] + bf0[48];
  bf1[56] = -bf0[56] + bf0[63];
  bf1[57] = -bf0[57] + bf0[62];
  bf1[58] = -bf0[58] + bf0[61];
  bf1[59] = -bf0[59] + bf0[60];
  bf1[60] = bf0[60] + bf0[59];
  bf1[61] = bf0[61] + bf0[58];
  bf1[62] = bf0[62] + bf0[57];
  bf1[63] = bf0[63] + bf0[56];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 6
  // output -> step. Lanes 0..3: half_btf with cospi[32] and cospi[48] /
  // cospi[16]. Lanes 4..7 and 16..31: add/sub.
  // Lanes 9, 10, 13, 14: half_btf with cospi[16] / cospi[48].
  // Lanes 34..37 and 58..61: half_btf with cospi[8] / cospi[56].
  // Lanes 42..45 and 50..53: half_btf with cospi[40] / cospi[24].
  // All remaining lanes are copied.
  stage++;
  cospi = cospi_arr(cos_bit);
  bf0 = output;
  bf1 = step;
  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
  bf1[4] = bf0[4] + bf0[5];
  bf1[5] = -bf0[5] + bf0[4];
  bf1[6] = -bf0[6] + bf0[7];
  bf1[7] = bf0[7] + bf0[6];
  bf1[8] = bf0[8];
  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
  bf1[11] = bf0[11];
  bf1[12] = bf0[12];
  bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
  bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
  bf1[15] = bf0[15];
  bf1[16] = bf0[16] + bf0[19];
  bf1[17] = bf0[17] + bf0[18];
  bf1[18] = -bf0[18] + bf0[17];
  bf1[19] = -bf0[19] + bf0[16];
  bf1[20] = -bf0[20] + bf0[23];
  bf1[21] = -bf0[21] + bf0[22];
  bf1[22] = bf0[22] + bf0[21];
  bf1[23] = bf0[23] + bf0[20];
  bf1[24] = bf0[24] + bf0[27];
  bf1[25] = bf0[25] + bf0[26];
  bf1[26] = -bf0[26] + bf0[25];
  bf1[27] = -bf0[27] + bf0[24];
  bf1[28] = -bf0[28] + bf0[31];
  bf1[29] = -bf0[29] + bf0[30];
  bf1[30] = bf0[30] + bf0[29];
  bf1[31] = bf0[31] + bf0[28];
  bf1[32] = bf0[32];
  bf1[33] = bf0[33];
  bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit);
  bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit);
  bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit);
  bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit);
  bf1[38] = bf0[38];
  bf1[39] = bf0[39];
  bf1[40] = bf0[40];
  bf1[41] = bf0[41];
  bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit);
  bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit);
  bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit);
  bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit);
  bf1[46] = bf0[46];
  bf1[47] = bf0[47];
  bf1[48] = bf0[48];
  bf1[49] = bf0[49];
  bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit);
  bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit);
  bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit);
  bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit);
  bf1[54] = bf0[54];
  bf1[55] = bf0[55];
  bf1[56] = bf0[56];
  bf1[57] = bf0[57];
  bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit);
  bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit);
  bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit);
  bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit);
  bf1[62] = bf0[62];
  bf1[63] = bf0[63];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 7
  // step -> output. Lanes 0..3 are copied (already final, see stage 11).
  // Lanes 4..7: half_btf with cospi[56] / cospi[8] and cospi[24] / cospi[40].
  // Lanes 8..15 and 32..63: add/sub.
  // Lanes 17, 18, 29, 30: half_btf with cospi[8] / cospi[56].
  // Lanes 21, 22, 25, 26: half_btf with cospi[40] / cospi[24].
  // All remaining lanes are copied.
  stage++;
  cospi = cospi_arr(cos_bit);
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
  bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
  bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
  bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
  bf1[8] = bf0[8] + bf0[9];
  bf1[9] = -bf0[9] + bf0[8];
  bf1[10] = -bf0[10] + bf0[11];
  bf1[11] = bf0[11] + bf0[10];
  bf1[12] = bf0[12] + bf0[13];
  bf1[13] = -bf0[13] + bf0[12];
  bf1[14] = -bf0[14] + bf0[15];
  bf1[15] = bf0[15] + bf0[14];
  bf1[16] = bf0[16];
  bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
  bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
  bf1[19] = bf0[19];
  bf1[20] = bf0[20];
  bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
  bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
  bf1[23] = bf0[23];
  bf1[24] = bf0[24];
  bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit);
  bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit);
  bf1[27] = bf0[27];
  bf1[28] = bf0[28];
  bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit);
  bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit);
  bf1[31] = bf0[31];
  bf1[32] = bf0[32] + bf0[35];
  bf1[33] = bf0[33] + bf0[34];
  bf1[34] = -bf0[34] + bf0[33];
  bf1[35] = -bf0[35] + bf0[32];
  bf1[36] = -bf0[36] + bf0[39];
  bf1[37] = -bf0[37] + bf0[38];
  bf1[38] = bf0[38] + bf0[37];
  bf1[39] = bf0[39] + bf0[36];
  bf1[40] = bf0[40] + bf0[43];
  bf1[41] = bf0[41] + bf0[42];
  bf1[42] = -bf0[42] + bf0[41];
  bf1[43] = -bf0[43] + bf0[40];
  bf1[44] = -bf0[44] + bf0[47];
  bf1[45] = -bf0[45] + bf0[46];
  bf1[46] = bf0[46] + bf0[45];
  bf1[47] = bf0[47] + bf0[44];
  bf1[48] = bf0[48] + bf0[51];
  bf1[49] = bf0[49] + bf0[50];
  bf1[50] = -bf0[50] + bf0[49];
  bf1[51] = -bf0[51] + bf0[48];
  bf1[52] = -bf0[52] + bf0[55];
  bf1[53] = -bf0[53] + bf0[54];
  bf1[54] = bf0[54] + bf0[53];
  bf1[55] = bf0[55] + bf0[52];
  bf1[56] = bf0[56] + bf0[59];
  bf1[57] = bf0[57] + bf0[58];
  bf1[58] = -bf0[58] + bf0[57];
  bf1[59] = -bf0[59] + bf0[56];
  bf1[60] = -bf0[60] + bf0[63];
  bf1[61] = -bf0[61] + bf0[62];
  bf1[62] = bf0[62] + bf0[61];
  bf1[63] = bf0[63] + bf0[60];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 8
  // output -> step. Lanes 0..7 are copied.
  // Lanes 8..15: half_btf on lanes i and 23 - i with cospi index pairs
  // 60/4, 28/36, 44/20, 12/52.
  // Lanes 16..31: add/sub of adjacent lanes.
  // Lanes 32..63: every group of four copies its outer two lanes and applies
  // half_btf (cospi 4/60, 36/28, 20/44, 52/12) to its inner two.
  stage++;
  cospi = cospi_arr(cos_bit);
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = bf0[4];
  bf1[5] = bf0[5];
  bf1[6] = bf0[6];
  bf1[7] = bf0[7];
  bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
  bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
  bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
  bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
  bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
  bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
  bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
  bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
  bf1[16] = bf0[16] + bf0[17];
  bf1[17] = -bf0[17] + bf0[16];
  bf1[18] = -bf0[18] + bf0[19];
  bf1[19] = bf0[19] + bf0[18];
  bf1[20] = bf0[20] + bf0[21];
  bf1[21] = -bf0[21] + bf0[20];
  bf1[22] = -bf0[22] + bf0[23];
  bf1[23] = bf0[23] + bf0[22];
  bf1[24] = bf0[24] + bf0[25];
  bf1[25] = -bf0[25] + bf0[24];
  bf1[26] = -bf0[26] + bf0[27];
  bf1[27] = bf0[27] + bf0[26];
  bf1[28] = bf0[28] + bf0[29];
  bf1[29] = -bf0[29] + bf0[28];
  bf1[30] = -bf0[30] + bf0[31];
  bf1[31] = bf0[31] + bf0[30];
  bf1[32] = bf0[32];
  bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit);
  bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit);
  bf1[35] = bf0[35];
  bf1[36] = bf0[36];
  bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit);
  bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit);
  bf1[39] = bf0[39];
  bf1[40] = bf0[40];
  bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit);
  bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit);
  bf1[43] = bf0[43];
  bf1[44] = bf0[44];
  bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit);
  bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit);
  bf1[47] = bf0[47];
  bf1[48] = bf0[48];
  bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit);
  bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit);
  bf1[51] = bf0[51];
  bf1[52] = bf0[52];
  bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit);
  bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit);
  bf1[55] = bf0[55];
  bf1[56] = bf0[56];
  bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit);
  bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit);
  bf1[59] = bf0[59];
  bf1[60] = bf0[60];
  bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit);
  bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit);
  bf1[63] = bf0[63];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 9
  // step -> output. Lanes 0..15 are copied.
  // Lanes 16..31: half_btf on lanes i and 47 - i with even cospi indices.
  // Lanes 32..63: add/sub of adjacent lanes.
  stage++;
  cospi = cospi_arr(cos_bit);
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = bf0[4];
  bf1[5] = bf0[5];
  bf1[6] = bf0[6];
  bf1[7] = bf0[7];
  bf1[8] = bf0[8];
  bf1[9] = bf0[9];
  bf1[10] = bf0[10];
  bf1[11] = bf0[11];
  bf1[12] = bf0[12];
  bf1[13] = bf0[13];
  bf1[14] = bf0[14];
  bf1[15] = bf0[15];
  bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit);
  bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit);
  bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit);
  bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit);
  bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit);
  bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit);
  bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit);
  bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit);
  bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit);
  bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit);
  bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit);
  bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit);
  bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit);
  bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit);
  bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit);
  bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit);
  bf1[32] = bf0[32] + bf0[33];
  bf1[33] = -bf0[33] + bf0[32];
  bf1[34] = -bf0[34] + bf0[35];
  bf1[35] = bf0[35] + bf0[34];
  bf1[36] = bf0[36] + bf0[37];
  bf1[37] = -bf0[37] + bf0[36];
  bf1[38] = -bf0[38] + bf0[39];
  bf1[39] = bf0[39] + bf0[38];
  bf1[40] = bf0[40] + bf0[41];
  bf1[41] = -bf0[41] + bf0[40];
  bf1[42] = -bf0[42] + bf0[43];
  bf1[43] = bf0[43] + bf0[42];
  bf1[44] = bf0[44] + bf0[45];
  bf1[45] = -bf0[45] + bf0[44];
  bf1[46] = -bf0[46] + bf0[47];
  bf1[47] = bf0[47] + bf0[46];
  bf1[48] = bf0[48] + bf0[49];
  bf1[49] = -bf0[49] + bf0[48];
  bf1[50] = -bf0[50] + bf0[51];
  bf1[51] = bf0[51] + bf0[50];
  bf1[52] = bf0[52] + bf0[53];
  bf1[53] = -bf0[53] + bf0[52];
  bf1[54] = -bf0[54] + bf0[55];
  bf1[55] = bf0[55] + bf0[54];
  bf1[56] = bf0[56] + bf0[57];
  bf1[57] = -bf0[57] + bf0[56];
  bf1[58] = -bf0[58] + bf0[59];
  bf1[59] = bf0[59] + bf0[58];
  bf1[60] = bf0[60] + bf0[61];
  bf1[61] = -bf0[61] + bf0[60];
  bf1[62] = -bf0[62] + bf0[63];
  bf1[63] = bf0[63] + bf0[62];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 10
  // output -> step. Lanes 0..31 are copied.
  // Lanes 32..63: half_btf on lanes i and 95 - i with odd cospi indices.
  stage++;
  cospi = cospi_arr(cos_bit);
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = bf0[4];
  bf1[5] = bf0[5];
  bf1[6] = bf0[6];
  bf1[7] = bf0[7];
  bf1[8] = bf0[8];
  bf1[9] = bf0[9];
  bf1[10] = bf0[10];
  bf1[11] = bf0[11];
  bf1[12] = bf0[12];
  bf1[13] = bf0[13];
  bf1[14] = bf0[14];
  bf1[15] = bf0[15];
  bf1[16] = bf0[16];
  bf1[17] = bf0[17];
  bf1[18] = bf0[18];
  bf1[19] = bf0[19];
  bf1[20] = bf0[20];
  bf1[21] = bf0[21];
  bf1[22] = bf0[22];
  bf1[23] = bf0[23];
  bf1[24] = bf0[24];
  bf1[25] = bf0[25];
  bf1[26] = bf0[26];
  bf1[27] = bf0[27];
  bf1[28] = bf0[28];
  bf1[29] = bf0[29];
  bf1[30] = bf0[30];
  bf1[31] = bf0[31];
  bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit);
  bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit);
  bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit);
  bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit);
  bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit);
  bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit);
  bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit);
  bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit);
  bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit);
  bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit);
  bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit);
  bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit);
  bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit);
  bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit);
  bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit);
  bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit);
  bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit);
  bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit);
  bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit);
  bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit);
  bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit);
  bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit);
  bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit);
  bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit);
  bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit);
  bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit);
  bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit);
  bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit);
  bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit);
  bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit);
  bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit);
  bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 11
  // step -> output. Pure reordering: output[i] = step[r], where r is i with
  // its 6 index bits reversed (e.g. 1 -> 32, 3 -> 48, 63 -> 63).
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[0];
  bf1[1] = bf0[32];
  bf1[2] = bf0[16];
  bf1[3] = bf0[48];
  bf1[4] = bf0[8];
  bf1[5] = bf0[40];
  bf1[6] = bf0[24];
  bf1[7] = bf0[56];
  bf1[8] = bf0[4];
  bf1[9] = bf0[36];
  bf1[10] = bf0[20];
  bf1[11] = bf0[52];
  bf1[12] = bf0[12];
  bf1[13] = bf0[44];
  bf1[14] = bf0[28];
  bf1[15] = bf0[60];
  bf1[16] = bf0[2];
  bf1[17] = bf0[34];
  bf1[18] = bf0[18];
  bf1[19] = bf0[50];
  bf1[20] = bf0[10];
  bf1[21] = bf0[42];
  bf1[22] = bf0[26];
  bf1[23] = bf0[58];
  bf1[24] = bf0[6];
  bf1[25] = bf0[38];
  bf1[26] = bf0[22];
  bf1[27] = bf0[54];
  bf1[28] = bf0[14];
  bf1[29] = bf0[46];
  bf1[30] = bf0[30];
  bf1[31] = bf0[62];
  bf1[32] = bf0[1];
  bf1[33] = bf0[33];
  bf1[34] = bf0[17];
  bf1[35] = bf0[49];
  bf1[36] = bf0[9];
  bf1[37] = bf0[41];
  bf1[38] = bf0[25];
  bf1[39] = bf0[57];
  bf1[40] = bf0[5];
  bf1[41] = bf0[37];
  bf1[42] = bf0[21];
  bf1[43] = bf0[53];
  bf1[44] = bf0[13];
  bf1[45] = bf0[45];
  bf1[46] = bf0[29];
  bf1[47] = bf0[61];
  bf1[48] = bf0[3];
  bf1[49] = bf0[35];
  bf1[50] = bf0[19];
  bf1[51] = bf0[51];
  bf1[52] = bf0[11];
  bf1[53] = bf0[43];
  bf1[54] = bf0[27];
  bf1[55] = bf0[59];
  bf1[56] = bf0[7];
  bf1[57] = bf0[39];
  bf1[58] = bf0[23];
  bf1[59] = bf0[55];
  bf1[60] = bf0[15];
  bf1[61] = bf0[47];
  bf1[62] = bf0[31];
  bf1[63] = bf0[63];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
}