tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

fft.c (8870B)


      1 /*
      2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include "aom_dsp/aom_dsp_common.h"
     13 #include "aom_dsp/fft_common.h"
     14 #include "config/aom_dsp_rtcd.h"
     15 
     16 static inline void simple_transpose(const float *A, float *B, int n) {
     17  for (int y = 0; y < n; y++) {
     18    for (int x = 0; x < n; x++) {
     19      B[y * n + x] = A[x * n + y];
     20    }
     21  }
     22 }
     23 
     24 // The 1d transform is real to complex and packs the complex results in
     25 // a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
     26 // components, followed by the n/2 - 1 imaginary components). After the
     27 // transform is done on the rows, the first n/2 + 1 columns are real, and
     28 // the remaining are the imaginary components. After the transform on the
     29 // columns, the region of [0, n/2]x[0, n/2] contains the real part of
     30 // fft of the real columns. The real part of the 2d fft also includes the
     31 // imaginary part of transformed imaginary columns. This function assembles
     32 // the correct outputs while putting the real and imaginary components
     33 // next to each other.
     34 static inline void unpack_2d_output(const float *col_fft, float *output,
     35                                    int n) {
     36  for (int y = 0; y <= n / 2; ++y) {
     37    const int y2 = y + n / 2;
     38    const int y_extra = y2 > n / 2 && y2 < n;
     39 
     40    for (int x = 0; x <= n / 2; ++x) {
     41      const int x2 = x + n / 2;
     42      const int x_extra = x2 > n / 2 && x2 < n;
     43      output[2 * (y * n + x)] =
     44          col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
     45      output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) +
     46                                    (x_extra ? col_fft[y * n + x2] : 0);
     47      if (y_extra) {
     48        output[2 * ((n - y) * n + x)] =
     49            col_fft[y * n + x] +
     50            (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
     51        output[2 * ((n - y) * n + x) + 1] =
     52            -(y_extra ? col_fft[y2 * n + x] : 0) +
     53            (x_extra ? col_fft[y * n + x2] : 0);
     54      }
     55    }
     56  }
     57 }
     58 
     59 void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
     60                    aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
     61                    aom_fft_unpack_func_t unpack, int vec_size) {
     62  for (int x = 0; x < n; x += vec_size) {
     63    tform(input + x, output + x, n);
     64  }
     65  transpose(output, temp, n);
     66 
     67  for (int x = 0; x < n; x += vec_size) {
     68    tform(temp + x, output + x, n);
     69  }
     70  transpose(output, temp, n);
     71 
     72  unpack(temp, output, n);
     73 }
     74 
     75 static inline void store_float(float *output, float input) { *output = input; }
     76 static inline float add_float(float a, float b) { return a + b; }
     77 static inline float sub_float(float a, float b) { return a - b; }
     78 static inline float mul_float(float a, float b) { return a * b; }
     79 
     80 GEN_FFT_2(void, float, float, float, *, store_float)
     81 GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float,
     82          sub_float)
     83 GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float,
     84          sub_float, mul_float)
     85 GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float,
     86           sub_float, mul_float)
     87 GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float,
     88           sub_float, mul_float)
     89 
     90 void aom_fft2x2_float_c(const float *input, float *temp, float *output) {
     91  aom_fft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, simple_transpose,
     92                 unpack_2d_output, 1);
     93 }
     94 
     95 void aom_fft4x4_float_c(const float *input, float *temp, float *output) {
     96  aom_fft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, simple_transpose,
     97                 unpack_2d_output, 1);
     98 }
     99 
    100 void aom_fft8x8_float_c(const float *input, float *temp, float *output) {
    101  aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, simple_transpose,
    102                 unpack_2d_output, 1);
    103 }
    104 
    105 void aom_fft16x16_float_c(const float *input, float *temp, float *output) {
    106  aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, simple_transpose,
    107                 unpack_2d_output, 1);
    108 }
    109 
    110 void aom_fft32x32_float_c(const float *input, float *temp, float *output) {
    111  aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, simple_transpose,
    112                 unpack_2d_output, 1);
    113 }
    114 
    115 void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
    116                     aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
    117                     aom_fft_1d_func_t ifft_multi,
    118                     aom_fft_transpose_func_t transpose, int vec_size) {
    119  // Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
    120  // and get real outputs.
    121  for (int y = 0; y <= n / 2; ++y) {
    122    output[y * n] = input[2 * y * n];
    123    output[y * n + 1] = input[2 * (y * n + n / 2)];
    124  }
    125  for (int y = n / 2 + 1; y < n; ++y) {
    126    output[y * n] = input[2 * (y - n / 2) * n + 1];
    127    output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
    128  }
    129 
    130  for (int i = 0; i < 2; i += vec_size) {
    131    ifft_multi(output + i, temp + i, n);
    132  }
    133 
    134  // For the other columns, since we don't have a full ifft for complex inputs
    135  // we have to split them into the real and imaginary counterparts.
    136  // Pack the real component, then the imaginary components.
    137  for (int y = 0; y < n; ++y) {
    138    for (int x = 1; x < n / 2; ++x) {
    139      output[y * n + (x + 1)] = input[2 * (y * n + x)];
    140    }
    141    for (int x = 1; x < n / 2; ++x) {
    142      output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
    143    }
    144  }
    145  for (int y = 2; y < vec_size; y++) {
    146    fft_single(output + y, temp + y, n);
    147  }
    148  // This is the part that can be sped up with SIMD
    149  for (int y = AOMMAX(2, vec_size); y < n; y += vec_size) {
    150    fft_multi(output + y, temp + y, n);
    151  }
    152 
    153  // Put the 0 and n/2 th results in the correct place.
    154  for (int x = 0; x < n; ++x) {
    155    output[x] = temp[x * n];
    156    output[(n / 2) * n + x] = temp[x * n + 1];
    157  }
    158  // This rearranges and transposes.
    159  for (int y = 1; y < n / 2; ++y) {
    160    // Fill in the real columns
    161    for (int x = 0; x <= n / 2; ++x) {
    162      output[x + y * n] =
    163          temp[(y + 1) + x * n] +
    164          ((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
    165    }
    166    for (int x = n / 2 + 1; x < n; ++x) {
    167      output[x + y * n] = temp[(y + 1) + (n - x) * n] -
    168                          temp[(y + n / 2) + ((n - x) + n / 2) * n];
    169    }
    170    // Fill in the imag columns
    171    for (int x = 0; x <= n / 2; ++x) {
    172      output[x + (y + n / 2) * n] =
    173          temp[(y + n / 2) + x * n] -
    174          ((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
    175    }
    176    for (int x = n / 2 + 1; x < n; ++x) {
    177      output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] +
    178                                    temp[(y + n / 2) + (n - x) * n];
    179    }
    180  }
    181  for (int y = 0; y < n; y += vec_size) {
    182    ifft_multi(output + y, temp + y, n);
    183  }
    184  transpose(temp, output, n);
    185 }
    186 
    187 GEN_IFFT_2(static void, float, float, float, *, store_float)
    188 GEN_IFFT_4(static void, float, float, float, *, store_float, (float), add_float,
    189           sub_float)
    190 GEN_IFFT_8(static void, float, float, float, *, store_float, (float), add_float,
    191           sub_float, mul_float)
    192 GEN_IFFT_16(static void, float, float, float, *, store_float, (float),
    193            add_float, sub_float, mul_float)
    194 GEN_IFFT_32(static void, float, float, float, *, store_float, (float),
    195            add_float, sub_float, mul_float)
    196 
    197 void aom_ifft2x2_float_c(const float *input, float *temp, float *output) {
    198  aom_ifft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, aom_fft1d_2_float,
    199                  aom_ifft1d_2_float, simple_transpose, 1);
    200 }
    201 
    202 void aom_ifft4x4_float_c(const float *input, float *temp, float *output) {
    203  aom_ifft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, aom_fft1d_4_float,
    204                  aom_ifft1d_4_float, simple_transpose, 1);
    205 }
    206 
    207 void aom_ifft8x8_float_c(const float *input, float *temp, float *output) {
    208  aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_float,
    209                  aom_ifft1d_8_float, simple_transpose, 1);
    210 }
    211 
    212 void aom_ifft16x16_float_c(const float *input, float *temp, float *output) {
    213  aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
    214                  aom_fft1d_16_float, aom_ifft1d_16_float, simple_transpose, 1);
    215 }
    216 
    217 void aom_ifft32x32_float_c(const float *input, float *temp, float *output) {
    218  aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
    219                  aom_fft1d_32_float, aom_ifft1d_32_float, simple_transpose, 1);
    220 }