[ tor-browser ].git.dasho

aom_convolve.c (10044B)
      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 #include <string.h>
     14 
     15 #include "config/aom_config.h"
     16 #include "config/aom_dsp_rtcd.h"
     17 
     18 #include "aom/aom_integer.h"
     19 #include "aom_dsp/aom_dsp_common.h"
     20 #include "aom_dsp/aom_filter.h"
     21 #include "aom_ports/mem.h"
     22 
     23 static inline int horz_scalar_product(const uint8_t *a, const int16_t *b) {
     24  int sum = 0;
     25  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
     26  return sum;
     27 }
     28 
     29 static inline int vert_scalar_product(const uint8_t *a, ptrdiff_t a_stride,
     30                                      const int16_t *b) {
     31  int sum = 0;
     32  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
     33  return sum;
     34 }
     35 
     36 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
     37                           uint8_t *dst, ptrdiff_t dst_stride,
     38                           const InterpKernel *x_filters, int x0_q4,
     39                           int x_step_q4, int w, int h) {
     40  src -= SUBPEL_TAPS / 2 - 1;
     41  for (int y = 0; y < h; ++y) {
     42    int x_q4 = x0_q4;
     43    for (int x = 0; x < w; ++x) {
     44      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
     45      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
     46      const int sum = horz_scalar_product(src_x, x_filter);
     47      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
     48      x_q4 += x_step_q4;
     49    }
     50    src += src_stride;
     51    dst += dst_stride;
     52  }
     53 }
     54 
     55 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
     56                          uint8_t *dst, ptrdiff_t dst_stride,
     57                          const InterpKernel *y_filters, int y0_q4,
     58                          int y_step_q4, int w, int h) {
     59  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
     60 
     61  for (int x = 0; x < w; ++x) {
     62    int y_q4 = y0_q4;
     63    for (int y = 0; y < h; ++y) {
     64      const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
     65      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
     66      const int sum = vert_scalar_product(src_y, src_stride, y_filter);
     67      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
     68      y_q4 += y_step_q4;
     69    }
     70    ++src;
     71    ++dst;
     72  }
     73 }
     74 
     75 static const InterpKernel *get_filter_base(const int16_t *filter) {
     76  // NOTE: This assumes that the filter table is 256-byte aligned.
     77  return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
     78 }
     79 
     80 static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
     81  return (int)((const InterpKernel *)(intptr_t)f - base);
     82 }
     83 
     84 void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
     85                           uint8_t *dst, ptrdiff_t dst_stride,
     86                           const int16_t *filter_x, int x_step_q4,
     87                           const int16_t *filter_y, int y_step_q4, int w,
     88                           int h) {
     89  const InterpKernel *const filters_x = get_filter_base(filter_x);
     90  const int x0_q4 = get_filter_offset(filter_x, filters_x);
     91 
     92  (void)filter_y;
     93  (void)y_step_q4;
     94 
     95  convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
     96                 w, h);
     97 }
     98 
     99 void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    100                          uint8_t *dst, ptrdiff_t dst_stride,
    101                          const int16_t *filter_x, int x_step_q4,
    102                          const int16_t *filter_y, int y_step_q4, int w,
    103                          int h) {
    104  const InterpKernel *const filters_y = get_filter_base(filter_y);
    105  const int y0_q4 = get_filter_offset(filter_y, filters_y);
    106 
    107  (void)filter_x;
    108  (void)x_step_q4;
    109 
    110  convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
    111                w, h);
    112 }
    113 
    114 void aom_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    115                     ptrdiff_t dst_stride, const InterpKernel *filter,
    116                     int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
    117                     int h) {
    118  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
    119  // 2d filtering proceeds in 2 steps:
    120  //   (1) Interpolate horizontally into an intermediate buffer, temp.
    121  //   (2) Interpolate temp vertically to derive the sub-pixel result.
    122  // Deriving the maximum number of rows in the temp buffer (135):
    123  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
    124  // --Largest block size is 64x64 pixels.
    125  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
    126  //   original frame (in 1/16th pixel units).
    127  // --Must round-up because block may be located at sub-pixel position.
    128  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
    129  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
    130  // When calling in frame scaling function, the smallest scaling factor is x1/4
    131  // ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer is still
    132  // big enough.
    133  uint8_t temp[64 * 135];
    134  const int intermediate_height =
    135      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
    136 
    137  assert(w <= 64);
    138  assert(h <= 64);
    139  assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32));
    140  assert(x_step_q4 <= 64);
    141 
    142  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
    143                 filter, x0_q4, x_step_q4, w, intermediate_height);
    144  convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter,
    145                y0_q4, y_step_q4, w, h);
    146 }
    147 
    148 void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    149                         ptrdiff_t dst_stride, int w, int h) {
    150  for (int r = h; r > 0; --r) {
    151    memmove(dst, src, w);
    152    src += src_stride;
    153    dst += dst_stride;
    154  }
    155 }
    156 
    157 #if CONFIG_AV1_HIGHBITDEPTH
    158 static inline int highbd_vert_scalar_product(const uint16_t *a,
    159                                             ptrdiff_t a_stride,
    160                                             const int16_t *b) {
    161  int sum = 0;
    162  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
    163  return sum;
    164 }
    165 
    166 static inline int highbd_horz_scalar_product(const uint16_t *a,
    167                                             const int16_t *b) {
    168  int sum = 0;
    169  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
    170  return sum;
    171 }
    172 
    173 static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
    174                                  uint8_t *dst8, ptrdiff_t dst_stride,
    175                                  const InterpKernel *x_filters, int x0_q4,
    176                                  int x_step_q4, int w, int h, int bd) {
    177  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    178  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
    179  src -= SUBPEL_TAPS / 2 - 1;
    180  for (int y = 0; y < h; ++y) {
    181    int x_q4 = x0_q4;
    182    for (int x = 0; x < w; ++x) {
    183      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
    184      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
    185      const int sum = highbd_horz_scalar_product(src_x, x_filter);
    186      dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
    187      x_q4 += x_step_q4;
    188    }
    189    src += src_stride;
    190    dst += dst_stride;
    191  }
    192 }
    193 
    194 static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
    195                                 uint8_t *dst8, ptrdiff_t dst_stride,
    196                                 const InterpKernel *y_filters, int y0_q4,
    197                                 int y_step_q4, int w, int h, int bd) {
    198  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    199  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
    200  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
    201  for (int x = 0; x < w; ++x) {
    202    int y_q4 = y0_q4;
    203    for (int y = 0; y < h; ++y) {
    204      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
    205      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
    206      const int sum = highbd_vert_scalar_product(src_y, src_stride, y_filter);
    207      dst[y * dst_stride] =
    208          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
    209      y_q4 += y_step_q4;
    210    }
    211    ++src;
    212    ++dst;
    213  }
    214 }
    215 
    216 void aom_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    217                                  uint8_t *dst, ptrdiff_t dst_stride,
    218                                  const int16_t *filter_x, int x_step_q4,
    219                                  const int16_t *filter_y, int y_step_q4, int w,
    220                                  int h, int bd) {
    221  const InterpKernel *const filters_x = get_filter_base(filter_x);
    222  const int x0_q4 = get_filter_offset(filter_x, filters_x);
    223  (void)filter_y;
    224  (void)y_step_q4;
    225 
    226  highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
    227                        x_step_q4, w, h, bd);
    228 }
    229 
    230 void aom_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
    231                                 uint8_t *dst, ptrdiff_t dst_stride,
    232                                 const int16_t *filter_x, int x_step_q4,
    233                                 const int16_t *filter_y, int y_step_q4, int w,
    234                                 int h, int bd) {
    235  const InterpKernel *const filters_y = get_filter_base(filter_y);
    236  const int y0_q4 = get_filter_offset(filter_y, filters_y);
    237  (void)filter_x;
    238  (void)x_step_q4;
    239 
    240  highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
    241                       y_step_q4, w, h, bd);
    242 }
    243 
    244 void aom_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride,
    245                                uint16_t *dst, ptrdiff_t dst_stride, int w,
    246                                int h) {
    247  for (int y = 0; y < h; ++y) {
    248    memmove(dst, src, w * sizeof(src[0]));
    249    src += src_stride;
    250    dst += dst_stride;
    251  }
    252 }
    253 #endif  // CONFIG_AV1_HIGHBITDEPTH
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE