tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

aom_dsp_rtcd_defs.pl (98273B)


      1 ##
      2 ## Copyright (c) 2017, Alliance for Open Media. All rights reserved.
      3 ##
      4 ## This source code is subject to the terms of the BSD 2 Clause License and
      5 ## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 ## was not distributed with this source code in the LICENSE file, you can
      7 ## obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 ## Media Patent License 1.0 was not distributed with this source code in the
      9 ## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 ##
     11 sub aom_dsp_forward_decls() {
     12 print <<EOF
     13 /*
     14 * DSP
     15 */
     16 
     17 #include "aom/aom_integer.h"
     18 #include "aom_dsp/aom_dsp_common.h"
     19 #include "av1/common/blockd.h"
     20 #include "av1/common/enums.h"
     21 
     22 EOF
     23 }
     24 forward_decls qw/aom_dsp_forward_decls/;
     25 
     26 # optimizations which depend on multiple features
     27 $avx2_ssse3 = '';
     28 if ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes")) {
     29  $avx2_ssse3 = 'avx2';
     30 }
     31 
     32 # functions that are 64 bit only.
     33 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
     34 if ($opts{arch} eq "x86_64") {
     35  $mmx_x86_64 = 'mmx';
     36  $sse2_x86_64 = 'sse2';
     37  $ssse3_x86_64 = 'ssse3';
     38  $avx_x86_64 = 'avx';
     39  $avx2_x86_64 = 'avx2';
     40 }
     41 
     42 @block_widths = (4, 8, 16, 32, 64, 128);
     43 
     44 @encoder_block_sizes = ();
     45 foreach $w (@block_widths) {
     46  foreach $h (@block_widths) {
     47    push @encoder_block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w);
     48  }
     49 }
     50 
     51 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
     52  push @encoder_block_sizes, [4, 16];
     53  push @encoder_block_sizes, [16, 4];
     54  push @encoder_block_sizes, [8, 32];
     55  push @encoder_block_sizes, [32, 8];
     56  push @encoder_block_sizes, [16, 64];
     57  push @encoder_block_sizes, [64, 16];
     58 }
     59 
     60 @tx_dims = (4, 8, 16, 32, 64);
     61 @tx_sizes = ();
     62 foreach $w (@tx_dims) {
     63  push @tx_sizes, [$w, $w];
     64  foreach $h (@tx_dims) {
     65    push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 2*$h || $h == 2*$w));
     66    if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
     67        (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
     68      push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 4*$h || $h == 4*$w));
     69    }  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
     70  }
     71 }
     72 
     73 @pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/;
     74 
     75 #
     76 # Intra prediction
     77 #
     78 
     79 foreach (@tx_sizes) {
     80  ($w, $h) = @$_;
     81  foreach $pred_name (@pred_names) {
     82    add_proto "void", "aom_${pred_name}_predictor_${w}x${h}",
     83              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
     84    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
     85        add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}",
     86                  "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
     87    }
     88  }
     89 }
     90 
     91 specialize qw/aom_dc_top_predictor_4x4 neon sse2/;
     92 specialize qw/aom_dc_top_predictor_4x8 neon sse2/;
     93 specialize qw/aom_dc_top_predictor_8x4 neon sse2/;
     94 specialize qw/aom_dc_top_predictor_8x8 neon sse2/;
     95 specialize qw/aom_dc_top_predictor_8x16 neon sse2/;
     96 specialize qw/aom_dc_top_predictor_16x8 neon sse2/;
     97 specialize qw/aom_dc_top_predictor_16x16 neon sse2/;
     98 specialize qw/aom_dc_top_predictor_16x32 neon sse2/;
     99 specialize qw/aom_dc_top_predictor_32x16 neon sse2 avx2/;
    100 specialize qw/aom_dc_top_predictor_32x32 neon sse2 avx2/;
    101 specialize qw/aom_dc_top_predictor_32x64 neon sse2 avx2/;
    102 specialize qw/aom_dc_top_predictor_64x32 neon sse2 avx2/;
    103 specialize qw/aom_dc_top_predictor_64x64 neon sse2 avx2/;
    104 
    105 specialize qw/aom_dc_left_predictor_4x4 neon sse2/;
    106 specialize qw/aom_dc_left_predictor_4x8 neon sse2/;
    107 specialize qw/aom_dc_left_predictor_8x4 neon sse2/;
    108 specialize qw/aom_dc_left_predictor_8x8 neon sse2/;
    109 specialize qw/aom_dc_left_predictor_8x16 neon sse2/;
    110 specialize qw/aom_dc_left_predictor_16x8 neon sse2/;
    111 specialize qw/aom_dc_left_predictor_16x16 neon sse2/;
    112 specialize qw/aom_dc_left_predictor_16x32 neon sse2/;
    113 specialize qw/aom_dc_left_predictor_32x16 neon sse2 avx2/;
    114 specialize qw/aom_dc_left_predictor_32x32 neon sse2 avx2/;
    115 specialize qw/aom_dc_left_predictor_32x64 neon sse2 avx2/;
    116 specialize qw/aom_dc_left_predictor_64x32 neon sse2 avx2/;
    117 specialize qw/aom_dc_left_predictor_64x64 neon sse2 avx2/;
    118 
    119 specialize qw/aom_dc_128_predictor_4x4 neon sse2/;
    120 specialize qw/aom_dc_128_predictor_4x8 neon sse2/;
    121 specialize qw/aom_dc_128_predictor_8x4 neon sse2/;
    122 specialize qw/aom_dc_128_predictor_8x8 neon sse2/;
    123 specialize qw/aom_dc_128_predictor_8x16 neon sse2/;
    124 specialize qw/aom_dc_128_predictor_16x8 neon sse2/;
    125 specialize qw/aom_dc_128_predictor_16x16 neon sse2/;
    126 specialize qw/aom_dc_128_predictor_16x32 neon sse2/;
    127 specialize qw/aom_dc_128_predictor_32x16 neon sse2 avx2/;
    128 specialize qw/aom_dc_128_predictor_32x32 neon sse2 avx2/;
    129 specialize qw/aom_dc_128_predictor_32x64 neon sse2 avx2/;
    130 specialize qw/aom_dc_128_predictor_64x32 neon sse2 avx2/;
    131 specialize qw/aom_dc_128_predictor_64x64 neon sse2 avx2/;
    132 
    133 specialize qw/aom_v_predictor_4x4 neon sse2/;
    134 specialize qw/aom_v_predictor_4x8 neon sse2/;
    135 specialize qw/aom_v_predictor_8x4 neon sse2/;
    136 specialize qw/aom_v_predictor_8x8 neon sse2/;
    137 specialize qw/aom_v_predictor_8x16 neon sse2/;
    138 specialize qw/aom_v_predictor_16x8 neon sse2/;
    139 specialize qw/aom_v_predictor_16x16 neon sse2/;
    140 specialize qw/aom_v_predictor_16x32 neon sse2/;
    141 specialize qw/aom_v_predictor_32x16 neon sse2 avx2/;
    142 specialize qw/aom_v_predictor_32x32 neon sse2 avx2/;
    143 specialize qw/aom_v_predictor_32x64 neon sse2 avx2/;
    144 specialize qw/aom_v_predictor_64x32 neon sse2 avx2/;
    145 specialize qw/aom_v_predictor_64x64 neon sse2 avx2/;
    146 
    147 specialize qw/aom_h_predictor_4x4 neon sse2/;
    148 specialize qw/aom_h_predictor_4x8 neon sse2/;
    149 specialize qw/aom_h_predictor_8x4 neon sse2/;
    150 specialize qw/aom_h_predictor_8x8 neon sse2/;
    151 specialize qw/aom_h_predictor_8x16 neon sse2/;
    152 specialize qw/aom_h_predictor_16x8 neon sse2/;
    153 specialize qw/aom_h_predictor_16x16 neon sse2/;
    154 specialize qw/aom_h_predictor_16x32 neon sse2/;
    155 specialize qw/aom_h_predictor_32x16 neon sse2/;
    156 specialize qw/aom_h_predictor_32x32 neon sse2 avx2/;
    157 specialize qw/aom_h_predictor_32x64 neon sse2/;
    158 specialize qw/aom_h_predictor_64x32 neon sse2/;
    159 specialize qw/aom_h_predictor_64x64 neon sse2/;
    160 
    161 specialize qw/aom_paeth_predictor_4x4 ssse3 neon/;
    162 specialize qw/aom_paeth_predictor_4x8 ssse3 neon/;
    163 specialize qw/aom_paeth_predictor_8x4 ssse3 neon/;
    164 specialize qw/aom_paeth_predictor_8x8 ssse3 neon/;
    165 specialize qw/aom_paeth_predictor_8x16 ssse3 neon/;
    166 specialize qw/aom_paeth_predictor_16x8 ssse3 avx2 neon/;
    167 specialize qw/aom_paeth_predictor_16x16 ssse3 avx2 neon/;
    168 specialize qw/aom_paeth_predictor_16x32 ssse3 avx2 neon/;
    169 specialize qw/aom_paeth_predictor_32x16 ssse3 avx2 neon/;
    170 specialize qw/aom_paeth_predictor_32x32 ssse3 avx2 neon/;
    171 specialize qw/aom_paeth_predictor_32x64 ssse3 avx2 neon/;
    172 specialize qw/aom_paeth_predictor_64x32 ssse3 avx2 neon/;
    173 specialize qw/aom_paeth_predictor_64x64 ssse3 avx2 neon/;
    174 
    175 specialize qw/aom_smooth_predictor_4x4 neon ssse3/;
    176 specialize qw/aom_smooth_predictor_4x8 neon ssse3/;
    177 specialize qw/aom_smooth_predictor_8x4 neon ssse3/;
    178 specialize qw/aom_smooth_predictor_8x8 neon ssse3/;
    179 specialize qw/aom_smooth_predictor_8x16 neon ssse3/;
    180 specialize qw/aom_smooth_predictor_16x8 neon ssse3/;
    181 specialize qw/aom_smooth_predictor_16x16 neon ssse3/;
    182 specialize qw/aom_smooth_predictor_16x32 neon ssse3/;
    183 specialize qw/aom_smooth_predictor_32x16 neon ssse3/;
    184 specialize qw/aom_smooth_predictor_32x32 neon ssse3/;
    185 specialize qw/aom_smooth_predictor_32x64 neon ssse3/;
    186 specialize qw/aom_smooth_predictor_64x32 neon ssse3/;
    187 specialize qw/aom_smooth_predictor_64x64 neon ssse3/;
    188 
    189 specialize qw/aom_smooth_v_predictor_4x4 neon ssse3/;
    190 specialize qw/aom_smooth_v_predictor_4x8 neon ssse3/;
    191 specialize qw/aom_smooth_v_predictor_8x4 neon ssse3/;
    192 specialize qw/aom_smooth_v_predictor_8x8 neon ssse3/;
    193 specialize qw/aom_smooth_v_predictor_8x16 neon ssse3/;
    194 specialize qw/aom_smooth_v_predictor_16x8 neon ssse3/;
    195 specialize qw/aom_smooth_v_predictor_16x16 neon ssse3/;
    196 specialize qw/aom_smooth_v_predictor_16x32 neon ssse3/;
    197 specialize qw/aom_smooth_v_predictor_32x16 neon ssse3/;
    198 specialize qw/aom_smooth_v_predictor_32x32 neon ssse3/;
    199 specialize qw/aom_smooth_v_predictor_32x64 neon ssse3/;
    200 specialize qw/aom_smooth_v_predictor_64x32 neon ssse3/;
    201 specialize qw/aom_smooth_v_predictor_64x64 neon ssse3/;
    202 
    203 specialize qw/aom_smooth_h_predictor_4x4 neon ssse3/;
    204 specialize qw/aom_smooth_h_predictor_4x8 neon ssse3/;
    205 specialize qw/aom_smooth_h_predictor_8x4 neon ssse3/;
    206 specialize qw/aom_smooth_h_predictor_8x8 neon ssse3/;
    207 specialize qw/aom_smooth_h_predictor_8x16 neon ssse3/;
    208 specialize qw/aom_smooth_h_predictor_16x8 neon ssse3/;
    209 specialize qw/aom_smooth_h_predictor_16x16 neon ssse3/;
    210 specialize qw/aom_smooth_h_predictor_16x32 neon ssse3/;
    211 specialize qw/aom_smooth_h_predictor_32x16 neon ssse3/;
    212 specialize qw/aom_smooth_h_predictor_32x32 neon ssse3/;
    213 specialize qw/aom_smooth_h_predictor_32x64 neon ssse3/;
    214 specialize qw/aom_smooth_h_predictor_64x32 neon ssse3/;
    215 specialize qw/aom_smooth_h_predictor_64x64 neon ssse3/;
    216 
    217 # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
    218 # by multiply and shift.
    219 specialize qw/aom_dc_predictor_4x4 neon sse2/;
    220 specialize qw/aom_dc_predictor_4x8 neon sse2/;
    221 specialize qw/aom_dc_predictor_8x4 neon sse2/;
    222 specialize qw/aom_dc_predictor_8x8 neon sse2/;
    223 specialize qw/aom_dc_predictor_8x16 neon sse2/;
    224 specialize qw/aom_dc_predictor_16x8 neon sse2/;
    225 specialize qw/aom_dc_predictor_16x16 neon sse2/;
    226 specialize qw/aom_dc_predictor_16x32 neon sse2/;
    227 specialize qw/aom_dc_predictor_32x16 neon sse2 avx2/;
    228 specialize qw/aom_dc_predictor_32x32 neon sse2 avx2/;
    229 specialize qw/aom_dc_predictor_32x64 neon sse2 avx2/;
    230 specialize qw/aom_dc_predictor_64x64 neon sse2 avx2/;
    231 specialize qw/aom_dc_predictor_64x32 neon sse2 avx2/;
    232 
    233 
    234 if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") || (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
    235  specialize qw/aom_dc_top_predictor_4x16 neon sse2/;
    236  specialize qw/aom_dc_top_predictor_8x32 neon sse2/;
    237  specialize qw/aom_dc_top_predictor_16x4 neon sse2/;
    238  specialize qw/aom_dc_top_predictor_16x64 neon sse2/;
    239  specialize qw/aom_dc_top_predictor_32x8 neon sse2/;
    240  specialize qw/aom_dc_top_predictor_64x16 neon sse2 avx2/;
    241 
    242  specialize qw/aom_dc_left_predictor_4x16 neon sse2/;
    243  specialize qw/aom_dc_left_predictor_8x32 neon sse2/;
    244  specialize qw/aom_dc_left_predictor_16x4 neon sse2/;
    245  specialize qw/aom_dc_left_predictor_16x64 neon sse2/;
    246  specialize qw/aom_dc_left_predictor_32x8 neon sse2/;
    247  specialize qw/aom_dc_left_predictor_64x16 neon sse2 avx2/;
    248 
    249  specialize qw/aom_dc_128_predictor_4x16 neon sse2/;
    250  specialize qw/aom_dc_128_predictor_8x32 neon sse2/;
    251  specialize qw/aom_dc_128_predictor_16x4 neon sse2/;
    252  specialize qw/aom_dc_128_predictor_16x64 neon sse2/;
    253  specialize qw/aom_dc_128_predictor_32x8 neon sse2/;
    254  specialize qw/aom_dc_128_predictor_64x16 neon sse2 avx2/;
    255 
    256  specialize qw/aom_v_predictor_4x16 neon sse2/;
    257  specialize qw/aom_v_predictor_8x32 neon sse2/;
    258  specialize qw/aom_v_predictor_16x4 neon sse2/;
    259  specialize qw/aom_v_predictor_16x64 neon sse2/;
    260  specialize qw/aom_v_predictor_32x8 neon sse2/;
    261  specialize qw/aom_v_predictor_64x16 neon sse2 avx2/;
    262 
    263  specialize qw/aom_h_predictor_4x16 neon sse2/;
    264  specialize qw/aom_h_predictor_8x32 neon sse2/;
    265  specialize qw/aom_h_predictor_16x4 neon sse2/;
    266  specialize qw/aom_h_predictor_16x64 neon sse2/;
    267  specialize qw/aom_h_predictor_32x8 neon sse2/;
    268  specialize qw/aom_h_predictor_64x16 neon sse2/;
    269 
    270  specialize qw/aom_paeth_predictor_4x16 ssse3 neon/;
    271  specialize qw/aom_paeth_predictor_8x32 ssse3 neon/;
    272  specialize qw/aom_paeth_predictor_16x4 ssse3 neon/;
    273  specialize qw/aom_paeth_predictor_16x64 ssse3 avx2 neon/;
    274  specialize qw/aom_paeth_predictor_32x8 ssse3 neon/;
    275  specialize qw/aom_paeth_predictor_64x16 ssse3 avx2 neon/;
    276 
    277  specialize qw/aom_smooth_predictor_4x16 neon ssse3/;
    278  specialize qw/aom_smooth_predictor_8x32 neon ssse3/;
    279  specialize qw/aom_smooth_predictor_16x4 neon ssse3/;
    280  specialize qw/aom_smooth_predictor_16x64 neon ssse3/;
    281  specialize qw/aom_smooth_predictor_32x8 neon ssse3/;
    282  specialize qw/aom_smooth_predictor_64x16 neon ssse3/;
    283 
    284  specialize qw/aom_smooth_v_predictor_4x16 neon ssse3/;
    285  specialize qw/aom_smooth_v_predictor_8x32 neon ssse3/;
    286  specialize qw/aom_smooth_v_predictor_16x4 neon ssse3/;
    287  specialize qw/aom_smooth_v_predictor_16x64 neon ssse3/;
    288  specialize qw/aom_smooth_v_predictor_32x8 neon ssse3/;
    289  specialize qw/aom_smooth_v_predictor_64x16 neon ssse3/;
    290 
    291  specialize qw/aom_smooth_h_predictor_4x16 neon ssse3/;
    292  specialize qw/aom_smooth_h_predictor_8x32 neon ssse3/;
    293  specialize qw/aom_smooth_h_predictor_16x4 neon ssse3/;
    294  specialize qw/aom_smooth_h_predictor_16x64 neon ssse3/;
    295  specialize qw/aom_smooth_h_predictor_32x8 neon ssse3/;
    296  specialize qw/aom_smooth_h_predictor_64x16 neon ssse3/;
    297 
    298  specialize qw/aom_dc_predictor_4x16 neon sse2/;
    299  specialize qw/aom_dc_predictor_8x32 neon sse2/;
    300  specialize qw/aom_dc_predictor_16x4 neon sse2/;
    301  specialize qw/aom_dc_predictor_16x64 neon sse2/;
    302  specialize qw/aom_dc_predictor_32x8 neon sse2/;
    303  specialize qw/aom_dc_predictor_64x16 neon sse2 avx2/;
    304 }  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    305 
    306 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    307  specialize qw/aom_highbd_v_predictor_4x4 sse2 neon/;
    308  specialize qw/aom_highbd_v_predictor_4x8 sse2 neon/;
    309  specialize qw/aom_highbd_v_predictor_8x4 sse2 neon/;
    310  specialize qw/aom_highbd_v_predictor_8x8 sse2 neon/;
    311  specialize qw/aom_highbd_v_predictor_8x16 sse2 neon/;
    312  specialize qw/aom_highbd_v_predictor_16x8 sse2 neon/;
    313  specialize qw/aom_highbd_v_predictor_16x16 sse2 neon/;
    314  specialize qw/aom_highbd_v_predictor_16x32 sse2 neon/;
    315  specialize qw/aom_highbd_v_predictor_32x16 sse2 neon/;
    316  specialize qw/aom_highbd_v_predictor_32x32 sse2 neon/;
    317  specialize qw/aom_highbd_v_predictor_32x64 neon/;
    318  specialize qw/aom_highbd_v_predictor_64x32 neon/;
    319  specialize qw/aom_highbd_v_predictor_64x64 neon/;
    320 
    321  # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
    322  # by multiply and shift.
    323  specialize qw/aom_highbd_dc_predictor_4x4 sse2 neon/;
    324  specialize qw/aom_highbd_dc_predictor_4x8 sse2 neon/;
    325  specialize qw/aom_highbd_dc_predictor_8x4 sse2 neon/;
    326  specialize qw/aom_highbd_dc_predictor_8x8 sse2 neon/;
    327  specialize qw/aom_highbd_dc_predictor_8x16 sse2 neon/;
    328  specialize qw/aom_highbd_dc_predictor_16x8 sse2 neon/;
    329  specialize qw/aom_highbd_dc_predictor_16x16 sse2 neon/;
    330  specialize qw/aom_highbd_dc_predictor_16x32 sse2 neon/;
    331  specialize qw/aom_highbd_dc_predictor_32x16 sse2 neon/;
    332  specialize qw/aom_highbd_dc_predictor_32x32 sse2 neon/;
    333  specialize qw/aom_highbd_dc_predictor_32x64 neon/;
    334  specialize qw/aom_highbd_dc_predictor_64x32 neon/;
    335  specialize qw/aom_highbd_dc_predictor_64x64 neon/;
    336 
    337  specialize qw/aom_highbd_h_predictor_4x4 sse2 neon/;
    338  specialize qw/aom_highbd_h_predictor_4x8 sse2 neon/;
    339  specialize qw/aom_highbd_h_predictor_8x4 sse2 neon/;
    340  specialize qw/aom_highbd_h_predictor_8x8 sse2 neon/;
    341  specialize qw/aom_highbd_h_predictor_8x16 sse2 neon/;
    342  specialize qw/aom_highbd_h_predictor_16x8 sse2 neon/;
    343  specialize qw/aom_highbd_h_predictor_16x16 sse2 neon/;
    344  specialize qw/aom_highbd_h_predictor_16x32 sse2 neon/;
    345  specialize qw/aom_highbd_h_predictor_32x16 sse2 neon/;
    346  specialize qw/aom_highbd_h_predictor_32x32 sse2 neon/;
    347  specialize qw/aom_highbd_h_predictor_32x64 neon/;
    348  specialize qw/aom_highbd_h_predictor_64x32 neon/;
    349  specialize qw/aom_highbd_h_predictor_64x64 neon/;
    350 
    351  specialize qw/aom_highbd_dc_128_predictor_4x4 sse2 neon/;
    352  specialize qw/aom_highbd_dc_128_predictor_4x8 sse2 neon/;
    353  specialize qw/aom_highbd_dc_128_predictor_8x4 sse2 neon/;
    354  specialize qw/aom_highbd_dc_128_predictor_8x8 sse2 neon/;
    355  specialize qw/aom_highbd_dc_128_predictor_8x16 sse2 neon/;
    356  specialize qw/aom_highbd_dc_128_predictor_16x8 sse2 neon/;
    357  specialize qw/aom_highbd_dc_128_predictor_16x16 sse2 neon/;
    358  specialize qw/aom_highbd_dc_128_predictor_16x32 sse2 neon/;
    359  specialize qw/aom_highbd_dc_128_predictor_32x16 sse2 neon/;
    360  specialize qw/aom_highbd_dc_128_predictor_32x32 sse2 neon/;
    361  specialize qw/aom_highbd_dc_128_predictor_32x64 neon/;
    362  specialize qw/aom_highbd_dc_128_predictor_64x32 neon/;
    363  specialize qw/aom_highbd_dc_128_predictor_64x64 neon/;
    364 
    365  specialize qw/aom_highbd_dc_left_predictor_4x4 sse2 neon/;
    366  specialize qw/aom_highbd_dc_left_predictor_4x8 sse2 neon/;
    367  specialize qw/aom_highbd_dc_left_predictor_8x4 sse2 neon/;
    368  specialize qw/aom_highbd_dc_left_predictor_8x8 sse2 neon/;
    369  specialize qw/aom_highbd_dc_left_predictor_8x16 sse2 neon/;
    370  specialize qw/aom_highbd_dc_left_predictor_16x8 sse2 neon/;
    371  specialize qw/aom_highbd_dc_left_predictor_16x16 sse2 neon/;
    372  specialize qw/aom_highbd_dc_left_predictor_16x32 sse2 neon/;
    373  specialize qw/aom_highbd_dc_left_predictor_32x16 sse2 neon/;
    374  specialize qw/aom_highbd_dc_left_predictor_32x32 sse2 neon/;
    375  specialize qw/aom_highbd_dc_left_predictor_32x64 neon/;
    376  specialize qw/aom_highbd_dc_left_predictor_64x32 neon/;
    377  specialize qw/aom_highbd_dc_left_predictor_64x64 neon/;
    378 
    379  specialize qw/aom_highbd_dc_top_predictor_4x4 sse2 neon/;
    380  specialize qw/aom_highbd_dc_top_predictor_4x8 sse2 neon/;
    381  specialize qw/aom_highbd_dc_top_predictor_8x4 sse2 neon/;
    382  specialize qw/aom_highbd_dc_top_predictor_8x8 sse2 neon/;
    383  specialize qw/aom_highbd_dc_top_predictor_8x16 sse2 neon/;
    384  specialize qw/aom_highbd_dc_top_predictor_16x8 sse2 neon/;
    385  specialize qw/aom_highbd_dc_top_predictor_16x16 sse2 neon/;
    386  specialize qw/aom_highbd_dc_top_predictor_16x32 sse2 neon/;
    387  specialize qw/aom_highbd_dc_top_predictor_32x16 sse2 neon/;
    388  specialize qw/aom_highbd_dc_top_predictor_32x32 sse2 neon/;
    389  specialize qw/aom_highbd_dc_top_predictor_32x64 neon/;
    390  specialize qw/aom_highbd_dc_top_predictor_64x32 neon/;
    391  specialize qw/aom_highbd_dc_top_predictor_64x64 neon/;
    392 
    393  specialize qw/aom_highbd_paeth_predictor_4x4 neon/;
    394  specialize qw/aom_highbd_paeth_predictor_4x8 neon/;
    395  specialize qw/aom_highbd_paeth_predictor_8x4 neon/;
    396  specialize qw/aom_highbd_paeth_predictor_8x8 neon/;
    397  specialize qw/aom_highbd_paeth_predictor_8x16 neon/;
    398  specialize qw/aom_highbd_paeth_predictor_16x8 neon/;
    399  specialize qw/aom_highbd_paeth_predictor_16x16 neon/;
    400  specialize qw/aom_highbd_paeth_predictor_16x32 neon/;
    401  specialize qw/aom_highbd_paeth_predictor_32x16 neon/;
    402  specialize qw/aom_highbd_paeth_predictor_32x32 neon/;
    403  specialize qw/aom_highbd_paeth_predictor_32x64 neon/;
    404  specialize qw/aom_highbd_paeth_predictor_64x32 neon/;
    405  specialize qw/aom_highbd_paeth_predictor_64x64 neon/;
    406 
    407  specialize qw/aom_highbd_smooth_predictor_4x4 neon/;
    408  specialize qw/aom_highbd_smooth_predictor_4x8 neon/;
    409  specialize qw/aom_highbd_smooth_predictor_8x4 neon/;
    410  specialize qw/aom_highbd_smooth_predictor_8x8 neon/;
    411  specialize qw/aom_highbd_smooth_predictor_8x16 neon/;
    412  specialize qw/aom_highbd_smooth_predictor_16x8 neon/;
    413  specialize qw/aom_highbd_smooth_predictor_16x16 neon/;
    414  specialize qw/aom_highbd_smooth_predictor_16x32 neon/;
    415  specialize qw/aom_highbd_smooth_predictor_32x16 neon/;
    416  specialize qw/aom_highbd_smooth_predictor_32x32 neon/;
    417  specialize qw/aom_highbd_smooth_predictor_32x64 neon/;
    418  specialize qw/aom_highbd_smooth_predictor_64x32 neon/;
    419  specialize qw/aom_highbd_smooth_predictor_64x64 neon/;
    420 
    421  specialize qw/aom_highbd_smooth_v_predictor_4x4 neon/;
    422  specialize qw/aom_highbd_smooth_v_predictor_4x8 neon/;
    423  specialize qw/aom_highbd_smooth_v_predictor_8x4 neon/;
    424  specialize qw/aom_highbd_smooth_v_predictor_8x8 neon/;
    425  specialize qw/aom_highbd_smooth_v_predictor_8x16 neon/;
    426  specialize qw/aom_highbd_smooth_v_predictor_16x8 neon/;
    427  specialize qw/aom_highbd_smooth_v_predictor_16x16 neon/;
    428  specialize qw/aom_highbd_smooth_v_predictor_16x32 neon/;
    429  specialize qw/aom_highbd_smooth_v_predictor_32x16 neon/;
    430  specialize qw/aom_highbd_smooth_v_predictor_32x32 neon/;
    431  specialize qw/aom_highbd_smooth_v_predictor_32x64 neon/;
    432  specialize qw/aom_highbd_smooth_v_predictor_64x32 neon/;
    433  specialize qw/aom_highbd_smooth_v_predictor_64x64 neon/;
    434  specialize qw/aom_highbd_smooth_h_predictor_4x4 neon/;
    435  specialize qw/aom_highbd_smooth_h_predictor_4x8 neon/;
    436 
    437  specialize qw/aom_highbd_smooth_h_predictor_8x4 neon/;
    438  specialize qw/aom_highbd_smooth_h_predictor_8x8 neon/;
    439  specialize qw/aom_highbd_smooth_h_predictor_8x16 neon/;
    440  specialize qw/aom_highbd_smooth_h_predictor_16x8 neon/;
    441  specialize qw/aom_highbd_smooth_h_predictor_16x16 neon/;
    442  specialize qw/aom_highbd_smooth_h_predictor_16x32 neon/;
    443  specialize qw/aom_highbd_smooth_h_predictor_32x16 neon/;
    444  specialize qw/aom_highbd_smooth_h_predictor_32x32 neon/;
    445  specialize qw/aom_highbd_smooth_h_predictor_32x64 neon/;
    446  specialize qw/aom_highbd_smooth_h_predictor_64x32 neon/;
    447  specialize qw/aom_highbd_smooth_h_predictor_64x64 neon/;
    448 
    449  if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
    450      (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
    451    specialize qw/aom_highbd_v_predictor_4x16 neon/;
    452    specialize qw/aom_highbd_v_predictor_8x32 neon/;
    453    specialize qw/aom_highbd_v_predictor_16x4 neon/;
    454    specialize qw/aom_highbd_v_predictor_16x64 neon/;
    455    specialize qw/aom_highbd_v_predictor_32x8 neon/;
    456    specialize qw/aom_highbd_v_predictor_64x16 neon/;
    457 
    458    specialize qw/aom_highbd_dc_predictor_4x16 neon/;
    459    specialize qw/aom_highbd_dc_predictor_8x32 neon/;
    460    specialize qw/aom_highbd_dc_predictor_16x4 neon/;
    461    specialize qw/aom_highbd_dc_predictor_16x64 neon/;
    462    specialize qw/aom_highbd_dc_predictor_32x8 neon/;
    463    specialize qw/aom_highbd_dc_predictor_64x16 neon/;
    464 
    465    specialize qw/aom_highbd_h_predictor_4x16 neon/;
    466    specialize qw/aom_highbd_h_predictor_8x32 neon/;
    467    specialize qw/aom_highbd_h_predictor_16x4 neon/;
    468    specialize qw/aom_highbd_h_predictor_16x64 neon/;
    469    specialize qw/aom_highbd_h_predictor_32x8 neon/;
    470    specialize qw/aom_highbd_h_predictor_64x16 neon/;
    471 
    472    specialize qw/aom_highbd_dc_128_predictor_4x16 neon/;
    473    specialize qw/aom_highbd_dc_128_predictor_8x32 neon/;
    474    specialize qw/aom_highbd_dc_128_predictor_16x4 neon/;
    475    specialize qw/aom_highbd_dc_128_predictor_16x64 neon/;
    476    specialize qw/aom_highbd_dc_128_predictor_32x8 neon/;
    477    specialize qw/aom_highbd_dc_128_predictor_64x16 neon/;
    478 
    479    specialize qw/aom_highbd_dc_left_predictor_4x16 neon/;
    480    specialize qw/aom_highbd_dc_left_predictor_8x32 neon/;
    481    specialize qw/aom_highbd_dc_left_predictor_16x4 neon/;
    482    specialize qw/aom_highbd_dc_left_predictor_16x64 neon/;
    483    specialize qw/aom_highbd_dc_left_predictor_32x8 neon/;
    484    specialize qw/aom_highbd_dc_left_predictor_64x16 neon/;
    485 
    486    specialize qw/aom_highbd_dc_top_predictor_4x16 neon/;
    487    specialize qw/aom_highbd_dc_top_predictor_8x32 neon/;
    488    specialize qw/aom_highbd_dc_top_predictor_16x4 neon/;
    489    specialize qw/aom_highbd_dc_top_predictor_16x64 neon/;
    490    specialize qw/aom_highbd_dc_top_predictor_32x8 neon/;
    491    specialize qw/aom_highbd_dc_top_predictor_64x16 neon/;
    492 
    493    specialize qw/aom_highbd_paeth_predictor_4x16 neon/;
    494    specialize qw/aom_highbd_paeth_predictor_8x32 neon/;
    495    specialize qw/aom_highbd_paeth_predictor_16x4 neon/;
    496    specialize qw/aom_highbd_paeth_predictor_16x64 neon/;
    497    specialize qw/aom_highbd_paeth_predictor_32x8 neon/;
    498    specialize qw/aom_highbd_paeth_predictor_64x16 neon/;
    499 
    500    specialize qw/aom_highbd_smooth_predictor_4x16 neon/;
    501    specialize qw/aom_highbd_smooth_predictor_8x32 neon/;
    502    specialize qw/aom_highbd_smooth_predictor_16x4 neon/;
    503    specialize qw/aom_highbd_smooth_predictor_16x64 neon/;
    504    specialize qw/aom_highbd_smooth_predictor_32x8 neon/;
    505    specialize qw/aom_highbd_smooth_predictor_64x16 neon/;
    506 
    507    specialize qw/aom_highbd_smooth_v_predictor_4x16 neon/;
    508    specialize qw/aom_highbd_smooth_v_predictor_8x32 neon/;
    509    specialize qw/aom_highbd_smooth_v_predictor_16x4 neon/;
    510    specialize qw/aom_highbd_smooth_v_predictor_16x64 neon/;
    511    specialize qw/aom_highbd_smooth_v_predictor_32x8 neon/;
    512    specialize qw/aom_highbd_smooth_v_predictor_64x16 neon/;
    513 
    514    specialize qw/aom_highbd_smooth_h_predictor_4x16 neon/;
    515    specialize qw/aom_highbd_smooth_h_predictor_8x32 neon/;
    516    specialize qw/aom_highbd_smooth_h_predictor_16x4 neon/;
    517    specialize qw/aom_highbd_smooth_h_predictor_16x64 neon/;
    518    specialize qw/aom_highbd_smooth_h_predictor_32x8 neon/;
    519    specialize qw/aom_highbd_smooth_h_predictor_64x16 neon/;
    520  }  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    521 }
    522 #
    523 # Sub Pixel Filters
    524 #
    525 add_proto qw/void aom_convolve_copy/,             "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int w, int h";
    526 add_proto qw/void aom_convolve8_horiz/,           "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
    527 add_proto qw/void aom_convolve8_vert/,            "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
    528 
    529 specialize qw/aom_convolve_copy       neon                        sse2 avx2/;
    530 specialize qw/aom_convolve8_horiz     neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3";
    531 specialize qw/aom_convolve8_vert      neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3";
    532 
    533 add_proto qw/void aom_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
    534 specialize qw/aom_scaled_2d ssse3 neon neon_dotprod neon_i8mm/;
    535 
    536 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    537  add_proto qw/void aom_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, int w, int h";
    538  specialize qw/aom_highbd_convolve_copy sse2 avx2 neon/;
    539 
    540  add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd";
    541  specialize qw/aom_highbd_convolve8_horiz sse2 avx2 neon sve/;
    542 
    543  add_proto qw/void aom_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd";
    544  specialize qw/aom_highbd_convolve8_vert sse2 avx2 neon sve/;
    545 }
    546 
    547 #
    548 # Loopfilter
    549 #
    # Loop filter entry points operating on uint8_t samples (aom_lpf_*).
    # Every row of @lpf_rows registers one prototype together with its SIMD
    # specializations. Rows are listed in the order the functions are declared.
    {
      # Argument lists, keyed by the function-name suffix that uses them.
      my %lpf_args = (
        ''      => "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh",
        '_dual' => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1",
        '_quad' => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0",
      );

      # Target lists that occur below.
      my @sse2_neon      = qw/sse2 neon/;
      my @avx2_sse2_neon = qw/avx2 sse2 neon/;
      my @sse2_avx2_neon = qw/sse2 avx2 neon/;

      # [ direction_and_size, suffix, specialization targets ]
      my @lpf_rows = (
        [ 'vertical_14',   '',      \@sse2_neon ],
        [ 'vertical_14',   '_dual', \@sse2_neon ],
        [ 'vertical_14',   '_quad', \@avx2_sse2_neon ],
        [ 'vertical_6',    '',      \@sse2_neon ],
        [ 'vertical_8',    '',      \@sse2_neon ],
        [ 'vertical_8',    '_dual', \@sse2_neon ],
        [ 'vertical_8',    '_quad', \@sse2_neon ],
        [ 'vertical_4',    '',      \@sse2_neon ],
        [ 'vertical_4',    '_dual', \@sse2_neon ],
        [ 'vertical_4',    '_quad', \@sse2_neon ],
        [ 'horizontal_14', '',      \@sse2_neon ],
        [ 'horizontal_14', '_dual', \@sse2_neon ],
        [ 'horizontal_14', '_quad', \@sse2_avx2_neon ],
        [ 'horizontal_6',  '',      \@sse2_neon ],
        [ 'horizontal_6',  '_dual', \@sse2_neon ],
        [ 'horizontal_6',  '_quad', \@sse2_avx2_neon ],
        [ 'horizontal_8',  '',      \@sse2_neon ],
        [ 'horizontal_8',  '_dual', \@sse2_neon ],
        [ 'horizontal_8',  '_quad', \@sse2_avx2_neon ],
        [ 'horizontal_4',  '',      \@sse2_neon ],
        [ 'horizontal_4',  '_dual', \@sse2_neon ],
        [ 'horizontal_4',  '_quad', \@sse2_neon ],
        [ 'vertical_6',    '_dual', \@sse2_neon ],
        [ 'vertical_6',    '_quad', \@sse2_neon ],
      );

      foreach my $row (@lpf_rows) {
        my ($filter, $suffix, $targets) = @$row;
        my $fn = "aom_lpf_${filter}${suffix}";
        add_proto('void', $fn, $lpf_args{$suffix});
        specialize($fn, @$targets);
      }
    }
    621 
    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      # Loop filter entry points operating on uint16_t samples
      # (aom_highbd_lpf_*); each takes a trailing bit-depth argument.
      my %hbd_lpf_args = (
        ''      => "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd",
        '_dual' => "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd",
      );

      # This one prototype is spelled without a space before "int bd". The
      # exact text is kept so the registered prototype string is unchanged.
      my %hbd_lpf_args_exact = (
        'aom_highbd_lpf_horizontal_14_dual' => "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd",
      );

      my @neon_sse2      = qw/neon sse2/;
      my @neon_sse2_avx2 = qw/neon sse2 avx2/;

      # [ direction_and_size, suffix, specialization targets ], in
      # declaration order.
      my @hbd_lpf_rows = (
        [ 'vertical_14',   '',      \@neon_sse2 ],
        [ 'vertical_14',   '_dual', \@neon_sse2_avx2 ],
        [ 'vertical_8',    '',      \@neon_sse2 ],
        [ 'vertical_8',    '_dual', \@neon_sse2_avx2 ],
        [ 'vertical_6',    '',      \@neon_sse2 ],
        [ 'vertical_6',    '_dual', \@neon_sse2 ],
        [ 'vertical_4',    '',      \@neon_sse2 ],
        [ 'vertical_4',    '_dual', \@neon_sse2_avx2 ],
        [ 'horizontal_14', '',      \@neon_sse2 ],
        [ 'horizontal_14', '_dual', \@neon_sse2_avx2 ],
        [ 'horizontal_6',  '',      \@neon_sse2 ],
        [ 'horizontal_6',  '_dual', \@neon_sse2 ],
        [ 'horizontal_8',  '',      \@neon_sse2 ],
        [ 'horizontal_8',  '_dual', \@neon_sse2_avx2 ],
        [ 'horizontal_4',  '',      \@neon_sse2 ],
        [ 'horizontal_4',  '_dual', \@neon_sse2_avx2 ],
      );

      foreach my $row (@hbd_lpf_rows) {
        my ($filter, $suffix, $targets) = @$row;
        my $fn = "aom_highbd_lpf_${filter}${suffix}";
        my $args = exists $hbd_lpf_args_exact{$fn}
                     ? $hbd_lpf_args_exact{$fn}
                     : $hbd_lpf_args{$suffix};
        add_proto('void', $fn, $args);
        specialize($fn, @$targets);
      }
    }
    671 
    672 #
    673 # Encoder functions.
    674 #
    675 
    676 #
    677 # Forward transform
    678 #
    if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
      # Forward DCT prototypes. The "_lp" variant writes int16_t output
      # instead of tran_low_t.
      my $fdct_args = "const int16_t *input, tran_low_t *output, int stride";

      add_proto('void', 'aom_fdct4x4', $fdct_args);
      specialize('aom_fdct4x4', qw/neon sse2/);

      add_proto('void', 'aom_fdct4x4_lp', "const int16_t *input, int16_t *output, int stride");
      specialize('aom_fdct4x4_lp', qw/neon sse2/);

      if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
        # The 8x8 DCT exists for psnr-hvs. It performs two transposes, so
        # unlike the other transforms it is not compatible with av1 scan
        # orders.
        add_proto('void', 'aom_fdct8x8', $fdct_args);
        # $ssse3_x86_64 is 'ssse3' on x86_64 builds and '' otherwise.
        specialize('aom_fdct8x8', qw/neon sse2/, "$ssse3_x86_64");

        # High bit depth
        if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
          add_proto('void', 'aom_highbd_fdct8x8', $fdct_args);
          specialize('aom_highbd_fdct8x8', qw/sse2/);
        }
      }

      # FFT/IFFT (float) only used for denoising (and noise power spectral
      # density estimation). The forward and inverse families are declared
      # the same way: 2x2 has no specialization, 4x4 has sse2 only, and the
      # larger sizes have avx2 and sse2.
      my $fft_args = "const float *input, float *temp, float *output";
      foreach my $transform (qw/fft ifft/) {
        add_proto('void', "aom_${transform}2x2_float", $fft_args);

        add_proto('void', "aom_${transform}4x4_float", $fft_args);
        specialize("aom_${transform}4x4_float", qw/sse2/);

        foreach my $n (8, 16, 32) {
          my $fn = "aom_${transform}${n}x${n}_float";
          add_proto('void', $fn, $fft_args);
          specialize($fn, qw/avx2 sse2/);
        }
      }
    }  # CONFIG_AV1_ENCODER
    726 
    727 #
    728 # Quantization
    729 #
    if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
      # All aom_quantize_b* variants share a single argument list.
      my $quant_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

      # Registers one quantizer prototype followed by its specializations.
      my $declare_quantizer = sub {
        my ($fn, @targets) = @_;
        add_proto('void', $fn, $quant_args);
        specialize($fn, @targets);
      };

      # $ssse3_x86_64 is 'ssse3' on x86_64 builds and '' otherwise.
      $declare_quantizer->('aom_quantize_b',       qw/sse2 neon avx avx2/, "$ssse3_x86_64");
      $declare_quantizer->('aom_quantize_b_32x32', qw/neon avx avx2/, "$ssse3_x86_64");
      $declare_quantizer->('aom_quantize_b_64x64', qw/neon ssse3 avx2/);

      # The adaptive quantizers are left out of realtime-only builds.
      if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
        $declare_quantizer->('aom_quantize_b_adaptive',       qw/sse2 avx2/);
        $declare_quantizer->('aom_quantize_b_32x32_adaptive', qw/sse2/);
        $declare_quantizer->('aom_quantize_b_64x64_adaptive', qw/sse2/);
      }
    }  # CONFIG_AV1_ENCODER
    751 
    if (aom_config("CONFIG_AV1_ENCODER") eq "yes" && aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      # All aom_highbd_quantize_b* variants share a single argument list.
      my $hbd_quant_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

      # Registers one quantizer prototype followed by its specializations.
      my $declare_quantizer = sub {
        my ($fn, @targets) = @_;
        add_proto('void', $fn, $hbd_quant_args);
        specialize($fn, @targets);
      };

      foreach my $size_suffix ('', '_32x32', '_64x64') {
        $declare_quantizer->("aom_highbd_quantize_b${size_suffix}", qw/sse2 avx2 neon/);
      }

      # The adaptive quantizers are left out of realtime-only builds.
      if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
        $declare_quantizer->('aom_highbd_quantize_b_adaptive',       qw/sse2 avx2 neon/);
        $declare_quantizer->('aom_highbd_quantize_b_32x32_adaptive', qw/sse2 avx2 neon/);
        # The 64x64 adaptive variant has no avx2 specialization.
        $declare_quantizer->('aom_highbd_quantize_b_64x64_adaptive', qw/sse2 neon/);
      }
    }  # CONFIG_AV1_ENCODER && CONFIG_AV1_HIGHBITDEPTH
    773 
    774 #
    775 # Alpha blending with mask
    776 #
    # Mask-blend prototypes for uint8_t destinations.
    {
      # The d16 variant blends CONV_BUF_TYPE sources and takes ConvolveParams.
      add_proto('void', 'aom_lowbd_blend_a64_d16_mask', "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params");
      specialize('aom_lowbd_blend_a64_d16_mask', qw/sse4_1 avx2 neon/);

      # Leading arguments common to the mask / hmask / vmask variants.
      my $blend_head = "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask";

      # [ function, argument list, specialization targets ]
      my @blend_rows = (
        [ 'aom_blend_a64_mask',  "$blend_head, uint32_t mask_stride, int w, int h, int subw, int subh", [qw/sse4_1 neon avx2/] ],
        [ 'aom_blend_a64_hmask', "$blend_head, int w, int h",                                           [qw/sse4_1 neon/] ],
        [ 'aom_blend_a64_vmask', "$blend_head, int w, int h",                                           [qw/sse4_1 neon/] ],
      );

      # All three prototypes are registered first, then their specializations.
      add_proto('void', $_->[0], $_->[1]) foreach @blend_rows;
      specialize($_->[0], @{ $_->[2] })   foreach @blend_rows;
    }
    785 
    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      # High-bitdepth mask-blend prototypes; each takes a trailing bit-depth
      # argument.
      my $hbd_blend_head = "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask";

      # [ function, argument list, specialization targets ]
      my @hbd_blend_rows = (
        [ 'aom_highbd_blend_a64_mask',  "$hbd_blend_head, uint32_t mask_stride, int w, int h, int subw, int subh, int bd", [qw/sse4_1 neon/] ],
        [ 'aom_highbd_blend_a64_hmask', "$hbd_blend_head, int w, int h, int bd", [qw/sse4_1 neon/] ],
        [ 'aom_highbd_blend_a64_vmask', "$hbd_blend_head, int w, int h, int bd", [qw/sse4_1 neon/] ],
        [ 'aom_highbd_blend_a64_d16_mask', "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd", [qw/sse4_1 neon avx2/] ],
      );

      # All prototypes are registered first, then their specializations.
      foreach my $row (@hbd_blend_rows) {
        add_proto('void', $row->[0], $row->[1]);
      }
      foreach my $row (@hbd_blend_rows) {
        specialize($row->[0], @{ $row->[2] });
      }
    }
    796 
    797 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
    798  #
    799  # Block subtraction
    800  #
    # Block subtraction and squared-error prototypes, followed by their
    # high-bitdepth counterparts.
    {
      my $subtract_args = "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";

      add_proto('void', 'aom_subtract_block', $subtract_args);
      specialize('aom_subtract_block', qw/neon sse2 avx2/);

      add_proto('int64_t', 'aom_sse', "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height");
      specialize('aom_sse', qw/sse4_1 avx2 neon neon_dotprod/);

      add_proto('void', 'aom_get_blk_sse_sum', "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum");
      specialize('aom_get_blk_sse_sum', qw/sse2 avx2 neon sve/);

      if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
        # Same argument list as aom_subtract_block.
        add_proto('void', 'aom_highbd_subtract_block', $subtract_args);
        specialize('aom_highbd_subtract_block', qw/sse2 neon/);

        add_proto('int64_t', 'aom_highbd_sse', "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height");
        specialize('aom_highbd_sse', qw/sse4_1 avx2 neon sve/);
      }
    }
    817 
    818  #
    819  # Sum of Squares
    820  #
    # Sum-of-squares and 2-D variance prototypes; all of them return uint64_t.
    {
      # [ function, argument list, specialization targets ]
      my @sum_rows = (
        [ 'aom_sum_squares_2d_i16', "const int16_t *src, int stride, int width, int height", [qw/sse2 avx2 neon sve/] ],
        [ 'aom_sum_squares_i16',    "const int16_t *src, uint32_t N",                        [qw/sse2 neon sve/] ],
        [ 'aom_var_2d_u8',          "uint8_t *src, int src_stride, int width, int height",   [qw/sse2 avx2 neon neon_dotprod/] ],
      );

      # The u16 variance is only declared for high-bitdepth builds.
      if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
        push @sum_rows,
          [ 'aom_var_2d_u16', "uint8_t *src, int src_stride, int width, int height", [qw/sse2 avx2 neon sve/] ];
      }

      foreach my $row (@sum_rows) {
        my ($fn, $args, $targets) = @$row;
        add_proto('uint64_t', $fn, $args);
        specialize($fn, @$targets);
      }
    }
    834 
    835  #
    836  # Single block SAD / Single block Avg SAD
    837  #
    # One SAD prototype per encoder block size. A "skip" prototype is added
    # when the height is at least 16, and an "_avg" prototype (which takes a
    # second predictor) when neither dimension is 4.
    foreach (@encoder_block_sizes) {
      # $w and $h are file-level variables, as in the other size loops.
      ($w, $h) = @$_;
      my $sad_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";

      add_proto(qw/unsigned int/, "aom_sad${w}x${h}", $sad_args);
      add_proto(qw/unsigned int/, "aom_sad_skip_${w}x${h}", $sad_args)
        if $h >= 16;
      add_proto(qw/unsigned int/, "aom_sad${w}x${h}_avg", "$sad_args, const uint8_t *second_pred")
        unless ($w == 4 || $h == 4);
    }
    848 
    # aom_sum_sse_2d_i16, followed by the SIMD specializations for the plain,
    # "skip" and "_avg" SAD families. Sizes are grouped into runs that share a
    # target list; the runs appear in the order the specializations are made.
    {
      add_proto('uint64_t', 'aom_sum_sse_2d_i16', "const int16_t *src, int src_stride, int width, int height, int *sum");
      specialize('aom_sum_sse_2d_i16', qw/avx2 neon sse2 sve/);

      # Target lists that occur below.
      my @full    = qw/avx2 sse2 neon neon_dotprod/;
      my @dotprod = qw/sse2 neon neon_dotprod/;
      my @basic   = qw/sse2 neon/;
      my @avx512  = qw/avx512/;

      # Specializes sprintf($name_fmt, $size) for every size given.
      my $specialize_sizes = sub {
        my ($name_fmt, $targets, @sizes) = @_;
        foreach my $size (@sizes) {
          specialize(sprintf($name_fmt, $size), @$targets);
        }
      };

      # The five largest sizes also get avx512 when CONFIG_HIGHWAY is enabled.
      my @largest = qw/128x128 128x64 64x128 64x64 64x32/;

      # Plain SAD.
      $specialize_sizes->('aom_sad%s', \@full, @largest);
      if (aom_config("CONFIG_HIGHWAY") eq "yes") {
        $specialize_sizes->('aom_sad%s', \@avx512, @largest);
      }
      $specialize_sizes->('aom_sad%s', \@full,    qw/32x64 32x32 32x16/);
      $specialize_sizes->('aom_sad%s', \@dotprod, qw/16x32 16x16 16x8/);
      $specialize_sizes->('aom_sad%s', \@basic,   qw/8x16 8x8 8x4 4x8 4x4 4x16/);
      $specialize_sizes->('aom_sad%s', \@dotprod, qw/16x4/);
      $specialize_sizes->('aom_sad%s', \@basic,   qw/8x32/);
      $specialize_sizes->('aom_sad%s', \@dotprod, qw/32x8 16x64 64x16/);

      # Skip SAD.
      $specialize_sizes->('aom_sad_skip_%s', \@full, @largest);
      if (aom_config("CONFIG_HIGHWAY") eq "yes") {
        $specialize_sizes->('aom_sad_skip_%s', \@avx512, @largest);
      }
      $specialize_sizes->('aom_sad_skip_%s', \@full,    qw/32x64 32x32 32x16/);
      $specialize_sizes->('aom_sad_skip_%s', \@dotprod, qw/16x32 16x16 16x8/);
      $specialize_sizes->('aom_sad_skip_%s', \@basic,   qw/8x16 4x16 8x32/);
      $specialize_sizes->('aom_sad_skip_%s', \@dotprod, qw/16x64 64x16/);

      # Averaging SAD. Here 32x64 is in the first run but not in the avx512
      # list.
      $specialize_sizes->('aom_sad%s_avg', \@full, @largest, '32x64');
      if (aom_config("CONFIG_HIGHWAY") eq "yes") {
        $specialize_sizes->('aom_sad%s_avg', \@avx512, @largest);
      }
      $specialize_sizes->('aom_sad%s_avg', \@full,    qw/32x32 32x16/);
      $specialize_sizes->('aom_sad%s_avg', \@dotprod, qw/16x32 16x16 16x8/);
      $specialize_sizes->('aom_sad%s_avg', \@basic,   qw/8x16 8x8 8x32/);
      $specialize_sizes->('aom_sad%s_avg', \@dotprod, qw/32x8 16x64 64x16/);
    }
    938 
    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      # High-bitdepth SAD: prototypes for every encoder block size, then the
      # per-size SIMD specializations.
      my $hbd_sad_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";

      foreach (@encoder_block_sizes) {
        # $w and $h are file-level variables, as in the other size loops.
        ($w, $h) = @$_;
        my $size = "${w}x${h}";

        add_proto(qw/unsigned int/, "aom_highbd_sad${size}", $hbd_sad_args);
        add_proto(qw/unsigned int/, "aom_highbd_sad_skip_${size}", $hbd_sad_args)
          if $h >= 16;
        add_proto(qw/unsigned int/, "aom_highbd_sad${size}_avg", "$hbd_sad_args, const uint8_t *second_pred")
          unless ($w == 4 || $h == 4);

        # sse2 is specialized here for sizes with no 128 dimension and a
        # width other than 4. This also names the "_avg" function for
        # height-4 sizes, which get no "_avg" prototype above.
        next if ($w == 128 || $h == 128 || $w == 4);
        specialize("aom_highbd_sad${size}", qw/sse2/);
        specialize("aom_highbd_sad${size}_avg", qw/sse2/);
      }

      # Target lists that occur below.
      my @avx2_neon      = qw/avx2 neon/;
      my @avx2_sse2_neon = qw/avx2 sse2 neon/;
      my @sse2_neon      = qw/sse2 neon/;
      my @neon_only      = qw/neon/;

      # Specializes sprintf($name_fmt, $size) for every size given.
      my $specialize_sizes = sub {
        my ($name_fmt, $targets, @sizes) = @_;
        foreach my $size (@sizes) {
          specialize(sprintf($name_fmt, $size), @$targets);
        }
      };

      # Plain SAD.
      $specialize_sizes->('aom_highbd_sad%s', \@avx2_neon,      qw/128x128 128x64 64x128/);
      $specialize_sizes->('aom_highbd_sad%s', \@avx2_sse2_neon, qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/);
      $specialize_sizes->('aom_highbd_sad%s', \@sse2_neon,      qw/8x16 8x8 8x4 4x8 4x4 4x16/);
      $specialize_sizes->('aom_highbd_sad%s', \@avx2_sse2_neon, qw/16x4/);
      $specialize_sizes->('aom_highbd_sad%s', \@sse2_neon,      qw/8x32/);
      $specialize_sizes->('aom_highbd_sad%s', \@avx2_sse2_neon, qw/32x8 16x64 64x16/);

      # Skip SAD.
      $specialize_sizes->('aom_highbd_sad_skip_%s', \@avx2_neon,      qw/128x128 128x64 64x128/);
      $specialize_sizes->('aom_highbd_sad_skip_%s', \@avx2_sse2_neon, qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16/);
      $specialize_sizes->('aom_highbd_sad_skip_%s', \@sse2_neon,      qw/8x16 4x16 8x32/);
      $specialize_sizes->('aom_highbd_sad_skip_%s', \@avx2_sse2_neon, qw/16x64 64x16/);

      # Averaging SAD. 8x16 and 8x8 list only neon here; the loop above has
      # already specialized sse2 for them.
      $specialize_sizes->('aom_highbd_sad%s_avg', \@avx2_neon,      qw/128x128 128x64 64x128/);
      $specialize_sizes->('aom_highbd_sad%s_avg', \@avx2_sse2_neon, qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/);
      $specialize_sizes->('aom_highbd_sad%s_avg', \@neon_only,      qw/8x16 8x8/);
      $specialize_sizes->('aom_highbd_sad%s_avg', \@sse2_neon,      qw/8x32/);
      $specialize_sizes->('aom_highbd_sad%s_avg', \@avx2_sse2_neon, qw/16x64 32x8 64x16/);
    }
   1014  #
   1015  # Masked SAD: declares aom_masked_sad<W>x<H> for every entry of @encoder_block_sizes
   1016  #
   1017  foreach (@encoder_block_sizes) {
   1018    ($w, $h) = @$_;  # each entry is a [width, height] pair
   1019    add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
   1020    specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;  # the same three targets are listed for every size
   1021  }
   1022 
   1023  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high bit-depth masked SAD, only when CONFIG_AV1_HIGHBITDEPTH is "yes"
   1024    foreach (@encoder_block_sizes) {
   1025      ($w, $h) = @$_;  # each entry is a [width, height] pair
   1026      add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
   1027      specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;  # same target list as the 8-bit masked SAD
   1028    }
   1029  }
   1030 
   1031  #
   1032  # OBMC SAD: declared only when CONFIG_REALTIME_ONLY is not "yes"
   1033  #
   1034  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
   1035    foreach (@encoder_block_sizes) {
   1036      ($w, $h) = @$_;  # each entry is a [width, height] pair
   1037      add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
   1038      if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {  # the specialize call is skipped for 128x32 and 32x128
   1039        specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
   1040      }  # NOTE(review): the size filter near the top of the file only admits ratios up to 2:1, so this guard may never trigger - confirm
   1041    }
   1042 
   1043    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high bit-depth counterparts, same prototype arguments
   1044      foreach (@encoder_block_sizes) {
   1045        ($w, $h) = @$_;  # each entry is a [width, height] pair
   1046        add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
   1047        if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {  # same size exclusion as the 8-bit loop
   1048          specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
   1049        }
   1050      }
   1051    }
   1052  }
   1053 
   1054  #
   1055  # Multi-block SAD: one source block (src_ptr) against several reference blocks (ref_ptr[]), x4d and x3d forms
   1056  #
   1057  foreach (@encoder_block_sizes) {
   1058    ($w, $h) = @$_;  # each entry is a [width, height] pair
   1059    add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
   1060    add_proto qw/void/, "aom_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";  # x3d is declared with the same [4]-sized arrays as x4d
   1061    if ($h >= 16) {  # skip variants are declared only for heights of 16 and above
   1062      add_proto qw/void/, "aom_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
   1063    }
   1064  }
   1065 
   1066  specialize qw/aom_sad128x128x4d avx2 sse2 neon neon_dotprod/;
   1067  specialize qw/aom_sad128x64x4d  avx2 sse2 neon neon_dotprod/;
   1068  specialize qw/aom_sad64x128x4d  avx2 sse2 neon neon_dotprod/;
   1069  specialize qw/aom_sad64x64x4d   avx2 sse2 neon neon_dotprod/;
   1070  specialize qw/aom_sad64x32x4d   avx2 sse2 neon neon_dotprod/;
   1071  specialize qw/aom_sad32x64x4d   avx2 sse2 neon neon_dotprod/;
   1072 
   1073  if(aom_config("CONFIG_HIGHWAY") eq "yes") {  # avx512 entries for the x4d SADs are listed only when CONFIG_HIGHWAY is "yes"
   1074    specialize qw/aom_sad128x128x4d avx512/;  # presumably adds to the targets already listed for this function - specialize() is defined elsewhere; confirm
   1075    specialize qw/aom_sad128x64x4d  avx512/;
   1076    specialize qw/aom_sad64x128x4d  avx512/;
   1077    specialize qw/aom_sad64x64x4d   avx512/;
   1078    specialize qw/aom_sad64x32x4d   avx512/;
   1079  }
   1080 
   1081  specialize qw/aom_sad32x32x4d   avx2 sse2 neon neon_dotprod/;
   1082  specialize qw/aom_sad32x16x4d   avx2 sse2 neon neon_dotprod/;
   1083  specialize qw/aom_sad16x32x4d   avx2 sse2 neon neon_dotprod/;
   1084  specialize qw/aom_sad16x16x4d   avx2 sse2 neon neon_dotprod/;
   1085  specialize qw/aom_sad16x8x4d    avx2 sse2 neon neon_dotprod/;
   1086 
   1087  specialize qw/aom_sad8x16x4d         sse2 neon/;
   1088  specialize qw/aom_sad8x8x4d          sse2 neon/;
   1089  specialize qw/aom_sad8x4x4d          sse2 neon/;
   1090  specialize qw/aom_sad4x8x4d          sse2 neon/;
   1091  specialize qw/aom_sad4x4x4d          sse2 neon/;
   1092 
   1093  specialize qw/aom_sad64x16x4d   avx2 sse2 neon neon_dotprod/;
   1094  specialize qw/aom_sad32x8x4d    avx2 sse2 neon neon_dotprod/;
   1095  specialize qw/aom_sad16x64x4d   avx2 sse2 neon neon_dotprod/;
   1096  specialize qw/aom_sad16x4x4d    avx2 sse2 neon neon_dotprod/;
   1097  specialize qw/aom_sad8x32x4d         sse2 neon/;
   1098  specialize qw/aom_sad4x16x4d         sse2 neon/;
   1099 
   1100  specialize qw/aom_sad_skip_128x128x4d avx2 sse2 neon neon_dotprod/;
   1101  specialize qw/aom_sad_skip_128x64x4d  avx2 sse2 neon neon_dotprod/;
   1102  specialize qw/aom_sad_skip_64x128x4d  avx2 sse2 neon neon_dotprod/;
   1103  specialize qw/aom_sad_skip_64x64x4d   avx2 sse2 neon neon_dotprod/;
   1104  specialize qw/aom_sad_skip_64x32x4d   avx2 sse2 neon neon_dotprod/;
   1105  specialize qw/aom_sad_skip_64x16x4d   avx2 sse2 neon neon_dotprod/;
   1106  specialize qw/aom_sad_skip_32x64x4d   avx2 sse2 neon neon_dotprod/;
   1107  specialize qw/aom_sad_skip_32x32x4d   avx2 sse2 neon neon_dotprod/;
   1108  specialize qw/aom_sad_skip_32x16x4d   avx2 sse2 neon neon_dotprod/;
   1109 
   1110  if(aom_config("CONFIG_HIGHWAY") eq "yes") {  # avx512 entries for the skip x4d SADs are listed only when CONFIG_HIGHWAY is "yes"
   1111    specialize qw/aom_sad_skip_128x128x4d avx512/;  # presumably adds to the targets already listed for this function - specialize() is defined elsewhere; confirm
   1112    specialize qw/aom_sad_skip_128x64x4d  avx512/;
   1113    specialize qw/aom_sad_skip_64x128x4d  avx512/;
   1114    specialize qw/aom_sad_skip_64x64x4d   avx512/;
   1115    specialize qw/aom_sad_skip_64x32x4d   avx512/;
   1116  }
   1117 
   1118  specialize qw/aom_sad_skip_16x64x4d   avx2 sse2 neon neon_dotprod/;
   1119  specialize qw/aom_sad_skip_16x32x4d   avx2 sse2 neon neon_dotprod/;
   1120  specialize qw/aom_sad_skip_16x16x4d   avx2 sse2 neon neon_dotprod/;
   1121  specialize qw/aom_sad_skip_16x8x4d    avx2 sse2 neon neon_dotprod/;
   1122  specialize qw/aom_sad_skip_8x32x4d         sse2 neon/;
   1123  specialize qw/aom_sad_skip_8x16x4d         sse2 neon/;
   1124  specialize qw/aom_sad_skip_4x16x4d         sse2 neon/;
   1125 
   1126  specialize qw/aom_sad128x128x3d avx2 neon neon_dotprod/;
   1127  specialize qw/aom_sad128x64x3d  avx2 neon neon_dotprod/;
   1128  specialize qw/aom_sad64x128x3d  avx2 neon neon_dotprod/;
   1129  specialize qw/aom_sad64x64x3d   avx2 neon neon_dotprod/;
   1130  specialize qw/aom_sad64x32x3d   avx2 neon neon_dotprod/;
   1131 
   1132  if(aom_config("CONFIG_HIGHWAY") eq "yes") {  # avx512 entries for the x3d SADs are listed only when CONFIG_HIGHWAY is "yes"
   1133    specialize qw/aom_sad128x128x3d avx512/;  # presumably adds to the targets already listed for this function - specialize() is defined elsewhere; confirm
   1134    specialize qw/aom_sad128x64x3d  avx512/;
   1135    specialize qw/aom_sad64x128x3d  avx512/;
   1136    specialize qw/aom_sad64x64x3d   avx512/;
   1137    specialize qw/aom_sad64x32x3d   avx512/;
   1138  }
   1139 
   1140  specialize qw/aom_sad32x64x3d   avx2 neon neon_dotprod/;
   1141  specialize qw/aom_sad32x32x3d   avx2 neon neon_dotprod/;
   1142  specialize qw/aom_sad32x16x3d   avx2 neon neon_dotprod/;
   1143  specialize qw/aom_sad16x32x3d   avx2 neon neon_dotprod/;
   1144  specialize qw/aom_sad16x16x3d   avx2 neon neon_dotprod/;
   1145  specialize qw/aom_sad16x8x3d    avx2 neon neon_dotprod/;
   1146  specialize qw/aom_sad8x16x3d         neon/;
   1147  specialize qw/aom_sad8x8x3d          neon/;
   1148  specialize qw/aom_sad8x4x3d          neon/;
   1149  specialize qw/aom_sad4x8x3d          neon/;
   1150  specialize qw/aom_sad4x4x3d          neon/;
   1151 
   1152  specialize qw/aom_sad64x16x3d   avx2 neon neon_dotprod/;
   1153  specialize qw/aom_sad32x8x3d    avx2 neon neon_dotprod/;
   1154  specialize qw/aom_sad16x64x3d   avx2 neon neon_dotprod/;
   1155  specialize qw/aom_sad16x4x3d    avx2 neon neon_dotprod/;
   1156  specialize qw/aom_sad8x32x3d         neon/;
   1157  specialize qw/aom_sad4x16x3d         neon/;
   1158 
   1159  #
   1160  # High bit-depth multi-block SAD, comparing a reference to N independent blocks
   1161  #
   1162  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
   1163    foreach (@encoder_block_sizes) {  # per size: x4d and x3d prototypes, plus a skip x4d prototype when h >= 16
   1164      ($w, $h) = @$_;  # each entry is a [width, height] pair
   1165      add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
   1166      add_proto qw/void/, "aom_highbd_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";  # x3d is declared with the same [4]-sized arrays as x4d
   1167      if ($h >= 16) {  # skip variants are declared only for heights of 16 and above
   1168        add_proto qw/void/, "aom_highbd_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
   1169      }
   1170      if ($w != 128 && $h != 128) {  # sse2 is listed for the x4d form of every size without a 128 dimension
   1171        specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
   1172      }  # NOTE(review): the per-size lists after this loop name sse2 again for these x4d functions - looks redundant; confirm
   1173    }
   1174    specialize qw/aom_highbd_sad128x128x4d      avx2 neon/;
   1175    specialize qw/aom_highbd_sad128x64x4d       avx2 neon/;
   1176    specialize qw/aom_highbd_sad64x128x4d       avx2 neon/;
   1177    specialize qw/aom_highbd_sad64x64x4d   sse2 avx2 neon/;
   1178    specialize qw/aom_highbd_sad64x32x4d   sse2 avx2 neon/;
   1179    specialize qw/aom_highbd_sad32x64x4d   sse2 avx2 neon/;
   1180    specialize qw/aom_highbd_sad32x32x4d   sse2 avx2 neon/;
   1181    specialize qw/aom_highbd_sad32x16x4d   sse2 avx2 neon/;
   1182    specialize qw/aom_highbd_sad16x32x4d   sse2 avx2 neon/;
   1183    specialize qw/aom_highbd_sad16x16x4d   sse2 avx2 neon/;
   1184    specialize qw/aom_highbd_sad16x8x4d    sse2 avx2 neon/;
   1185    specialize qw/aom_highbd_sad8x16x4d    sse2      neon/;
   1186    specialize qw/aom_highbd_sad8x8x4d     sse2      neon/;
   1187    specialize qw/aom_highbd_sad8x4x4d     sse2      neon/;
   1188    specialize qw/aom_highbd_sad4x8x4d     sse2      neon/;
   1189    specialize qw/aom_highbd_sad4x4x4d     sse2      neon/;
   1190 
   1191    specialize qw/aom_highbd_sad4x16x4d         sse2 neon/;
   1192    specialize qw/aom_highbd_sad16x4x4d    avx2 sse2 neon/;
   1193    specialize qw/aom_highbd_sad8x32x4d         sse2 neon/;
   1194    specialize qw/aom_highbd_sad32x8x4d    avx2 sse2 neon/;
   1195    specialize qw/aom_highbd_sad16x64x4d   avx2 sse2 neon/;
   1196    specialize qw/aom_highbd_sad64x16x4d   avx2 sse2 neon/;
   1197 
   1198    specialize qw/aom_highbd_sad_skip_128x128x4d avx2      neon/;
   1199    specialize qw/aom_highbd_sad_skip_128x64x4d  avx2      neon/;
   1200    specialize qw/aom_highbd_sad_skip_64x128x4d  avx2      neon/;
   1201    specialize qw/aom_highbd_sad_skip_64x64x4d   avx2 sse2 neon/;
   1202    specialize qw/aom_highbd_sad_skip_64x32x4d   avx2 sse2 neon/;
   1203    specialize qw/aom_highbd_sad_skip_32x64x4d   avx2 sse2 neon/;
   1204    specialize qw/aom_highbd_sad_skip_32x32x4d   avx2 sse2 neon/;
   1205    specialize qw/aom_highbd_sad_skip_32x16x4d   avx2 sse2 neon/;
   1206    specialize qw/aom_highbd_sad_skip_16x32x4d   avx2 sse2 neon/;
   1207    specialize qw/aom_highbd_sad_skip_16x16x4d   avx2 sse2 neon/;
   1208    specialize qw/aom_highbd_sad_skip_8x16x4d         sse2 neon/;
   1209 
   1210    specialize qw/aom_highbd_sad_skip_4x16x4d         sse2 neon/;
   1211    specialize qw/aom_highbd_sad_skip_8x32x4d         sse2 neon/;
   1212    specialize qw/aom_highbd_sad_skip_16x64x4d   avx2 sse2 neon/;
   1213    specialize qw/aom_highbd_sad_skip_64x16x4d   avx2 sse2 neon/;
   1214 
   1215    specialize qw/aom_highbd_sad128x128x3d avx2 neon/;
   1216    specialize qw/aom_highbd_sad128x64x3d  avx2 neon/;
   1217    specialize qw/aom_highbd_sad64x128x3d  avx2 neon/;
   1218    specialize qw/aom_highbd_sad64x64x3d   avx2 neon/;
   1219    specialize qw/aom_highbd_sad64x32x3d   avx2 neon/;
   1220    specialize qw/aom_highbd_sad32x64x3d   avx2 neon/;
   1221    specialize qw/aom_highbd_sad32x32x3d   avx2 neon/;
   1222    specialize qw/aom_highbd_sad32x16x3d   avx2 neon/;
   1223    specialize qw/aom_highbd_sad16x32x3d   avx2 neon/;
   1224    specialize qw/aom_highbd_sad16x16x3d   avx2 neon/;
   1225    specialize qw/aom_highbd_sad16x8x3d    avx2 neon/;
   1226    specialize qw/aom_highbd_sad8x16x3d         neon/;
   1227    specialize qw/aom_highbd_sad8x8x3d          neon/;
   1228    specialize qw/aom_highbd_sad8x4x3d          neon/;
   1229    specialize qw/aom_highbd_sad4x8x3d          neon/;
   1230    specialize qw/aom_highbd_sad4x4x3d          neon/;
   1231 
   1232    specialize qw/aom_highbd_sad64x16x3d   avx2 neon/;
   1233    specialize qw/aom_highbd_sad32x8x3d    avx2 neon/;
   1234    specialize qw/aom_highbd_sad16x64x3d   avx2 neon/;
   1235    specialize qw/aom_highbd_sad16x4x3d    avx2 neon/;
   1236    specialize qw/aom_highbd_sad8x32x3d         neon/;
   1237    specialize qw/aom_highbd_sad4x16x3d         neon/;
   1238  }
   1239  #
   1240  # Avg
   1241  #
   1242  add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *, int p";
   1243  specialize qw/aom_avg_8x8 sse2 neon/;
   1244 
   1245  add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *, int p";
   1246  specialize qw/aom_avg_4x4 sse2 neon/;
   1247 
   1248  add_proto qw/void aom_avg_8x8_quad/, "const uint8_t *s, int p, int x16_idx, int y16_idx, int *avg";
   1249  specialize qw/aom_avg_8x8_quad avx2 sse2 neon/;
   1250 
   1251  add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
   1252  specialize qw/aom_minmax_8x8 sse2 neon/;
   1253 
   1254  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
   1255    add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *, int p";
   1256    specialize qw/aom_highbd_avg_8x8 neon/;
   1257    add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *, int p";
   1258    specialize qw/aom_highbd_avg_4x4 neon/;
   1259    add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
   1260    specialize qw/aom_highbd_minmax_8x8 neon/;
   1261  }
   1262 
   1263  add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
   1264  specialize qw/aom_int_pro_row avx2 sse2 neon/;
   1265 
   1266  add_proto qw/void aom_int_pro_col/, "int16_t *vbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
   1267  specialize qw/aom_int_pro_col avx2 sse2 neon/;
   1268 
   1269  add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
   1270  specialize qw/aom_vector_var avx2 sse4_1 neon sve/;
   1271 
   1272  #
   1273  # Hadamard transform and SATD for implementing the temporal dependency model
   1274  #
   1275  add_proto qw/void aom_hadamard_4x4/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
   1276  specialize qw/aom_hadamard_4x4 sse2 neon/;
   1277 
   1278  add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
   1279  specialize qw/aom_hadamard_8x8 sse2 neon/;
   1280 
   1281  add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
   1282  specialize qw/aom_hadamard_16x16 avx2 sse2 neon/;
   1283 
   1284  add_proto qw/void aom_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
   1285  specialize qw/aom_hadamard_32x32 avx2 sse2 neon/;
   1286 
   1287  add_proto qw/void aom_hadamard_lp_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
   1288  specialize qw/aom_hadamard_lp_8x8 sse2 neon/;
   1289 
   1290  add_proto qw/void aom_hadamard_lp_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
   1291  specialize qw/aom_hadamard_lp_16x16 sse2 avx2 neon/;
   1292 
   1293  add_proto qw/void aom_hadamard_lp_8x8_dual/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
   1294  specialize qw/aom_hadamard_lp_8x8_dual sse2 avx2 neon/;
   1295 
   1296  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
   1297    add_proto qw/void aom_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
   1298    specialize qw/aom_highbd_hadamard_8x8 avx2 neon/;
   1299 
   1300    add_proto qw/void aom_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
   1301    specialize qw/aom_highbd_hadamard_16x16 avx2 neon/;
   1302 
   1303    add_proto qw/void aom_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
   1304    specialize qw/aom_highbd_hadamard_32x32 avx2 neon/;
   1305  }
   1306  add_proto qw/int aom_satd/, "const tran_low_t *coeff, int length";
   1307  specialize qw/aom_satd neon sse2 avx2/;
   1308 
   1309  add_proto qw/int aom_satd_lp/, "const int16_t *coeff, int length";
   1310  specialize qw/aom_satd_lp sse2 avx2 neon/;
   1311 
   1312 
   1313  #
   1314  # Structured Similarity (SSIM)
   1315  #
   1316  add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
   1317  specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";  # $sse2_x86_64 is 'sse2' when $opts{arch} is x86_64 and '' otherwise (set near the top of this file)
   1318 
   1319  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high bit-depth form takes uint16_t sample pointers
   1320    add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";  # no specialize call accompanies this prototype here
   1321  }
   1322 }  # CONFIG_AV1_ENCODER
   1323 
   1324 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
   1325 
   1326  #
   1327  # Specialty Variance
   1328  #
   1329  add_proto qw/void aom_get_var_sse_sum_8x8_quad/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse8x8, int *sum8x8, unsigned int *tot_sse, int *tot_sum, uint32_t *var8x8";
   1330  specialize qw/aom_get_var_sse_sum_8x8_quad        avx2 sse2 neon neon_dotprod/;
   1331 
   1332  add_proto qw/void aom_get_var_sse_sum_16x16_dual/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse16x16, unsigned int *tot_sse, int *tot_sum, uint32_t *var16x16";
   1333  specialize qw/aom_get_var_sse_sum_16x16_dual        avx2 sse2 neon neon_dotprod/;
   1334 
   1335  add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1336  add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1337  add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1338  add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1339 
   1340  specialize qw/aom_mse16x16          sse2 avx2 neon neon_dotprod/;
   1341  specialize qw/aom_mse16x8           sse2      neon neon_dotprod/;
   1342  specialize qw/aom_mse8x16           sse2      neon neon_dotprod/;
   1343  specialize qw/aom_mse8x8            sse2      neon neon_dotprod/;
   1344 
   1345  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high bit-depth MSE: four block sizes for each of the 8/10/12-bit variants
   1346    foreach $bd (8, 10, 12) {
   1347      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1348      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1349      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1350      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   1351 
   1352      if ($bd == 8) {  # numeric comparison, since $bd is an integer loop value; the 8-bit lists use neon_dotprod
   1353        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon neon_dotprod/;
   1354        specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/;
   1355        specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/;
   1356        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/;
   1357      } elsif ($bd == 10) {  # the 10-bit lists use sve; only 16x16 lists avx2
   1358        specialize "aom_highbd_${bd}_mse16x16", qw/avx2 sse2 neon sve/;
   1359        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
   1360        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
   1361        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
   1362      } else {  # remaining loop value: $bd is 12
   1363        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/;
   1364        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
   1365        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
   1366        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
   1367      }
   1368 
   1369    }
   1370  }
   1371 
   1372  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # aom_get_mb_ss is declared only when CONFIG_REALTIME_ONLY is not "yes"
   1373    add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";  # the single parameter is declared without a name
   1374    specialize qw/aom_get_mb_ss sse2 neon/;
   1375  }
   1376 
   1377  #
   1378  # Variance / Subpixel Variance / Subpixel Avg Variance
   1379  #
   1380  add_proto qw/uint64_t/, "aom_mse_wxh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";  # takes explicit w/h and a stride for both dst and src
   1381  specialize qw/aom_mse_wxh_16bit  sse2 avx2 neon/;
   1382 
   1383  add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h";  # unlike the wxh form, there is no sstride argument
   1384  specialize qw/aom_mse_16xh_16bit sse2 avx2 neon/;
   1385 
   1386  foreach (@encoder_block_sizes) {  # three prototypes per size: variance, sub-pixel variance, sub-pixel avg variance
   1387    ($w, $h) = @$_;  # each entry is a [width, height] pair
   1388    add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   1389    add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";  # adds xoffset/yoffset arguments
   1390    add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";  # additionally takes second_pred
   1391  }
   1392  specialize qw/aom_variance128x128   sse2 avx2 neon neon_dotprod/;
   1393  specialize qw/aom_variance128x64    sse2 avx2 neon neon_dotprod/;
   1394  specialize qw/aom_variance64x128    sse2 avx2 neon neon_dotprod/;
   1395  specialize qw/aom_variance64x64     sse2 avx2 neon neon_dotprod/;
   1396  specialize qw/aom_variance64x32     sse2 avx2 neon neon_dotprod/;
   1397  specialize qw/aom_variance32x64     sse2 avx2 neon neon_dotprod/;
   1398  specialize qw/aom_variance32x32     sse2 avx2 neon neon_dotprod/;
   1399  specialize qw/aom_variance32x16     sse2 avx2 neon neon_dotprod/;
   1400  specialize qw/aom_variance16x32     sse2 avx2 neon neon_dotprod/;
   1401  specialize qw/aom_variance16x16     sse2 avx2 neon neon_dotprod/;
   1402  specialize qw/aom_variance16x8      sse2 avx2 neon neon_dotprod/;
   1403  specialize qw/aom_variance8x16      sse2      neon neon_dotprod/;
   1404  specialize qw/aom_variance8x8       sse2      neon neon_dotprod/;
   1405  specialize qw/aom_variance8x4       sse2      neon neon_dotprod/;
   1406  specialize qw/aom_variance4x8       sse2      neon neon_dotprod/;
   1407  specialize qw/aom_variance4x4       sse2      neon neon_dotprod/;
   1408 
   1409  specialize qw/aom_sub_pixel_variance128x128   avx2 neon ssse3/;
   1410  specialize qw/aom_sub_pixel_variance128x64    avx2 neon ssse3/;
   1411  specialize qw/aom_sub_pixel_variance64x128    avx2 neon ssse3/;
   1412  specialize qw/aom_sub_pixel_variance64x64     avx2 neon ssse3/;
   1413  specialize qw/aom_sub_pixel_variance64x32     avx2 neon ssse3/;
   1414  specialize qw/aom_sub_pixel_variance32x64     avx2 neon ssse3/;
   1415  specialize qw/aom_sub_pixel_variance32x32     avx2 neon ssse3/;
   1416  specialize qw/aom_sub_pixel_variance32x16     avx2 neon ssse3/;
   1417  specialize qw/aom_sub_pixel_variance16x32     avx2 neon ssse3/;
   1418  specialize qw/aom_sub_pixel_variance16x16     avx2 neon ssse3/;
   1419  specialize qw/aom_sub_pixel_variance16x8      avx2 neon ssse3/;
   1420  specialize qw/aom_sub_pixel_variance8x16           neon ssse3/;
   1421  specialize qw/aom_sub_pixel_variance8x8            neon ssse3/;
   1422  specialize qw/aom_sub_pixel_variance8x4            neon ssse3/;
   1423  specialize qw/aom_sub_pixel_variance4x8            neon ssse3/;
   1424  specialize qw/aom_sub_pixel_variance4x4            neon ssse3/;
   1425 
   1426  specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon ssse3/;
   1427  specialize qw/aom_sub_pixel_avg_variance128x64  avx2 neon ssse3/;
   1428  specialize qw/aom_sub_pixel_avg_variance64x128  avx2 neon ssse3/;
   1429  specialize qw/aom_sub_pixel_avg_variance64x64   avx2 neon ssse3/;
   1430  specialize qw/aom_sub_pixel_avg_variance64x32   avx2 neon ssse3/;
   1431  specialize qw/aom_sub_pixel_avg_variance32x64   avx2 neon ssse3/;
   1432  specialize qw/aom_sub_pixel_avg_variance32x32   avx2 neon ssse3/;
   1433  specialize qw/aom_sub_pixel_avg_variance32x16   avx2 neon ssse3/;
   1434  specialize qw/aom_sub_pixel_avg_variance16x32        neon ssse3/;
   1435  specialize qw/aom_sub_pixel_avg_variance16x16        neon ssse3/;
   1436  specialize qw/aom_sub_pixel_avg_variance16x8         neon ssse3/;
   1437  specialize qw/aom_sub_pixel_avg_variance8x16         neon ssse3/;
   1438  specialize qw/aom_sub_pixel_avg_variance8x8          neon ssse3/;
   1439  specialize qw/aom_sub_pixel_avg_variance8x4          neon ssse3/;
   1440  specialize qw/aom_sub_pixel_avg_variance4x8          neon ssse3/;
   1441  specialize qw/aom_sub_pixel_avg_variance4x4          neon ssse3/;
   1442 
   1443  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # 4x16, 16x4, 8x32, 32x8, 16x64 and 64x16 are specialized only when CONFIG_REALTIME_ONLY is not "yes"
   1444    specialize qw/aom_variance4x16  neon neon_dotprod sse2/;
   1445    specialize qw/aom_variance16x4  neon neon_dotprod sse2 avx2/;
   1446    specialize qw/aom_variance8x32  neon neon_dotprod sse2/;
   1447    specialize qw/aom_variance32x8  neon neon_dotprod sse2 avx2/;
   1448    specialize qw/aom_variance16x64 neon neon_dotprod sse2 avx2/;
   1449    specialize qw/aom_variance64x16 neon neon_dotprod sse2 avx2/;
   1450 
   1451    specialize qw/aom_sub_pixel_variance4x16 neon ssse3/;
   1452    specialize qw/aom_sub_pixel_variance16x4 neon avx2 ssse3/;  # within the sub-pixel group, avx2 is listed only for 16x4 and 16x64
   1453    specialize qw/aom_sub_pixel_variance8x32 neon ssse3/;
   1454    specialize qw/aom_sub_pixel_variance32x8 neon ssse3/;
   1455    specialize qw/aom_sub_pixel_variance16x64 neon avx2 ssse3/;
   1456    specialize qw/aom_sub_pixel_variance64x16 neon ssse3/;
   1457    specialize qw/aom_sub_pixel_avg_variance4x16 neon ssse3/;  # the sub-pixel avg group lists neon and ssse3 only
   1458    specialize qw/aom_sub_pixel_avg_variance16x4 neon ssse3/;
   1459    specialize qw/aom_sub_pixel_avg_variance8x32 neon ssse3/;
   1460    specialize qw/aom_sub_pixel_avg_variance32x8 neon ssse3/;
   1461    specialize qw/aom_sub_pixel_avg_variance16x64 neon ssse3/;
   1462    specialize qw/aom_sub_pixel_avg_variance64x16 neon ssse3/;
   1463  }
   1464 
   1465  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
   1466    foreach $bd (8, 10, 12) {  # one set of prototypes per bit depth
   1467      foreach (@encoder_block_sizes) {  # three prototypes per size: variance, sub-pixel variance, sub-pixel avg variance
   1468        ($w, $h) = @$_;  # each entry is a [width, height] pair
   1469        add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
   1470        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";  # adds xoffset/yoffset arguments
   1471        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";  # additionally takes second_pred
   1472      }
   1473    }
   1474 
   1475    specialize qw/aom_highbd_12_variance128x128 sse2 neon sve/;
   1476    specialize qw/aom_highbd_12_variance128x64  sse2 neon sve/;
   1477    specialize qw/aom_highbd_12_variance64x128  sse2 neon sve/;
   1478    specialize qw/aom_highbd_12_variance64x64   sse2 neon sve/;
   1479    specialize qw/aom_highbd_12_variance64x32   sse2 neon sve/;
   1480    specialize qw/aom_highbd_12_variance32x64   sse2 neon sve/;
   1481    specialize qw/aom_highbd_12_variance32x32   sse2 neon sve/;
   1482    specialize qw/aom_highbd_12_variance32x16   sse2 neon sve/;
   1483    specialize qw/aom_highbd_12_variance16x32   sse2 neon sve/;
   1484    specialize qw/aom_highbd_12_variance16x16   sse2 neon sve/;
   1485    specialize qw/aom_highbd_12_variance16x8    sse2 neon sve/;
   1486    specialize qw/aom_highbd_12_variance8x16    sse2 neon sve/;
   1487    specialize qw/aom_highbd_12_variance8x8     sse2 neon sve/;
   1488    specialize qw/aom_highbd_12_variance8x4          neon sve/;
   1489    specialize qw/aom_highbd_12_variance4x8          neon sve/;
   1490    specialize qw/aom_highbd_12_variance4x4   sse4_1 neon sve/;
   1491 
   1492    specialize qw/aom_highbd_10_variance128x128 sse2 avx2 neon sve/;
   1493    specialize qw/aom_highbd_10_variance128x64  sse2 avx2 neon sve/;
   1494    specialize qw/aom_highbd_10_variance64x128  sse2 avx2 neon sve/;
   1495    specialize qw/aom_highbd_10_variance64x64   sse2 avx2 neon sve/;
   1496    specialize qw/aom_highbd_10_variance64x32   sse2 avx2 neon sve/;
   1497    specialize qw/aom_highbd_10_variance32x64   sse2 avx2 neon sve/;
   1498    specialize qw/aom_highbd_10_variance32x32   sse2 avx2 neon sve/;
   1499    specialize qw/aom_highbd_10_variance32x16   sse2 avx2 neon sve/;
   1500    specialize qw/aom_highbd_10_variance16x32   sse2 avx2 neon sve/;
   1501    specialize qw/aom_highbd_10_variance16x16   sse2 avx2 neon sve/;
   1502    specialize qw/aom_highbd_10_variance16x8    sse2 avx2 neon sve/;
   1503    specialize qw/aom_highbd_10_variance8x16    sse2 avx2 neon sve/;
   1504    specialize qw/aom_highbd_10_variance8x8     sse2 avx2 neon sve/;
   1505    specialize qw/aom_highbd_10_variance8x4               neon sve/;
   1506    specialize qw/aom_highbd_10_variance4x8               neon sve/;
   1507    specialize qw/aom_highbd_10_variance4x4   sse4_1      neon sve/;
   1508 
   # Optimized-flavor lists for the 8-bit aom_highbd_8_variance<W>x<H> functions.
   # No avx2 flavor is named in this run: sizes 8x8 and larger name sse2, neon
   # and sve; 8x4 and 4x8 name only neon and sve; 4x4 names sse4_1, neon and sve.
   1509    specialize qw/aom_highbd_8_variance128x128 sse2 neon sve/;
   1510    specialize qw/aom_highbd_8_variance128x64  sse2 neon sve/;
   1511    specialize qw/aom_highbd_8_variance64x128  sse2 neon sve/;
   1512    specialize qw/aom_highbd_8_variance64x64   sse2 neon sve/;
   1513    specialize qw/aom_highbd_8_variance64x32   sse2 neon sve/;
   1514    specialize qw/aom_highbd_8_variance32x64   sse2 neon sve/;
   1515    specialize qw/aom_highbd_8_variance32x32   sse2 neon sve/;
   1516    specialize qw/aom_highbd_8_variance32x16   sse2 neon sve/;
   1517    specialize qw/aom_highbd_8_variance16x32   sse2 neon sve/;
   1518    specialize qw/aom_highbd_8_variance16x16   sse2 neon sve/;
   1519    specialize qw/aom_highbd_8_variance16x8    sse2 neon sve/;
   1520    specialize qw/aom_highbd_8_variance8x16    sse2 neon sve/;
   1521    specialize qw/aom_highbd_8_variance8x8     sse2 neon sve/;
   1522    specialize qw/aom_highbd_8_variance8x4          neon sve/;
   1523    specialize qw/aom_highbd_8_variance4x8          neon sve/;
   1524    specialize qw/aom_highbd_8_variance4x4   sse4_1 neon sve/;
   1525 
   # Flavor lists for the 64x16, 32x8, 16x64, 16x4, 8x32 and 4x16 high-bitdepth
   # variance functions at bit depths 8, 10 and 12. The whole group is skipped
   # when CONFIG_REALTIME_ONLY is "yes".
   1526    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
   1527      foreach $bd (8, 10, 12) {
               # avx2 is named only for the 10-bit functions; for 8 and 12 an empty
               # string is passed in that argument position instead.
   1528        my $avx2 = ($bd == 10) ? "avx2" : "";
   1529        specialize "aom_highbd_${bd}_variance64x16" , $avx2, qw/sse2 neon sve/;
   1530        specialize "aom_highbd_${bd}_variance32x8" , $avx2, qw/sse2 neon sve/;
   1531        specialize "aom_highbd_${bd}_variance16x64" , $avx2, qw/sse2 neon sve/;
               # 16x4 and 4x16 name only neon and sve (no $avx2, no sse2).
   1532        specialize "aom_highbd_${bd}_variance16x4" , qw/neon sve/;
   1533        specialize "aom_highbd_${bd}_variance8x32" , $avx2, qw/sse2 neon sve/;
   1534        specialize "aom_highbd_${bd}_variance4x16" , qw/neon sve/;
   1535      }
   1536    }
   1537 
   # Optimized-flavor lists for the 12-bit aom_highbd_12_sub_pixel_variance<W>x<H>
   # functions. Sizes 8x4 and larger name sse2 and neon; 4x8 names only neon;
   # 4x4 names sse4_1 and neon.
   1538    specialize qw/aom_highbd_12_sub_pixel_variance128x128 sse2 neon/;
   1539    specialize qw/aom_highbd_12_sub_pixel_variance128x64  sse2 neon/;
   1540    specialize qw/aom_highbd_12_sub_pixel_variance64x128  sse2 neon/;
   1541    specialize qw/aom_highbd_12_sub_pixel_variance64x64   sse2 neon/;
   1542    specialize qw/aom_highbd_12_sub_pixel_variance64x32   sse2 neon/;
   1543    specialize qw/aom_highbd_12_sub_pixel_variance32x64   sse2 neon/;
   1544    specialize qw/aom_highbd_12_sub_pixel_variance32x32   sse2 neon/;
   1545    specialize qw/aom_highbd_12_sub_pixel_variance32x16   sse2 neon/;
   1546    specialize qw/aom_highbd_12_sub_pixel_variance16x32   sse2 neon/;
   1547    specialize qw/aom_highbd_12_sub_pixel_variance16x16   sse2 neon/;
   1548    specialize qw/aom_highbd_12_sub_pixel_variance16x8    sse2 neon/;
   1549    specialize qw/aom_highbd_12_sub_pixel_variance8x16    sse2 neon/;
   1550    specialize qw/aom_highbd_12_sub_pixel_variance8x8     sse2 neon/;
   1551    specialize qw/aom_highbd_12_sub_pixel_variance8x4     sse2 neon/;
   1552    specialize qw/aom_highbd_12_sub_pixel_variance4x8          neon/;
   1553    specialize qw/aom_highbd_12_sub_pixel_variance4x4   sse4_1 neon/;
   1554 
   # Optimized-flavor lists for the 10-bit aom_highbd_10_sub_pixel_variance<W>x<H>
   # functions. Sizes 8x8 and larger name sse2, avx2 and neon; 8x4 names sse2 and
   # neon (no avx2); 4x8 names only neon; 4x4 names sse4_1 and neon.
   1555    specialize qw/aom_highbd_10_sub_pixel_variance128x128 sse2 avx2 neon/;
   1556    specialize qw/aom_highbd_10_sub_pixel_variance128x64  sse2 avx2 neon/;
   1557    specialize qw/aom_highbd_10_sub_pixel_variance64x128  sse2 avx2 neon/;
   1558    specialize qw/aom_highbd_10_sub_pixel_variance64x64   sse2 avx2 neon/;
   1559    specialize qw/aom_highbd_10_sub_pixel_variance64x32   sse2 avx2 neon/;
   1560    specialize qw/aom_highbd_10_sub_pixel_variance32x64   sse2 avx2 neon/;
   1561    specialize qw/aom_highbd_10_sub_pixel_variance32x32   sse2 avx2 neon/;
   1562    specialize qw/aom_highbd_10_sub_pixel_variance32x16   sse2 avx2 neon/;
   1563    specialize qw/aom_highbd_10_sub_pixel_variance16x32   sse2 avx2 neon/;
   1564    specialize qw/aom_highbd_10_sub_pixel_variance16x16   sse2 avx2 neon/;
   1565    specialize qw/aom_highbd_10_sub_pixel_variance16x8    sse2 avx2 neon/;
   1566    specialize qw/aom_highbd_10_sub_pixel_variance8x16    sse2 avx2 neon/;
   1567    specialize qw/aom_highbd_10_sub_pixel_variance8x8     sse2 avx2 neon/;
   1568    specialize qw/aom_highbd_10_sub_pixel_variance8x4     sse2      neon/;
   1569    specialize qw/aom_highbd_10_sub_pixel_variance4x8               neon/;
   1570    specialize qw/aom_highbd_10_sub_pixel_variance4x4   sse4_1      neon/;
   1571 
   # Optimized-flavor lists for the 8-bit aom_highbd_8_sub_pixel_variance<W>x<H>
   # functions. Sizes 8x4 and larger name sse2 and neon; 4x8 names only neon;
   # 4x4 names sse4_1 and neon.
   1572    specialize qw/aom_highbd_8_sub_pixel_variance128x128 sse2 neon/;
   1573    specialize qw/aom_highbd_8_sub_pixel_variance128x64  sse2 neon/;
   1574    specialize qw/aom_highbd_8_sub_pixel_variance64x128  sse2 neon/;
   1575    specialize qw/aom_highbd_8_sub_pixel_variance64x64   sse2 neon/;
   1576    specialize qw/aom_highbd_8_sub_pixel_variance64x32   sse2 neon/;
   1577    specialize qw/aom_highbd_8_sub_pixel_variance32x64   sse2 neon/;
   1578    specialize qw/aom_highbd_8_sub_pixel_variance32x32   sse2 neon/;
   1579    specialize qw/aom_highbd_8_sub_pixel_variance32x16   sse2 neon/;
   1580    specialize qw/aom_highbd_8_sub_pixel_variance16x32   sse2 neon/;
   1581    specialize qw/aom_highbd_8_sub_pixel_variance16x16   sse2 neon/;
   1582    specialize qw/aom_highbd_8_sub_pixel_variance16x8    sse2 neon/;
   1583    specialize qw/aom_highbd_8_sub_pixel_variance8x16    sse2 neon/;
   1584    specialize qw/aom_highbd_8_sub_pixel_variance8x8     sse2 neon/;
   1585    specialize qw/aom_highbd_8_sub_pixel_variance8x4     sse2 neon/;
   1586    specialize qw/aom_highbd_8_sub_pixel_variance4x8          neon/;
   1587    specialize qw/aom_highbd_8_sub_pixel_variance4x4   sse4_1 neon/;
   1588 
   # Flavor lists for the 64x16, 32x8, 16x64, 16x4, 8x32 and 4x16 high-bitdepth
   # sub-pixel variance functions at bit depths 8, 10 and 12; skipped when
   # CONFIG_REALTIME_ONLY is "yes". Every size names sse2 and neon except 4x16,
   # which names only neon. Unlike the fixed 10-bit list, no avx2 is named here.
   1589    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
   1590      foreach $bd (8, 10, 12) {
   1591        specialize "aom_highbd_${bd}_sub_pixel_variance64x16" , qw/sse2 neon/;
   1592        specialize "aom_highbd_${bd}_sub_pixel_variance32x8" , qw/sse2 neon/;
   1593        specialize "aom_highbd_${bd}_sub_pixel_variance16x64" , qw/sse2 neon/;
   1594        specialize "aom_highbd_${bd}_sub_pixel_variance16x4" , qw/sse2 neon/;
   1595        specialize "aom_highbd_${bd}_sub_pixel_variance8x32" , qw/sse2 neon/;
   1596        specialize "aom_highbd_${bd}_sub_pixel_variance4x16" , qw/neon/;
   1597      }
   1598    }
   1599 
   # Optimized-flavor lists for the 12-bit
   # aom_highbd_12_sub_pixel_avg_variance<W>x<H> functions. The three sizes with a
   # 128 dimension and 4x8 name only neon; 64x64 down to 8x4 name sse2 and neon;
   # 4x4 names sse4_1 and neon.
   1600    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x128      neon/;
   1601    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x64       neon/;
   1602    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x128       neon/;
   1603    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64   sse2 neon/;
   1604    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32   sse2 neon/;
   1605    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64   sse2 neon/;
   1606    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32   sse2 neon/;
   1607    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16   sse2 neon/;
   1608    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32   sse2 neon/;
   1609    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16   sse2 neon/;
   1610    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8    sse2 neon/;
   1611    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16    sse2 neon/;
   1612    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8     sse2 neon/;
   1613    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4     sse2 neon/;
   1614    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x8          neon/;
   1615    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x4   sse4_1 neon/;
   1616 
   # Optimized-flavor lists for the 10-bit
   # aom_highbd_10_sub_pixel_avg_variance<W>x<H> functions. The three sizes with a
   # 128 dimension and 4x8 name only neon; 64x64 down to 8x4 name sse2 and neon;
   # 4x4 names sse4_1 and neon. No avx2 flavor is named in this run.
   1617    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x128      neon/;
   1618    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x64       neon/;
   1619    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x128       neon/;
   1620    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64   sse2 neon/;
   1621    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32   sse2 neon/;
   1622    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64   sse2 neon/;
   1623    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32   sse2 neon/;
   1624    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16   sse2 neon/;
   1625    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32   sse2 neon/;
   1626    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16   sse2 neon/;
   1627    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8    sse2 neon/;
   1628    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16    sse2 neon/;
   1629    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8     sse2 neon/;
   1630    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4     sse2 neon/;
   1631    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x8          neon/;
   1632    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x4   sse4_1 neon/;
   1633 
   # Optimized-flavor lists for the 8-bit
   # aom_highbd_8_sub_pixel_avg_variance<W>x<H> functions. The three sizes with a
   # 128 dimension and 4x8 name only neon; 64x64 down to 8x4 name sse2 and neon;
   # 4x4 names sse4_1 and neon.
   1634    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x128      neon/;
   1635    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x64       neon/;
   1636    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x128       neon/;
   1637    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64   sse2 neon/;
   1638    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32   sse2 neon/;
   1639    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64   sse2 neon/;
   1640    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32   sse2 neon/;
   1641    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16   sse2 neon/;
   1642    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32   sse2 neon/;
   1643    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16   sse2 neon/;
   1644    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8    sse2 neon/;
   1645    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16    sse2 neon/;
   1646    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8     sse2 neon/;
   1647    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4     sse2 neon/;
   1648    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x8          neon/;
   1649    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x4   sse4_1 neon/;
   1650 
   # Flavor lists for the 64x16, 32x8, 16x64, 16x4, 8x32 and 4x16 high-bitdepth
   # sub-pixel averaging variance functions at bit depths 8, 10 and 12; skipped
   # when CONFIG_REALTIME_ONLY is "yes". Every size names sse2 and neon except
   # 4x16, which names only neon.
   1651    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
   1652      foreach $bd (8, 10, 12) {
   1653        specialize "aom_highbd_${bd}_sub_pixel_avg_variance64x16" , qw/sse2 neon/;
   1654        specialize "aom_highbd_${bd}_sub_pixel_avg_variance32x8" , qw/sse2 neon/;
   1655        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x64" , qw/sse2 neon/;
   1656        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x4" , qw/sse2 neon/;
   1657        specialize "aom_highbd_${bd}_sub_pixel_avg_variance8x32" , qw/sse2 neon/;
   1658        specialize "aom_highbd_${bd}_sub_pixel_avg_variance4x16" , qw/neon/;
   1659      }
   1660    }
   1661  }
   1662  #
   1663  # Masked Variance / Masked Subpixel Variance
   1664  #
   # For every [w, h] pair in @encoder_block_sizes (built near the top of the
   # file), registers the prototype aom_masked_sub_pixel_variance<w>x<h> and
   # names ssse3 and neon flavors for it. Only the sub-pixel variant is
   # registered here; no plain aom_masked_variance prototype appears in this loop.
   1665  foreach (@encoder_block_sizes) {
           # Each element is an array ref; unpack it into the globals $w and $h.
   1666    ($w, $h) = @$_;
   1667    add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
   1668    specialize "aom_masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
   1669  }
   1670 
   # High-bitdepth counterparts of the masked sub-pixel variance functions,
   # registered only when CONFIG_AV1_HIGHBITDEPTH is "yes".
   1671  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
           # $bd is the name infix including its underscores, so the names come out
           # as aom_highbd_8_masked_..., aom_highbd_10_masked_..., aom_highbd_12_masked_...
   1672    foreach $bd ("_8_", "_10_", "_12_") {
   1673      foreach (@encoder_block_sizes) {
   1674        ($w, $h) = @$_;
               # The argument list is the same string as the 8-bit prototype; the
               # sample pointers are still declared as const uint8_t *.
   1675        add_proto qw/unsigned int/, "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
   1676        specialize "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
   1677      }
   1678    }
   1679  }
   1680 
   1681  #
   1682  # OBMC Variance / OBMC Subpixel Variance
   1683  #
   # OBMC variance and OBMC sub-pixel variance prototypes for every block size in
   # @encoder_block_sizes. The entire section, including the high-bitdepth part,
   # is skipped when CONFIG_REALTIME_ONLY is "yes".
   1684  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
   1685    foreach (@encoder_block_sizes) {
   1686      ($w, $h) = @$_;
   1687      add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
   1688      add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
             # The plain variant names sse4_1, avx2 and neon; the sub-pixel variant
             # names sse4_1 and neon (no avx2).
   1689      specialize "aom_obmc_variance${w}x${h}", qw/sse4_1 avx2 neon/;
   1690      specialize "aom_obmc_sub_pixel_variance${w}x${h}", qw/sse4_1 neon/;
   1691    }
   1692 
           # High-bitdepth OBMC variants, additionally gated on CONFIG_AV1_HIGHBITDEPTH.
   1693    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
             # $bd is the name infix with its underscores ("_8_", "_10_", "_12_").
   1694      foreach $bd ("_8_", "_10_", "_12_") {
   1695        foreach (@encoder_block_sizes) {
   1696          ($w, $h) = @$_;
   1697          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
   1698          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
                 # The plain variant names sse4_1 and neon; the sub-pixel variant
                 # names only neon.
   1699          specialize "aom_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1 neon/;
   1700          specialize "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", qw/neon/;
   1701        }
   1702      }
   1703    }
   1704  }
   1705 
   1706  #
   1707  # Comp Avg
   1708  #
   # Prototype for aom_comp_avg_pred (returns void), with avx2 and neon flavors
   # named. Return type and function name are passed together in one qw// list.
   1709  add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
   1710  specialize qw/aom_comp_avg_pred avx2 neon/;
   1711 
   # Two prototypes registered only when CONFIG_AV1_HIGHBITDEPTH is "yes":
   # aom_highbd_comp_avg_pred and aom_mse_wxh_16bit_highbd.
   1712  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
           # Only a neon flavor is named for the high-bitdepth compound average.
   1713    add_proto qw/void aom_highbd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
   1714    specialize qw/aom_highbd_comp_avg_pred neon/;
   1715 
           # Returns uint64_t and takes uint16_t * buffers, unlike the uint8_t *
           # parameters of the prototype above.
   1716    add_proto qw/uint64_t/, "aom_mse_wxh_16bit_highbd", "uint16_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
   1717    specialize qw/aom_mse_wxh_16bit_highbd   sse2 avx2 neon sve/;
   1718  }
   1719 
   # Prototype for aom_comp_mask_pred (returns void), with ssse3, avx2 and neon
   # flavors named. Compared with aom_comp_avg_pred, the argument list adds
   # mask, mask_stride and invert_mask.
   1720  add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
   1721  specialize qw/aom_comp_mask_pred ssse3 avx2 neon/;
   1722 
   # High-bitdepth masked compound prediction prototype, registered only when
   # CONFIG_AV1_HIGHBITDEPTH is "yes"; names sse2, avx2 and neon flavors.
   1723  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
           # NOTE(review): the output parameter is spelled comp_pred here but
           # comp_pred8 in aom_highbd_comp_avg_pred -- naming differs, confirm intent.
   1724    add_proto qw/void aom_highbd_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
   1725    specialize qw/aom_highbd_comp_mask_pred sse2 avx2 neon/;
   1726  }
   1727 
   1728  # Flow estimation library
   # Three flow-estimation prototypes, skipped when CONFIG_REALTIME_ONLY is "yes".
   1729  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
           # Returns bool and writes through the mean and one_over_stddev pointers;
           # only x86 flavors (sse4_1, avx2) are named.
   1730    add_proto qw/bool aom_compute_mean_stddev/, "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev";
   1731    specialize qw/aom_compute_mean_stddev sse4_1 avx2/;
   1732 
           # Takes a mean and one_over_stddev per frame and returns a double;
           # again only sse4_1 and avx2 are named.
   1733    add_proto qw/double aom_compute_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2";
   1734    specialize qw/aom_compute_correlation sse4_1 avx2/;
   1735 
           # Writes results through the u and v pointers; this is the only one of
           # the three that also names neon and sve flavors.
   1736    add_proto qw/void aom_compute_flow_at_point/, "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v";
   1737    specialize qw/aom_compute_flow_at_point sse4_1 avx2 neon sve/;
   1738  }
   1739 
   1740 }  # CONFIG_AV1_ENCODER
   1741 
   1742 1;