aom_dsp_rtcd_defs.pl (98273B)
##
## Copyright (c) 2017, Alliance for Open Media. All rights reserved.
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##

# Prints the fixed preamble below: a "DSP" banner comment followed by four
# #include directives. Registered through forward_decls right after the
# definition.
sub aom_dsp_forward_decls() {
print <<EOF;
/*
 * DSP
 */

#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"

EOF
}
forward_decls qw/aom_dsp_forward_decls/;

# optimizations which depend on multiple features:
# 'avx2' only when both HAVE_AVX2 and HAVE_SSSE3 are "yes", otherwise ''.
$avx2_ssse3 =
    ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes"))
    ? 'avx2'
    : '';

# functions that are 64 bit only.
# ISA suffixes for x86_64-only functions: each variable holds the ISA name when
# $opts{arch} is "x86_64" and the empty string otherwise.
my $on_x86_64 = ($opts{arch} eq "x86_64");
$mmx_x86_64   = $on_x86_64 ? 'mmx'   : '';
$sse2_x86_64  = $on_x86_64 ? 'sse2'  : '';
$ssse3_x86_64 = $on_x86_64 ? 'ssse3' : '';
$avx_x86_64   = $on_x86_64 ? 'avx'   : '';
$avx2_x86_64  = $on_x86_64 ? 'avx2'  : '';

@block_widths = (4, 8, 16, 32, 64, 128);

# Encoder block sizes: every [w, h] pair whose sides differ by at most 2x.
@encoder_block_sizes = ();
foreach my $blk_w (@block_widths) {
  foreach my $blk_h (@block_widths) {
    next if ($blk_w > 2*$blk_h || $blk_h > 2*$blk_w);
    push @encoder_block_sizes, [$blk_w, $blk_h];
  }
}

# The 1:4 / 4:1 block sizes are appended unless CONFIG_REALTIME_ONLY is "yes".
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
  push @encoder_block_sizes,
       [4, 16], [16, 4], [8, 32], [32, 8], [16, 64], [64, 16];
}

# Transform sizes: for each width, the square size first, then (walking the
# heights in order) the 1:2 / 2:1 sizes and, when the configuration allows
# them, the 1:4 / 4:1 sizes.
@tx_dims = (4, 8, 16, 32, 64);
@tx_sizes = ();
foreach my $tx_w (@tx_dims) {
  push @tx_sizes, [$tx_w, $tx_w];
  foreach my $tx_h (@tx_dims) {
    my $both_at_least_4 = ($tx_w >= 4 && $tx_h >= 4);
    my $ratio_is_2 = ($tx_w == 2*$tx_h || $tx_h == 2*$tx_w);
    my $ratio_is_4 = ($tx_w == 4*$tx_h || $tx_h == 4*$tx_w);

    push @tx_sizes, [$tx_w, $tx_h] if ($both_at_least_4 && $ratio_is_2);
    if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
        (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
      push @tx_sizes, [$tx_w, $tx_h] if ($both_at_least_4 && $ratio_is_4);
    }  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
  }
}

@pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/;

#
# Intra prediction
#

# One prototype per (transform size, predictor) pair, plus the high-bitdepth
# twin when CONFIG_AV1_HIGHBITDEPTH is "yes".
my $lowbd_pred_args =
    "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
my $highbd_pred_args =
    "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";

foreach (@tx_sizes) {
  ($w, $h) = @$_;
  my $dims = "${w}x${h}";
  foreach $pred_name (@pred_names) {
    add_proto "void", "aom_${pred_name}_predictor_${dims}", $lowbd_pred_args;
    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      add_proto "void", "aom_highbd_${pred_name}_predictor_${dims}", $highbd_pred_args;
    }
  }
}

# $specialize_sizes->($prefix, [ISAs...], sizes...) issues, in list order,
#   specialize "${prefix}_${size}", ISAs...
# for every size given.
my $specialize_sizes = sub {
  my ($prefix, $isas, @sizes) = @_;
  foreach my $size (@sizes) {
    specialize("${prefix}_${size}", @$isas);
  }
};

# Size lists shared by the specialization tables below.
my @sizes_upto_16x32 = qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/;
my @sizes_from_32x16 = qw/32x16 32x32 32x64 64x32 64x64/;
my @sizes_sq_and_1to2 = (@sizes_upto_16x32, @sizes_from_32x16);
my @sizes_1to4_upto_32x8 = qw/4x16 8x32 16x4 16x64 32x8/;
my @sizes_1to4 = (@sizes_1to4_upto_32x8, '64x16');

# dc_top / dc_left / dc_128 / v: avx2 is listed from 32x16 upwards.
foreach my $pred (qw/dc_top dc_left dc_128 v/) {
  $specialize_sizes->("aom_${pred}_predictor", [qw/neon sse2/], @sizes_upto_16x32);
  $specialize_sizes->("aom_${pred}_predictor", [qw/neon sse2 avx2/], @sizes_from_32x16);
}

# h: 32x32 is the only size that lists avx2.
$specialize_sizes->("aom_h_predictor", [qw/neon sse2/], @sizes_upto_16x32, '32x16');
$specialize_sizes->("aom_h_predictor", [qw/neon sse2 avx2/], '32x32');
$specialize_sizes->("aom_h_predictor", [qw/neon sse2/], qw/32x64 64x32 64x64/);

# paeth: avx2 is listed from 16x8 upwards.
$specialize_sizes->("aom_paeth_predictor", [qw/ssse3 neon/], qw/4x4 4x8 8x4 8x8 8x16/);
$specialize_sizes->("aom_paeth_predictor", [qw/ssse3 avx2 neon/],
                    qw/16x8 16x16 16x32/, @sizes_from_32x16);

foreach my $pred (qw/smooth smooth_v smooth_h/) {
  $specialize_sizes->("aom_${pred}_predictor", [qw/neon ssse3/], @sizes_sq_and_1to2);
}

# TODO(yunqingwang): optimize rectangular DC_PRED to replace division
# by multiply and shift.
$specialize_sizes->("aom_dc_predictor", [qw/neon sse2/], @sizes_upto_16x32);
$specialize_sizes->("aom_dc_predictor", [qw/neon sse2 avx2/],
                    qw/32x16 32x32 32x64 64x64 64x32/);


if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") || (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
  # dc_top / dc_left / dc_128 / v: only 64x16 lists avx2.
  foreach my $pred (qw/dc_top dc_left dc_128 v/) {
    $specialize_sizes->("aom_${pred}_predictor", [qw/neon sse2/], @sizes_1to4_upto_32x8);
    $specialize_sizes->("aom_${pred}_predictor", [qw/neon sse2 avx2/], '64x16');
  }

  $specialize_sizes->("aom_h_predictor", [qw/neon sse2/], @sizes_1to4);

  # paeth: avx2 is listed for 16x64 and 64x16 only.
  $specialize_sizes->("aom_paeth_predictor", [qw/ssse3 neon/], qw/4x16 8x32 16x4/);
  $specialize_sizes->("aom_paeth_predictor", [qw/ssse3 avx2 neon/], '16x64');
  $specialize_sizes->("aom_paeth_predictor", [qw/ssse3 neon/], '32x8');
  $specialize_sizes->("aom_paeth_predictor", [qw/ssse3 avx2 neon/], '64x16');

  foreach my $pred (qw/smooth smooth_v smooth_h/) {
    $specialize_sizes->("aom_${pred}_predictor", [qw/neon ssse3/], @sizes_1to4);
  }

  $specialize_sizes->("aom_dc_predictor", [qw/neon sse2/], @sizes_1to4_upto_32x8);
  $specialize_sizes->("aom_dc_predictor", [qw/neon sse2 avx2/], '64x16');
}  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # v / dc / h / dc_128 / dc_left / dc_top: sse2 is listed up to 32x32 only.
  #
  # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
  # by multiply and shift.
  foreach my $pred (qw/v dc h dc_128 dc_left dc_top/) {
    $specialize_sizes->("aom_highbd_${pred}_predictor", [qw/sse2 neon/],
                        @sizes_upto_16x32, qw/32x16 32x32/);
    $specialize_sizes->("aom_highbd_${pred}_predictor", [qw/neon/],
                        qw/32x64 64x32 64x64/);
  }

  foreach my $pred (qw/paeth smooth smooth_v smooth_h/) {
    $specialize_sizes->("aom_highbd_${pred}_predictor", [qw/neon/], @sizes_sq_and_1to2);
  }

  if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
      (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
    foreach my $pred (qw/v dc h dc_128 dc_left dc_top paeth smooth smooth_v smooth_h/) {
      $specialize_sizes->("aom_highbd_${pred}_predictor", [qw/neon/], @sizes_1to4);
    }
  }  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
}
#
# Sub Pixel Filters
#
my $convolve_copy_args =
    "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int w, int h";
my $convolve8_args =
    "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

add_proto("void", "aom_convolve_copy", $convolve_copy_args);
foreach my $dir (qw/horiz vert/) {
  add_proto("void", "aom_convolve8_${dir}", $convolve8_args);
}

specialize qw/aom_convolve_copy neon sse2 avx2/;
foreach my $dir (qw/horiz vert/) {
  specialize("aom_convolve8_${dir}", qw/neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3");
}

add_proto qw/void aom_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/aom_scaled_2d ssse3 neon neon_dotprod neon_i8mm/;

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void aom_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, int w, int h";
  specialize qw/aom_highbd_convolve_copy sse2 avx2 neon/;

  # Same parameter list as the 8-bit convolve8 functions plus a trailing bd.
  foreach my $dir (qw/horiz vert/) {
    add_proto("void", "aom_highbd_convolve8_${dir}", "${convolve8_args}, int bd");
    specialize("aom_highbd_convolve8_${dir}", qw/sse2 avx2 neon sve/);
  }
}

#
# Loopfilter
#

# The 8-bit loop filters use three parameter lists:
#   single - one blimit/limit/thresh set
#   dual   - two sets, suffixed 0 and 1
#   quad   - one set, suffixed 0
my %lpf_param_lists = (
  single => "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh",
  dual   => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1",
  quad   => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0",
);

# Each row: function name suffix, parameter-list kind, ISA specializations.
my @lpf_funcs = (
  [qw/vertical_14        single sse2 neon/],
  [qw/vertical_14_dual   dual   sse2 neon/],
  [qw/vertical_14_quad   quad   avx2 sse2 neon/],
  [qw/vertical_6         single sse2 neon/],
  [qw/vertical_8         single sse2 neon/],
  [qw/vertical_8_dual    dual   sse2 neon/],
  [qw/vertical_8_quad    quad   sse2 neon/],
  [qw/vertical_4         single sse2 neon/],
  [qw/vertical_4_dual    dual   sse2 neon/],
  [qw/vertical_4_quad    quad   sse2 neon/],
  [qw/horizontal_14      single sse2 neon/],
  [qw/horizontal_14_dual dual   sse2 neon/],
  [qw/horizontal_14_quad quad   sse2 avx2 neon/],
  [qw/horizontal_6       single sse2 neon/],
  [qw/horizontal_6_dual  dual   sse2 neon/],
  [qw/horizontal_6_quad  quad   sse2 avx2 neon/],
  [qw/horizontal_8       single sse2 neon/],
  [qw/horizontal_8_dual  dual   sse2 neon/],
  [qw/horizontal_8_quad  quad   sse2 avx2 neon/],
  [qw/horizontal_4       single sse2 neon/],
  [qw/horizontal_4_dual  dual   sse2 neon/],
  [qw/horizontal_4_quad  quad   sse2 neon/],
  [qw/vertical_6_dual    dual   sse2 neon/],
  [qw/vertical_6_quad    quad   sse2 neon/],
);

foreach my $lpf (@lpf_funcs) {
  my ($suffix, $kind, @isas) = @$lpf;
  add_proto("void", "aom_lpf_${suffix}", $lpf_param_lists{$kind});
  specialize("aom_lpf_${suffix}", @isas);
}

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_14 neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_14_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_8 neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_8_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_6 neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_6_dual neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_4 neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_4_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_14 neon sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd";
  specialize qw/aom_highbd_lpf_horizontal_14_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_6 neon sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_horizontal_6_dual neon sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_8 neon sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_horizontal_8_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_4 neon sse2/; 667 668 add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 669 specialize qw/aom_highbd_lpf_horizontal_4_dual neon sse2 avx2/; 670 } 671 672 # 673 # Encoder functions. 674 # 675 676 # 677 # Forward transform (declared only when CONFIG_AV1_ENCODER is "yes") 678 # 679 if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){ 680 add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; 681 specialize qw/aom_fdct4x4 neon sse2/; 682 683 add_proto qw/void aom_fdct4x4_lp/, "const int16_t *input, int16_t *output, int stride"; 684 specialize qw/aom_fdct4x4_lp neon sse2/; 685 686 if (aom_config("CONFIG_INTERNAL_STATS") eq "yes"){ 687 # 8x8 DCT transform for psnr-hvs. Unlike the other transforms, it is not 688 # compatible with av1 scan orders because it does two transposes. 689 add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; 690 specialize qw/aom_fdct8x8 neon sse2/, "$ssse3_x86_64"; 691 # High bit depth variant of the 8x8 DCT above. 692 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 693 add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; 694 specialize qw/aom_highbd_fdct8x8 sse2/; 695 } # CONFIG_AV1_HIGHBITDEPTH 696 } # CONFIG_INTERNAL_STATS 697 # Float FFT/IFFT, only used for denoising (and noise power spectral density estimation). aom_fft2x2_float is declared without a specialize call. 698 add_proto qw/void aom_fft2x2_float/, "const float *input, float *temp, float *output"; 699 700 add_proto qw/void aom_fft4x4_float/, "const float *input, float *temp, float *output"; 701 specialize qw/aom_fft4x4_float sse2/; 702 703 add_proto qw/void aom_fft8x8_float/, "const float *input, float *temp, float *output"; 704 specialize qw/aom_fft8x8_float avx2 sse2/; 705 706 add_proto qw/void aom_fft16x16_float/, "const float *input, float *temp, float *output"; 707 specialize qw/aom_fft16x16_float avx2 sse2/; 708 709 add_proto qw/void 
aom_fft32x32_float/, "const float *input, float *temp, float *output"; 710 specialize qw/aom_fft32x32_float avx2 sse2/; 711 712 add_proto qw/void aom_ifft2x2_float/, "const float *input, float *temp, float *output"; 713 714 add_proto qw/void aom_ifft4x4_float/, "const float *input, float *temp, float *output"; 715 specialize qw/aom_ifft4x4_float sse2/; 716 717 add_proto qw/void aom_ifft8x8_float/, "const float *input, float *temp, float *output"; 718 specialize qw/aom_ifft8x8_float avx2 sse2/; 719 720 add_proto qw/void aom_ifft16x16_float/, "const float *input, float *temp, float *output"; 721 specialize qw/aom_ifft16x16_float avx2 sse2/; 722 723 add_proto qw/void aom_ifft32x32_float/, "const float *input, float *temp, float *output"; 724 specialize qw/aom_ifft32x32_float avx2 sse2/; 725 } # CONFIG_AV1_ENCODER 726 727 # 728 # Quantization 729 # 730 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { 731 add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 732 specialize qw/aom_quantize_b sse2 neon avx avx2/, "$ssse3_x86_64"; 733 734 add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 735 specialize qw/aom_quantize_b_32x32 neon avx avx2/, "$ssse3_x86_64"; 736 737 add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const 
int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 738 specialize qw/aom_quantize_b_64x64 neon ssse3 avx2/; 739 740 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 741 add_proto qw/void aom_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 742 specialize qw/aom_quantize_b_adaptive sse2 avx2/; 743 744 add_proto qw/void aom_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 745 specialize qw/aom_quantize_b_32x32_adaptive sse2/; 746 747 add_proto qw/void aom_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 748 specialize qw/aom_quantize_b_64x64_adaptive sse2/; 749 } 750 } # CONFIG_AV1_ENCODER 751 752 if (aom_config("CONFIG_AV1_ENCODER") eq "yes" && aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 753 add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 754 specialize qw/aom_highbd_quantize_b sse2 avx2 neon/; 755 756 add_proto qw/void 
aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 757 specialize qw/aom_highbd_quantize_b_32x32 sse2 avx2 neon/; 758 759 add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 760 specialize qw/aom_highbd_quantize_b_64x64 sse2 avx2 neon/; 761 762 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 763 add_proto qw/void aom_highbd_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 764 specialize qw/aom_highbd_quantize_b_adaptive sse2 avx2 neon/; 765 766 add_proto qw/void aom_highbd_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 767 specialize qw/aom_highbd_quantize_b_32x32_adaptive sse2 avx2 neon/; 768 769 add_proto qw/void aom_highbd_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const 
int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 770 specialize qw/aom_highbd_quantize_b_64x64_adaptive sse2 neon/; 771 } 772 } # CONFIG_AV1_ENCODER && CONFIG_AV1_HIGHBITDEPTH 773 774 # 775 # Alpha blending with mask (declared unconditionally; the aom_highbd_ variants are gated on CONFIG_AV1_HIGHBITDEPTH) 776 # 777 add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params"; 778 specialize qw/aom_lowbd_blend_a64_d16_mask sse4_1 avx2 neon/; 779 add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh"; 780 add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h"; 781 add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h"; 782 specialize "aom_blend_a64_mask", qw/sse4_1 neon avx2/; 783 specialize "aom_blend_a64_hmask", qw/sse4_1 neon/; 784 specialize "aom_blend_a64_vmask", qw/sse4_1 neon/; 785 786 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 787 add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, int bd"; 788 add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd"; 789 add_proto qw/void aom_highbd_blend_a64_vmask/, 
"uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd"; 790 add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd"; 791 specialize "aom_highbd_blend_a64_mask", qw/sse4_1 neon/; 792 specialize "aom_highbd_blend_a64_hmask", qw/sse4_1 neon/; 793 specialize "aom_highbd_blend_a64_vmask", qw/sse4_1 neon/; 794 specialize "aom_highbd_blend_a64_d16_mask", qw/sse4_1 neon avx2/; 795 } 796 797 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { 798 # 799 # Block subtraction 800 # 801 add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; 802 specialize qw/aom_subtract_block neon sse2 avx2/; 803 804 add_proto qw/int64_t/, "aom_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height"; 805 specialize qw/aom_sse sse4_1 avx2 neon neon_dotprod/; 806 807 add_proto qw/void/, "aom_get_blk_sse_sum", "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum"; 808 specialize qw/aom_get_blk_sse_sum sse2 avx2 neon sve/; 809 810 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 811 add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; 812 specialize qw/aom_highbd_subtract_block sse2 neon/; 813 814 add_proto qw/int64_t/, "aom_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height"; 815 specialize qw/aom_highbd_sse sse4_1 avx2 neon sve/; 816 } 817 818 # 
819 # Sum of Squares 820 # 821 add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height"; 822 specialize qw/aom_sum_squares_2d_i16 sse2 avx2 neon sve/; 823 824 add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N"; 825 specialize qw/aom_sum_squares_i16 sse2 neon sve/; 826 827 add_proto qw/uint64_t aom_var_2d_u8/, "uint8_t *src, int src_stride, int width, int height"; 828 specialize qw/aom_var_2d_u8 sse2 avx2 neon neon_dotprod/; 829 830 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 831 add_proto qw/uint64_t aom_var_2d_u16/, "uint8_t *src, int src_stride, int width, int height"; 832 specialize qw/aom_var_2d_u16 sse2 avx2 neon sve/; 833 } 834 835 # 836 # Single block SAD / Single block Avg SAD. One aom_sad<W>x<H> prototype is declared per entry of @encoder_block_sizes; the aom_sad_skip_ variant only when H >= 16, and the _avg variant only when neither W nor H is 4. 837 # 838 foreach (@encoder_block_sizes) { 839 ($w, $h) = @$_; 840 add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 841 if ($h >= 16) { 842 add_proto qw/unsigned int/, "aom_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 843 } 844 if ($w != 4 && $h != 4) { 845 add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; 846 } 847 } 848 849 add_proto qw/uint64_t aom_sum_sse_2d_i16/, "const int16_t *src, int src_stride, int width, int height, int *sum"; 850 specialize qw/aom_sum_sse_2d_i16 avx2 neon sse2 sve/; 851 specialize qw/aom_sad128x128 avx2 sse2 neon neon_dotprod/; 852 specialize qw/aom_sad128x64 avx2 sse2 neon neon_dotprod/; 853 specialize qw/aom_sad64x128 avx2 sse2 neon neon_dotprod/; 854 specialize qw/aom_sad64x64 avx2 sse2 neon neon_dotprod/; 855 specialize qw/aom_sad64x32 avx2 sse2 neon neon_dotprod/; 856 # The avx512 specializations below are only added when CONFIG_HIGHWAY is "yes". 857 if(aom_config("CONFIG_HIGHWAY") eq "yes") { 858 specialize qw/aom_sad128x128 avx512/; 859 specialize qw/aom_sad128x64 avx512/; 860 specialize 
qw/aom_sad64x128 avx512/; 861 specialize qw/aom_sad64x64 avx512/; 862 specialize qw/aom_sad64x32 avx512/; 863 } 864 865 specialize qw/aom_sad32x64 avx2 sse2 neon neon_dotprod/; 866 specialize qw/aom_sad32x32 avx2 sse2 neon neon_dotprod/; 867 specialize qw/aom_sad32x16 avx2 sse2 neon neon_dotprod/; 868 specialize qw/aom_sad16x32 sse2 neon neon_dotprod/; 869 specialize qw/aom_sad16x16 sse2 neon neon_dotprod/; 870 specialize qw/aom_sad16x8 sse2 neon neon_dotprod/; 871 specialize qw/aom_sad8x16 sse2 neon/; 872 specialize qw/aom_sad8x8 sse2 neon/; 873 specialize qw/aom_sad8x4 sse2 neon/; 874 specialize qw/aom_sad4x8 sse2 neon/; 875 specialize qw/aom_sad4x4 sse2 neon/; 876 877 specialize qw/aom_sad4x16 sse2 neon/; 878 specialize qw/aom_sad16x4 sse2 neon neon_dotprod/; 879 specialize qw/aom_sad8x32 sse2 neon/; 880 specialize qw/aom_sad32x8 sse2 neon neon_dotprod/; 881 specialize qw/aom_sad16x64 sse2 neon neon_dotprod/; 882 specialize qw/aom_sad64x16 sse2 neon neon_dotprod/; 883 884 specialize qw/aom_sad_skip_128x128 avx2 sse2 neon neon_dotprod/; 885 specialize qw/aom_sad_skip_128x64 avx2 sse2 neon neon_dotprod/; 886 specialize qw/aom_sad_skip_64x128 avx2 sse2 neon neon_dotprod/; 887 specialize qw/aom_sad_skip_64x64 avx2 sse2 neon neon_dotprod/; 888 specialize qw/aom_sad_skip_64x32 avx2 sse2 neon neon_dotprod/; 889 890 if(aom_config("CONFIG_HIGHWAY") eq "yes") { 891 specialize qw/aom_sad_skip_128x128 avx512/; 892 specialize qw/aom_sad_skip_128x64 avx512/; 893 specialize qw/aom_sad_skip_64x128 avx512/; 894 specialize qw/aom_sad_skip_64x64 avx512/; 895 specialize qw/aom_sad_skip_64x32 avx512/; 896 } 897 898 specialize qw/aom_sad_skip_32x64 avx2 sse2 neon neon_dotprod/; 899 specialize qw/aom_sad_skip_32x32 avx2 sse2 neon neon_dotprod/; 900 specialize qw/aom_sad_skip_32x16 avx2 sse2 neon neon_dotprod/; 901 specialize qw/aom_sad_skip_16x32 sse2 neon neon_dotprod/; 902 specialize qw/aom_sad_skip_16x16 sse2 neon neon_dotprod/; 903 specialize qw/aom_sad_skip_16x8 sse2 neon 
neon_dotprod/; 904 specialize qw/aom_sad_skip_8x16 sse2 neon/; 905 906 specialize qw/aom_sad_skip_4x16 sse2 neon/; 907 specialize qw/aom_sad_skip_8x32 sse2 neon/; 908 specialize qw/aom_sad_skip_16x64 sse2 neon neon_dotprod/; 909 specialize qw/aom_sad_skip_64x16 sse2 neon neon_dotprod/; 910 911 specialize qw/aom_sad128x128_avg avx2 sse2 neon neon_dotprod/; 912 specialize qw/aom_sad128x64_avg avx2 sse2 neon neon_dotprod/; 913 specialize qw/aom_sad64x128_avg avx2 sse2 neon neon_dotprod/; 914 specialize qw/aom_sad64x64_avg avx2 sse2 neon neon_dotprod/; 915 specialize qw/aom_sad64x32_avg avx2 sse2 neon neon_dotprod/; 916 specialize qw/aom_sad32x64_avg avx2 sse2 neon neon_dotprod/; 917 918 if(aom_config("CONFIG_HIGHWAY") eq "yes") { 919 specialize qw/aom_sad128x128_avg avx512/; 920 specialize qw/aom_sad128x64_avg avx512/; 921 specialize qw/aom_sad64x128_avg avx512/; 922 specialize qw/aom_sad64x64_avg avx512/; 923 specialize qw/aom_sad64x32_avg avx512/; 924 } 925 926 specialize qw/aom_sad32x32_avg avx2 sse2 neon neon_dotprod/; 927 specialize qw/aom_sad32x16_avg avx2 sse2 neon neon_dotprod/; 928 specialize qw/aom_sad16x32_avg sse2 neon neon_dotprod/; 929 specialize qw/aom_sad16x16_avg sse2 neon neon_dotprod/; 930 specialize qw/aom_sad16x8_avg sse2 neon neon_dotprod/; 931 specialize qw/aom_sad8x16_avg sse2 neon/; 932 specialize qw/aom_sad8x8_avg sse2 neon/; 933 934 specialize qw/aom_sad8x32_avg sse2 neon/; 935 specialize qw/aom_sad32x8_avg sse2 neon neon_dotprod/; 936 specialize qw/aom_sad16x64_avg sse2 neon neon_dotprod/; 937 specialize qw/aom_sad64x16_avg sse2 neon neon_dotprod/; 938 939 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 940 foreach (@encoder_block_sizes) { 941 ($w, $h) = @$_; 942 add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 943 if ($h >= 16) { 944 add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t 
*ref_ptr, int ref_stride"; 945 } 946 if ($w != 4 && $h != 4) { 947 add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; 948 } 949 if ($w != 128 && $h != 128 && $w != 4) { 950 specialize "aom_highbd_sad${w}x${h}", qw/sse2/; 951 specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/ if ($h != 4); # the _avg prototype above is only declared when $h != 4, so do not specialize an undeclared function 952 } 953 } 954 specialize qw/aom_highbd_sad128x128 avx2 neon/; 955 specialize qw/aom_highbd_sad128x64 avx2 neon/; 956 specialize qw/aom_highbd_sad64x128 avx2 neon/; 957 specialize qw/aom_highbd_sad64x64 avx2 sse2 neon/; 958 specialize qw/aom_highbd_sad64x32 avx2 sse2 neon/; 959 specialize qw/aom_highbd_sad32x64 avx2 sse2 neon/; 960 specialize qw/aom_highbd_sad32x32 avx2 sse2 neon/; 961 specialize qw/aom_highbd_sad32x16 avx2 sse2 neon/; 962 specialize qw/aom_highbd_sad16x32 avx2 sse2 neon/; 963 specialize qw/aom_highbd_sad16x16 avx2 sse2 neon/; 964 specialize qw/aom_highbd_sad16x8 avx2 sse2 neon/; 965 specialize qw/aom_highbd_sad8x16 sse2 neon/; 966 specialize qw/aom_highbd_sad8x8 sse2 neon/; 967 specialize qw/aom_highbd_sad8x4 sse2 neon/; 968 specialize qw/aom_highbd_sad4x8 sse2 neon/; 969 specialize qw/aom_highbd_sad4x4 sse2 neon/; 970 971 specialize qw/aom_highbd_sad4x16 sse2 neon/; 972 specialize qw/aom_highbd_sad16x4 avx2 sse2 neon/; 973 specialize qw/aom_highbd_sad8x32 sse2 neon/; 974 specialize qw/aom_highbd_sad32x8 avx2 sse2 neon/; 975 specialize qw/aom_highbd_sad16x64 avx2 sse2 neon/; 976 specialize qw/aom_highbd_sad64x16 avx2 sse2 neon/; 977 978 specialize qw/aom_highbd_sad_skip_128x128 avx2 neon/; 979 specialize qw/aom_highbd_sad_skip_128x64 avx2 neon/; 980 specialize qw/aom_highbd_sad_skip_64x128 avx2 neon/; 981 specialize qw/aom_highbd_sad_skip_64x64 avx2 sse2 neon/; 982 specialize qw/aom_highbd_sad_skip_64x32 avx2 sse2 neon/; 983 specialize qw/aom_highbd_sad_skip_32x64 avx2 sse2 neon/; 984 specialize qw/aom_highbd_sad_skip_32x32 avx2 sse2 neon/; 985 specialize 
qw/aom_highbd_sad_skip_32x16 avx2 sse2 neon/; 986 specialize qw/aom_highbd_sad_skip_16x32 avx2 sse2 neon/; 987 specialize qw/aom_highbd_sad_skip_16x16 avx2 sse2 neon/; 988 specialize qw/aom_highbd_sad_skip_8x16 sse2 neon/; 989 990 specialize qw/aom_highbd_sad_skip_4x16 sse2 neon/; 991 specialize qw/aom_highbd_sad_skip_8x32 sse2 neon/; 992 specialize qw/aom_highbd_sad_skip_16x64 avx2 sse2 neon/; 993 specialize qw/aom_highbd_sad_skip_64x16 avx2 sse2 neon/; 994 995 specialize qw/aom_highbd_sad128x128_avg avx2 neon/; 996 specialize qw/aom_highbd_sad128x64_avg avx2 neon/; 997 specialize qw/aom_highbd_sad64x128_avg avx2 neon/; 998 specialize qw/aom_highbd_sad64x64_avg avx2 sse2 neon/; 999 specialize qw/aom_highbd_sad64x32_avg avx2 sse2 neon/; 1000 specialize qw/aom_highbd_sad32x64_avg avx2 sse2 neon/; 1001 specialize qw/aom_highbd_sad32x32_avg avx2 sse2 neon/; 1002 specialize qw/aom_highbd_sad32x16_avg avx2 sse2 neon/; 1003 specialize qw/aom_highbd_sad16x32_avg avx2 sse2 neon/; 1004 specialize qw/aom_highbd_sad16x16_avg avx2 sse2 neon/; 1005 specialize qw/aom_highbd_sad16x8_avg avx2 sse2 neon/; 1006 specialize qw/aom_highbd_sad8x16_avg neon/; 1007 specialize qw/aom_highbd_sad8x8_avg neon/; 1008 1009 specialize qw/aom_highbd_sad8x32_avg sse2 neon/; 1010 specialize qw/aom_highbd_sad16x64_avg avx2 sse2 neon/; 1011 specialize qw/aom_highbd_sad32x8_avg avx2 sse2 neon/; 1012 specialize qw/aom_highbd_sad64x16_avg avx2 sse2 neon/; 1013 } 1014 # 1015 # Masked SAD: one prototype per entry of @encoder_block_sizes, each specialized for ssse3, avx2 and neon. 1016 # 1017 foreach (@encoder_block_sizes) { 1018 ($w, $h) = @$_; 1019 add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask"; 1020 specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2 neon/; 1021 } 1022 1023 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1024 foreach (@encoder_block_sizes) { 1025 ($w, $h) = @$_; 1026 add_proto qw/unsigned int/, 
"aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask"; 1027 specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2 neon/; 1028 } 1029 } 1030 1031 # 1032 # OBMC SAD (declared only when CONFIG_REALTIME_ONLY is not "yes"). NOTE(review): @encoder_block_sizes as built at the top of this file does not appear to contain 128x32 or 32x128, so the size guard around specialize below looks always-true -- confirm before relying on or removing it. 1033 # 1034 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1035 foreach (@encoder_block_sizes) { 1036 ($w, $h) = @$_; 1037 add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask"; 1038 if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) { 1039 specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/; 1040 } 1041 } 1042 1043 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1044 foreach (@encoder_block_sizes) { 1045 ($w, $h) = @$_; 1046 add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask"; 1047 if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) { 1048 specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/; 1049 } 1050 } 1051 } 1052 } # !CONFIG_REALTIME_ONLY 1053 1054 # 1055 # Multi-block SAD, comparing a reference to N independent blocks. Declares x4d and x3d variants for every encoder block size, plus a skip x4d variant when H >= 16. Note that the x3d prototypes also take 4-element ref_ptr and sad_array arrays. 1056 # 1057 foreach (@encoder_block_sizes) { 1058 ($w, $h) = @$_; 1059 add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1060 add_proto qw/void/, "aom_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1061 if ($h >= 16) { 1062 add_proto qw/void/, "aom_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1063 } 1064 } 1065 1066 specialize qw/aom_sad128x128x4d avx2 sse2 neon neon_dotprod/; 1067 specialize qw/aom_sad128x64x4d avx2 sse2 neon neon_dotprod/; 1068 specialize qw/aom_sad64x128x4d avx2 
sse2 neon neon_dotprod/; 1069 specialize qw/aom_sad64x64x4d avx2 sse2 neon neon_dotprod/; 1070 specialize qw/aom_sad64x32x4d avx2 sse2 neon neon_dotprod/; 1071 specialize qw/aom_sad32x64x4d avx2 sse2 neon neon_dotprod/; 1072 1073 if(aom_config("CONFIG_HIGHWAY") eq "yes") { 1074 specialize qw/aom_sad128x128x4d avx512/; 1075 specialize qw/aom_sad128x64x4d avx512/; 1076 specialize qw/aom_sad64x128x4d avx512/; 1077 specialize qw/aom_sad64x64x4d avx512/; 1078 specialize qw/aom_sad64x32x4d avx512/; 1079 } 1080 1081 specialize qw/aom_sad32x32x4d avx2 sse2 neon neon_dotprod/; 1082 specialize qw/aom_sad32x16x4d avx2 sse2 neon neon_dotprod/; 1083 specialize qw/aom_sad16x32x4d avx2 sse2 neon neon_dotprod/; 1084 specialize qw/aom_sad16x16x4d avx2 sse2 neon neon_dotprod/; 1085 specialize qw/aom_sad16x8x4d avx2 sse2 neon neon_dotprod/; 1086 1087 specialize qw/aom_sad8x16x4d sse2 neon/; 1088 specialize qw/aom_sad8x8x4d sse2 neon/; 1089 specialize qw/aom_sad8x4x4d sse2 neon/; 1090 specialize qw/aom_sad4x8x4d sse2 neon/; 1091 specialize qw/aom_sad4x4x4d sse2 neon/; 1092 1093 specialize qw/aom_sad64x16x4d avx2 sse2 neon neon_dotprod/; 1094 specialize qw/aom_sad32x8x4d avx2 sse2 neon neon_dotprod/; 1095 specialize qw/aom_sad16x64x4d avx2 sse2 neon neon_dotprod/; 1096 specialize qw/aom_sad16x4x4d avx2 sse2 neon neon_dotprod/; 1097 specialize qw/aom_sad8x32x4d sse2 neon/; 1098 specialize qw/aom_sad4x16x4d sse2 neon/; 1099 1100 specialize qw/aom_sad_skip_128x128x4d avx2 sse2 neon neon_dotprod/; 1101 specialize qw/aom_sad_skip_128x64x4d avx2 sse2 neon neon_dotprod/; 1102 specialize qw/aom_sad_skip_64x128x4d avx2 sse2 neon neon_dotprod/; 1103 specialize qw/aom_sad_skip_64x64x4d avx2 sse2 neon neon_dotprod/; 1104 specialize qw/aom_sad_skip_64x32x4d avx2 sse2 neon neon_dotprod/; 1105 specialize qw/aom_sad_skip_64x16x4d avx2 sse2 neon neon_dotprod/; 1106 specialize qw/aom_sad_skip_32x64x4d avx2 sse2 neon neon_dotprod/; 1107 specialize qw/aom_sad_skip_32x32x4d avx2 sse2 neon neon_dotprod/; 
1108 specialize qw/aom_sad_skip_32x16x4d avx2 sse2 neon neon_dotprod/; 1109 1110 if(aom_config("CONFIG_HIGHWAY") eq "yes") { 1111 specialize qw/aom_sad_skip_128x128x4d avx512/; 1112 specialize qw/aom_sad_skip_128x64x4d avx512/; 1113 specialize qw/aom_sad_skip_64x128x4d avx512/; 1114 specialize qw/aom_sad_skip_64x64x4d avx512/; 1115 specialize qw/aom_sad_skip_64x32x4d avx512/; 1116 } 1117 1118 specialize qw/aom_sad_skip_16x64x4d avx2 sse2 neon neon_dotprod/; 1119 specialize qw/aom_sad_skip_16x32x4d avx2 sse2 neon neon_dotprod/; 1120 specialize qw/aom_sad_skip_16x16x4d avx2 sse2 neon neon_dotprod/; 1121 specialize qw/aom_sad_skip_16x8x4d avx2 sse2 neon neon_dotprod/; 1122 specialize qw/aom_sad_skip_8x32x4d sse2 neon/; 1123 specialize qw/aom_sad_skip_8x16x4d sse2 neon/; 1124 specialize qw/aom_sad_skip_4x16x4d sse2 neon/; 1125 1126 specialize qw/aom_sad128x128x3d avx2 neon neon_dotprod/; 1127 specialize qw/aom_sad128x64x3d avx2 neon neon_dotprod/; 1128 specialize qw/aom_sad64x128x3d avx2 neon neon_dotprod/; 1129 specialize qw/aom_sad64x64x3d avx2 neon neon_dotprod/; 1130 specialize qw/aom_sad64x32x3d avx2 neon neon_dotprod/; 1131 1132 if(aom_config("CONFIG_HIGHWAY") eq "yes") { 1133 specialize qw/aom_sad128x128x3d avx512/; 1134 specialize qw/aom_sad128x64x3d avx512/; 1135 specialize qw/aom_sad64x128x3d avx512/; 1136 specialize qw/aom_sad64x64x3d avx512/; 1137 specialize qw/aom_sad64x32x3d avx512/; 1138 } 1139 1140 specialize qw/aom_sad32x64x3d avx2 neon neon_dotprod/; 1141 specialize qw/aom_sad32x32x3d avx2 neon neon_dotprod/; 1142 specialize qw/aom_sad32x16x3d avx2 neon neon_dotprod/; 1143 specialize qw/aom_sad16x32x3d avx2 neon neon_dotprod/; 1144 specialize qw/aom_sad16x16x3d avx2 neon neon_dotprod/; 1145 specialize qw/aom_sad16x8x3d avx2 neon neon_dotprod/; 1146 specialize qw/aom_sad8x16x3d neon/; 1147 specialize qw/aom_sad8x8x3d neon/; 1148 specialize qw/aom_sad8x4x3d neon/; 1149 specialize qw/aom_sad4x8x3d neon/; 1150 specialize qw/aom_sad4x4x3d neon/; 1151 1152 
specialize qw/aom_sad64x16x3d avx2 neon neon_dotprod/; 1153 specialize qw/aom_sad32x8x3d avx2 neon neon_dotprod/; 1154 specialize qw/aom_sad16x64x3d avx2 neon neon_dotprod/; 1155 specialize qw/aom_sad16x4x3d avx2 neon neon_dotprod/; 1156 specialize qw/aom_sad8x32x3d neon/; 1157 specialize qw/aom_sad4x16x3d neon/; 1158 1159 # 1160 # High bit depth multi-block SAD, comparing a reference to N independent blocks 1161 # (sse2 is requested inside the loop for every x4d size where neither dimension is 128; the explicit lists after the loop add the remaining targets) 1162 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1163 foreach (@encoder_block_sizes) { 1164 ($w, $h) = @$_; 1165 add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1166 add_proto qw/void/, "aom_highbd_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1167 if ($h >= 16) { 1168 add_proto qw/void/, "aom_highbd_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1169 } 1170 if ($w != 128 && $h != 128) { 1171 specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/; 1172 } 1173 } 1174 specialize qw/aom_highbd_sad128x128x4d avx2 neon/; 1175 specialize qw/aom_highbd_sad128x64x4d avx2 neon/; 1176 specialize qw/aom_highbd_sad64x128x4d avx2 neon/; 1177 specialize qw/aom_highbd_sad64x64x4d sse2 avx2 neon/; 1178 specialize qw/aom_highbd_sad64x32x4d sse2 avx2 neon/; 1179 specialize qw/aom_highbd_sad32x64x4d sse2 avx2 neon/; 1180 specialize qw/aom_highbd_sad32x32x4d sse2 avx2 neon/; 1181 specialize qw/aom_highbd_sad32x16x4d sse2 avx2 neon/; 1182 specialize qw/aom_highbd_sad16x32x4d sse2 avx2 neon/; 1183 specialize qw/aom_highbd_sad16x16x4d sse2 avx2 neon/; 1184 specialize qw/aom_highbd_sad16x8x4d sse2 avx2 neon/; 1185 specialize qw/aom_highbd_sad8x16x4d sse2 neon/; 1186 specialize qw/aom_highbd_sad8x8x4d sse2 neon/; 1187 specialize qw/aom_highbd_sad8x4x4d sse2 neon/; 1188 specialize 
qw/aom_highbd_sad4x8x4d sse2 neon/; 1189 specialize qw/aom_highbd_sad4x4x4d sse2 neon/; 1190 1191 specialize qw/aom_highbd_sad4x16x4d sse2 neon/; 1192 specialize qw/aom_highbd_sad16x4x4d avx2 sse2 neon/; 1193 specialize qw/aom_highbd_sad8x32x4d sse2 neon/; 1194 specialize qw/aom_highbd_sad32x8x4d avx2 sse2 neon/; 1195 specialize qw/aom_highbd_sad16x64x4d avx2 sse2 neon/; 1196 specialize qw/aom_highbd_sad64x16x4d avx2 sse2 neon/; 1197 1198 specialize qw/aom_highbd_sad_skip_128x128x4d avx2 neon/; 1199 specialize qw/aom_highbd_sad_skip_128x64x4d avx2 neon/; 1200 specialize qw/aom_highbd_sad_skip_64x128x4d avx2 neon/; 1201 specialize qw/aom_highbd_sad_skip_64x64x4d avx2 sse2 neon/; 1202 specialize qw/aom_highbd_sad_skip_64x32x4d avx2 sse2 neon/; 1203 specialize qw/aom_highbd_sad_skip_32x64x4d avx2 sse2 neon/; 1204 specialize qw/aom_highbd_sad_skip_32x32x4d avx2 sse2 neon/; 1205 specialize qw/aom_highbd_sad_skip_32x16x4d avx2 sse2 neon/; 1206 specialize qw/aom_highbd_sad_skip_16x32x4d avx2 sse2 neon/; 1207 specialize qw/aom_highbd_sad_skip_16x16x4d avx2 sse2 neon/; 1208 specialize qw/aom_highbd_sad_skip_8x16x4d sse2 neon/; 1209 1210 specialize qw/aom_highbd_sad_skip_4x16x4d sse2 neon/; 1211 specialize qw/aom_highbd_sad_skip_8x32x4d sse2 neon/; 1212 specialize qw/aom_highbd_sad_skip_16x64x4d avx2 sse2 neon/; 1213 specialize qw/aom_highbd_sad_skip_64x16x4d avx2 sse2 neon/; 1214 1215 specialize qw/aom_highbd_sad128x128x3d avx2 neon/; 1216 specialize qw/aom_highbd_sad128x64x3d avx2 neon/; 1217 specialize qw/aom_highbd_sad64x128x3d avx2 neon/; 1218 specialize qw/aom_highbd_sad64x64x3d avx2 neon/; 1219 specialize qw/aom_highbd_sad64x32x3d avx2 neon/; 1220 specialize qw/aom_highbd_sad32x64x3d avx2 neon/; 1221 specialize qw/aom_highbd_sad32x32x3d avx2 neon/; 1222 specialize qw/aom_highbd_sad32x16x3d avx2 neon/; 1223 specialize qw/aom_highbd_sad16x32x3d avx2 neon/; 1224 specialize qw/aom_highbd_sad16x16x3d avx2 neon/; 1225 specialize qw/aom_highbd_sad16x8x3d avx2 neon/; 1226 
specialize qw/aom_highbd_sad8x16x3d neon/; 1227 specialize qw/aom_highbd_sad8x8x3d neon/; 1228 specialize qw/aom_highbd_sad8x4x3d neon/; 1229 specialize qw/aom_highbd_sad4x8x3d neon/; 1230 specialize qw/aom_highbd_sad4x4x3d neon/; 1231 1232 specialize qw/aom_highbd_sad64x16x3d avx2 neon/; 1233 specialize qw/aom_highbd_sad32x8x3d avx2 neon/; 1234 specialize qw/aom_highbd_sad16x64x3d avx2 neon/; 1235 specialize qw/aom_highbd_sad16x4x3d avx2 neon/; 1236 specialize qw/aom_highbd_sad8x32x3d neon/; 1237 specialize qw/aom_highbd_sad4x16x3d neon/; 1238 } 1239 # 1240 # Avg 1241 # 1242 add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *, int p"; 1243 specialize qw/aom_avg_8x8 sse2 neon/; 1244 1245 add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *, int p"; 1246 specialize qw/aom_avg_4x4 sse2 neon/; 1247 1248 add_proto qw/void aom_avg_8x8_quad/, "const uint8_t *s, int p, int x16_idx, int y16_idx, int *avg"; 1249 specialize qw/aom_avg_8x8_quad avx2 sse2 neon/; 1250 1251 add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; 1252 specialize qw/aom_minmax_8x8 sse2 neon/; 1253 1254 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1255 add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *, int p"; 1256 specialize qw/aom_highbd_avg_8x8 neon/; 1257 add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *, int p"; 1258 specialize qw/aom_highbd_avg_4x4 neon/; 1259 add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; 1260 specialize qw/aom_highbd_minmax_8x8 neon/; 1261 } 1262 1263 add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor"; 1264 specialize qw/aom_int_pro_row avx2 sse2 neon/; 1265 1266 add_proto qw/void aom_int_pro_col/, "int16_t *vbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor"; 1267 
specialize "aom_int_pro_col", qw/avx2 sse2 neon/;

add_proto qw/int/, "aom_vector_var", "const int16_t *ref, const int16_t *src, int bwl";
specialize "aom_vector_var", qw/avx2 sse4_1 neon sve/;

#
# Hadamard transform and SATD for implementing the temporal dependency model
#
# Argument list shared by the tran_low_t Hadamard prototypes (low and high
# bitdepth).
my $hadamard_args = "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
foreach my $size (qw/4x4 8x8/) {
  add_proto qw/void/, "aom_hadamard_${size}", $hadamard_args;
  specialize "aom_hadamard_${size}", qw/sse2 neon/;
}
foreach my $size (qw/16x16 32x32/) {
  add_proto qw/void/, "aom_hadamard_${size}", $hadamard_args;
  specialize "aom_hadamard_${size}", qw/avx2 sse2 neon/;
}

# The _lp_ prototypes write int16_t coefficients instead of tran_low_t.
my $hadamard_lp_args = "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
add_proto qw/void/, "aom_hadamard_lp_8x8", $hadamard_lp_args;
specialize "aom_hadamard_lp_8x8", qw/sse2 neon/;
foreach my $name (qw/aom_hadamard_lp_16x16 aom_hadamard_lp_8x8_dual/) {
  add_proto qw/void/, $name, $hadamard_lp_args;
  specialize $name, qw/sse2 avx2 neon/;
}

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  foreach my $size (qw/8x8 16x16 32x32/) {
    add_proto qw/void/, "aom_highbd_hadamard_${size}", $hadamard_args;
    specialize "aom_highbd_hadamard_${size}", qw/avx2 neon/;
  }
}

add_proto qw/int/, "aom_satd", "const tran_low_t *coeff, int length";
specialize "aom_satd", qw/neon sse2 avx2/;

add_proto qw/int/, "aom_satd_lp", "const int16_t *coeff, int length";
specialize "aom_satd_lp", qw/sse2 avx2 neon/;


#
# Structured Similarity (SSIM)
#
add_proto qw/void/, "aom_ssim_parms_8x8", "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
# $sse2_x86_64 is 'sse2' on x86_64 and the empty string on every other arch.
specialize "aom_ssim_parms_8x8", "$sse2_x86_64";

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # Prototype only; no SIMD flavours are listed for the high-bitdepth variant.
  add_proto qw/void/, "aom_highbd_ssim_parms_8x8", "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
}
} # CONFIG_AV1_ENCODER

if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {

#
# Specialty Variance
#
add_proto qw/void/, "aom_get_var_sse_sum_8x8_quad", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse8x8, int *sum8x8, unsigned int *tot_sse, int *tot_sum, uint32_t *var8x8";
specialize "aom_get_var_sse_sum_8x8_quad", qw/avx2 sse2 neon neon_dotprod/;

add_proto qw/void/, "aom_get_var_sse_sum_16x16_dual", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse16x16, unsigned int *tot_sse, int *tot_sum, uint32_t *var16x16";
specialize "aom_get_var_sse_sum_16x16_dual", qw/avx2 sse2 neon neon_dotprod/;

# Argument list shared by the low- and high-bitdepth MSE prototypes.
my $mse_args = "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
foreach my $size (qw/16x16 16x8 8x16 8x8/) {
  add_proto qw/unsigned int/, "aom_mse${size}", $mse_args;
}

specialize "aom_mse16x16", qw/sse2 avx2 neon neon_dotprod/;
foreach my $size (qw/16x8 8x16 8x8/) {
  specialize "aom_mse${size}", qw/sse2 neon neon_dotprod/;
}

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  foreach $bd (8, 10, 12) {
    foreach my $size (qw/16x16 16x8 8x16 8x8/) {
      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse${size}", $mse_args;
    }

    # $bd is a number, so it is compared with == rather than eq.
    # Arm flavours: neon_dotprod for 8-bit, sve for 10- and 12-bit.
    my @arm = ($bd == 8) ? ("neon", "neon_dotprod") : ("neon", "sve");
    # Only the 10-bit 16x16 kernel lists an avx2 flavour.
    my @x86_16x16 = ($bd == 10) ? ("avx2", "sse2") : ("sse2");

    specialize "aom_highbd_${bd}_mse16x16", @x86_16x16, @arm;
    specialize "aom_highbd_${bd}_mse16x8", @arm;
    specialize "aom_highbd_${bd}_mse8x16", @arm;
    specialize "aom_highbd_${bd}_mse8x8", "sse2", @arm;
  }
}

if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
  add_proto qw/unsigned int/, "aom_get_mb_ss", "const int16_t *";
  specialize "aom_get_mb_ss", qw/sse2 neon/;
}

#
# Variance / Subpixel Variance / Subpixel Avg Variance
#
add_proto qw/uint64_t/, "aom_mse_wxh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
specialize "aom_mse_wxh_16bit", qw/sse2 avx2 neon/;

add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h";
specialize "aom_mse_16xh_16bit", qw/sse2 avx2 neon/;

# One variance, sub-pixel variance and sub-pixel average variance prototype
# per encoder block size.
foreach (@encoder_block_sizes) {
  ($w, $h) = @$_;
  add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
  add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
}

foreach my $size (qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
  specialize "aom_variance${size}", qw/sse2 avx2 neon neon_dotprod/;
}
foreach my $size (qw/8x16 8x8 8x4 4x8 4x4/) {
  specialize "aom_variance${size}", qw/sse2 neon neon_dotprod/;
}

foreach my $size (qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
  specialize "aom_sub_pixel_variance${size}", qw/avx2 neon ssse3/;
}
foreach my $size (qw/8x16 8x8 8x4 4x8 4x4/) {
  specialize "aom_sub_pixel_variance${size}", qw/neon ssse3/;
}

foreach my $size (qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16/) {
  specialize "aom_sub_pixel_avg_variance${size}", qw/avx2 neon ssse3/;
}
# Sub-pixel average variance for the block sizes that list no avx2 flavour.
foreach my $size (qw/16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  specialize "aom_sub_pixel_avg_variance${size}", qw/neon ssse3/;
}

# The 1:4 / 4:1 block sizes are only specialized outside realtime-only builds.
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
  foreach my $size (qw/4x16 8x32/) {
    specialize "aom_variance${size}", qw/neon neon_dotprod sse2/;
  }
  foreach my $size (qw/16x4 32x8 16x64 64x16/) {
    specialize "aom_variance${size}", qw/neon neon_dotprod sse2 avx2/;
  }

  foreach my $size (qw/4x16 8x32 32x8 64x16/) {
    specialize "aom_sub_pixel_variance${size}", qw/neon ssse3/;
  }
  foreach my $size (qw/16x4 16x64/) {
    specialize "aom_sub_pixel_variance${size}", qw/neon avx2 ssse3/;
  }

  foreach my $size (qw/4x16 16x4 8x32 32x8 16x64 64x16/) {
    specialize "aom_sub_pixel_avg_variance${size}", qw/neon ssse3/;
  }
}

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # Argument lists shared by the high-bitdepth variance prototypes.
  my $var_args = "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
  my $subpel_args = "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
  my $subpel_avg_args = $subpel_args . ", const uint8_t *second_pred";

  # One prototype of each kind per bit depth and encoder block size.
  foreach $bd (8, 10, 12) {
    foreach (@encoder_block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", $var_args;
      add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", $subpel_args;
      add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", $subpel_avg_args;
    }
  }

  # Block sizes from 128x128 down to 8x8 (excluding the 1:4 / 4:1 shapes).
  my @sizes_8x8_up = qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8/;

  # Variance: only the 10-bit kernels list an avx2 flavour.
  foreach my $depth (12, 10, 8) {
    my @x86 = ($depth == 10) ? ("sse2", "avx2") : ("sse2");
    foreach my $size (@sizes_8x8_up) {
      specialize "aom_highbd_${depth}_variance${size}", @x86, qw/neon sve/;
    }
    foreach my $size (qw/8x4 4x8/) {
      specialize "aom_highbd_${depth}_variance${size}", qw/neon sve/;
    }
    specialize "aom_highbd_${depth}_variance4x4", qw/sse4_1 neon sve/;
  }

  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    foreach $bd (8, 10, 12) {
      # Empty string for depths other than 10; it is still passed to specialize.
      my $maybe_avx2 = ($bd == 10) ? "avx2" : "";
      foreach my $size (qw/64x16 32x8 16x64 8x32/) {
        specialize "aom_highbd_${bd}_variance${size}", $maybe_avx2, qw/sse2 neon sve/;
      }
      foreach my $size (qw/16x4 4x16/) {
        specialize "aom_highbd_${bd}_variance${size}", qw/neon sve/;
      }
    }
  }

  # Sub-pixel variance.
  foreach my $depth (12, 10, 8) {
    my @x86 = ($depth == 10) ? ("sse2", "avx2") : ("sse2");
    foreach my $size (@sizes_8x8_up) {
      specialize "aom_highbd_${depth}_sub_pixel_variance${size}", @x86, qw/neon/;
    }
    specialize "aom_highbd_${depth}_sub_pixel_variance8x4", qw/sse2 neon/;
    specialize "aom_highbd_${depth}_sub_pixel_variance4x8", qw/neon/;
    specialize "aom_highbd_${depth}_sub_pixel_variance4x4", qw/sse4_1 neon/;
  }

  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    foreach $bd (8, 10, 12) {
      foreach my $size (qw/64x16 32x8 16x64 16x4 8x32/) {
        specialize "aom_highbd_${bd}_sub_pixel_variance${size}", qw/sse2 neon/;
      }
      specialize "aom_highbd_${bd}_sub_pixel_variance4x16", qw/neon/;
    }
  }

  # Sub-pixel average variance: the same flavour lists for every bit depth.
  foreach my $depth (12, 10, 8) {
    foreach my $size (qw/128x128 128x64 64x128/) {
      specialize "aom_highbd_${depth}_sub_pixel_avg_variance${size}", qw/neon/;
    }
    foreach my $size (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4/) {
      specialize "aom_highbd_${depth}_sub_pixel_avg_variance${size}", qw/sse2 neon/;
    }
    specialize "aom_highbd_${depth}_sub_pixel_avg_variance4x8", qw/neon/;
    specialize "aom_highbd_${depth}_sub_pixel_avg_variance4x4", qw/sse4_1 neon/;
  }

  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    foreach $bd (8, 10, 12) {
      foreach my $size (qw/64x16 32x8 16x64 16x4 8x32/) {
        specialize "aom_highbd_${bd}_sub_pixel_avg_variance${size}", qw/sse2 neon/;
      }
      specialize "aom_highbd_${bd}_sub_pixel_avg_variance4x16", qw/neon/;
    }
  }
}

#
# Masked Variance / Masked Subpixel Variance
#
# Argument list shared by the low- and high-bitdepth masked prototypes.
my $masked_args = "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
foreach (@encoder_block_sizes) {
  ($w, $h) = @$_;
  add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", $masked_args;
  specialize "aom_masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
}

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  foreach my $depth (8, 10, 12) {
    foreach (@encoder_block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_highbd_${depth}_masked_sub_pixel_variance${w}x${h}", $masked_args;
      specialize "aom_highbd_${depth}_masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
    }
  }
}

#
# OBMC Variance / OBMC Subpixel Variance
#
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
  # Argument lists shared by the low- and high-bitdepth OBMC prototypes.
  my $obmc_args = "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
  my $obmc_subpel_args = "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";

  foreach (@encoder_block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", $obmc_args;
    add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", $obmc_subpel_args;
    specialize "aom_obmc_variance${w}x${h}", qw/sse4_1 avx2 neon/;
    specialize "aom_obmc_sub_pixel_variance${w}x${h}", qw/sse4_1 neon/;
  }

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    foreach my $depth (8, 10, 12) {
      foreach (@encoder_block_sizes) {
        ($w, $h) = @$_;
        add_proto qw/unsigned int/, "aom_highbd_${depth}_obmc_variance${w}x${h}", $obmc_args;
        add_proto qw/unsigned int/, "aom_highbd_${depth}_obmc_sub_pixel_variance${w}x${h}", $obmc_subpel_args;
        specialize "aom_highbd_${depth}_obmc_variance${w}x${h}", qw/sse4_1 neon/;
        specialize "aom_highbd_${depth}_obmc_sub_pixel_variance${w}x${h}", qw/neon/;
      }
    }
  }
}

#
# Comp Avg
#
add_proto qw/void/, "aom_comp_avg_pred", "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
specialize "aom_comp_avg_pred", qw/avx2 neon/;

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void/, "aom_highbd_comp_avg_pred", "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
  specialize "aom_highbd_comp_avg_pred", qw/neon/;

  add_proto qw/uint64_t/, "aom_mse_wxh_16bit_highbd", "uint16_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
  specialize "aom_mse_wxh_16bit_highbd", qw/sse2 avx2 neon sve/;
}

add_proto qw/void/, "aom_comp_mask_pred", "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
specialize "aom_comp_mask_pred", qw/ssse3 avx2 neon/;

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void/, "aom_highbd_comp_mask_pred", "uint8_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
  specialize "aom_highbd_comp_mask_pred", qw/sse2 avx2 neon/;
}

# Flow estimation library
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
  add_proto qw/bool/, "aom_compute_mean_stddev", "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev";
  specialize "aom_compute_mean_stddev", qw/sse4_1 avx2/;

  add_proto qw/double/, "aom_compute_correlation", "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2";
  specialize "aom_compute_correlation", qw/sse4_1 avx2/;

  add_proto qw/void/, "aom_compute_flow_at_point", "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v";
  specialize "aom_compute_flow_at_point", qw/sse4_1 avx2 neon sve/;
}

} # CONFIG_AV1_ENCODER

1;