mc.h (10199B)
/*
 * Copyright © 2018-2021, VideoLAN and dav1d authors
 * Copyright © 2018-2021, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */ 27 28 #include "src/cpu.h" 29 #include "src/mc.h" 30 31 #define decl_fn(type, name) \ 32 decl_##type##_fn(BF(name, ssse3)); \ 33 decl_##type##_fn(BF(name, avx2)); \ 34 decl_##type##_fn(BF(name, avx512icl)); 35 #define init_mc_fn(type, name, suffix) \ 36 c->mc[type] = BF(dav1d_put_##name, suffix) 37 #define init_mct_fn(type, name, suffix) \ 38 c->mct[type] = BF(dav1d_prep_##name, suffix) 39 #define init_mc_scaled_fn(type, name, suffix) \ 40 c->mc_scaled[type] = BF(dav1d_put_##name, suffix) 41 #define init_mct_scaled_fn(type, name, suffix) \ 42 c->mct_scaled[type] = BF(dav1d_prep_##name, suffix) 43 44 decl_8tap_fns(ssse3); 45 decl_8tap_fns(avx2); 46 decl_8tap_fns(avx512icl); 47 48 decl_fn(mc, dav1d_put_bilin); 49 decl_fn(mct, dav1d_prep_bilin); 50 51 decl_fn(mc_scaled, dav1d_put_8tap_scaled_regular); 52 decl_fn(mc_scaled, dav1d_put_8tap_scaled_regular_smooth); 53 decl_fn(mc_scaled, dav1d_put_8tap_scaled_regular_sharp); 54 decl_fn(mc_scaled, dav1d_put_8tap_scaled_smooth); 55 decl_fn(mc_scaled, dav1d_put_8tap_scaled_smooth_regular); 56 decl_fn(mc_scaled, dav1d_put_8tap_scaled_smooth_sharp); 57 decl_fn(mc_scaled, dav1d_put_8tap_scaled_sharp); 58 decl_fn(mc_scaled, dav1d_put_8tap_scaled_sharp_regular); 59 decl_fn(mc_scaled, dav1d_put_8tap_scaled_sharp_smooth); 60 decl_fn(mc_scaled, dav1d_put_bilin_scaled); 61 62 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_regular); 63 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_regular_smooth); 64 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_regular_sharp); 65 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_smooth); 66 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_smooth_regular); 67 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_smooth_sharp); 68 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_sharp); 69 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_sharp_regular); 70 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_sharp_smooth); 71 decl_fn(mct_scaled, dav1d_prep_bilin_scaled); 72 73 decl_fn(avg, dav1d_avg); 74 decl_fn(w_avg, dav1d_w_avg); 75 decl_fn(mask, 
dav1d_mask); 76 decl_fn(w_mask, dav1d_w_mask_420); 77 decl_fn(w_mask, dav1d_w_mask_422); 78 decl_fn(w_mask, dav1d_w_mask_444); 79 decl_fn(blend, dav1d_blend); 80 decl_fn(blend_dir, dav1d_blend_v); 81 decl_fn(blend_dir, dav1d_blend_h); 82 83 decl_fn(warp8x8, dav1d_warp_affine_8x8); 84 decl_warp8x8_fn(BF(dav1d_warp_affine_8x8, sse4)); 85 decl_fn(warp8x8t, dav1d_warp_affine_8x8t); 86 decl_warp8x8t_fn(BF(dav1d_warp_affine_8x8t, sse4)); 87 88 decl_fn(emu_edge, dav1d_emu_edge); 89 90 decl_fn(resize, dav1d_resize); 91 92 static ALWAYS_INLINE void mc_dsp_init_x86(Dav1dMCDSPContext *const c) { 93 const unsigned flags = dav1d_get_cpu_flags(); 94 95 if(!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) 96 return; 97 98 init_8tap_fns(ssse3); 99 100 init_mc_fn(FILTER_2D_BILINEAR, bilin, ssse3); 101 init_mct_fn(FILTER_2D_BILINEAR, bilin, ssse3); 102 103 init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR, 8tap_scaled_regular, ssse3); 104 init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3); 105 init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_scaled_regular_sharp, ssse3); 106 init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3); 107 init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH, 8tap_scaled_smooth, ssse3); 108 init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_scaled_smooth_sharp, ssse3); 109 init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_scaled_sharp_regular, ssse3); 110 init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_scaled_sharp_smooth, ssse3); 111 init_mc_scaled_fn(FILTER_2D_8TAP_SHARP, 8tap_scaled_sharp, ssse3); 112 init_mc_scaled_fn(FILTER_2D_BILINEAR, bilin_scaled, ssse3); 113 114 init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR, 8tap_scaled_regular, ssse3); 115 init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3); 116 init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_scaled_regular_sharp, ssse3); 117 init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3); 118 
init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH, 8tap_scaled_smooth, ssse3); 119 init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_scaled_smooth_sharp, ssse3); 120 init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_scaled_sharp_regular, ssse3); 121 init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_scaled_sharp_smooth, ssse3); 122 init_mct_scaled_fn(FILTER_2D_8TAP_SHARP, 8tap_scaled_sharp, ssse3); 123 init_mct_scaled_fn(FILTER_2D_BILINEAR, bilin_scaled, ssse3); 124 125 c->avg = BF(dav1d_avg, ssse3); 126 c->w_avg = BF(dav1d_w_avg, ssse3); 127 c->mask = BF(dav1d_mask, ssse3); 128 c->w_mask[0] = BF(dav1d_w_mask_444, ssse3); 129 c->w_mask[1] = BF(dav1d_w_mask_422, ssse3); 130 c->w_mask[2] = BF(dav1d_w_mask_420, ssse3); 131 c->blend = BF(dav1d_blend, ssse3); 132 c->blend_v = BF(dav1d_blend_v, ssse3); 133 c->blend_h = BF(dav1d_blend_h, ssse3); 134 c->warp8x8 = BF(dav1d_warp_affine_8x8, ssse3); 135 c->warp8x8t = BF(dav1d_warp_affine_8x8t, ssse3); 136 c->emu_edge = BF(dav1d_emu_edge, ssse3); 137 c->resize = BF(dav1d_resize, ssse3); 138 139 if(!(flags & DAV1D_X86_CPU_FLAG_SSE41)) 140 return; 141 142 #if BITDEPTH == 8 143 c->warp8x8 = BF(dav1d_warp_affine_8x8, sse4); 144 c->warp8x8t = BF(dav1d_warp_affine_8x8t, sse4); 145 #endif 146 147 #if ARCH_X86_64 148 if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) 149 return; 150 151 init_8tap_fns(avx2); 152 153 init_mc_fn(FILTER_2D_BILINEAR, bilin, avx2); 154 init_mct_fn(FILTER_2D_BILINEAR, bilin, avx2); 155 156 init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR, 8tap_scaled_regular, avx2); 157 init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2); 158 init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_scaled_regular_sharp, avx2); 159 init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2); 160 init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH, 8tap_scaled_smooth, avx2); 161 init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_scaled_smooth_sharp, avx2); 162 init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR, 
8tap_scaled_sharp_regular, avx2); 163 init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_scaled_sharp_smooth, avx2); 164 init_mc_scaled_fn(FILTER_2D_8TAP_SHARP, 8tap_scaled_sharp, avx2); 165 init_mc_scaled_fn(FILTER_2D_BILINEAR, bilin_scaled, avx2); 166 167 init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR, 8tap_scaled_regular, avx2); 168 init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2); 169 init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_scaled_regular_sharp, avx2); 170 init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2); 171 init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH, 8tap_scaled_smooth, avx2); 172 init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_scaled_smooth_sharp, avx2); 173 init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_scaled_sharp_regular, avx2); 174 init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_scaled_sharp_smooth, avx2); 175 init_mct_scaled_fn(FILTER_2D_8TAP_SHARP, 8tap_scaled_sharp, avx2); 176 init_mct_scaled_fn(FILTER_2D_BILINEAR, bilin_scaled, avx2); 177 178 c->avg = BF(dav1d_avg, avx2); 179 c->w_avg = BF(dav1d_w_avg, avx2); 180 c->mask = BF(dav1d_mask, avx2); 181 c->w_mask[0] = BF(dav1d_w_mask_444, avx2); 182 c->w_mask[1] = BF(dav1d_w_mask_422, avx2); 183 c->w_mask[2] = BF(dav1d_w_mask_420, avx2); 184 c->blend = BF(dav1d_blend, avx2); 185 c->blend_v = BF(dav1d_blend_v, avx2); 186 c->blend_h = BF(dav1d_blend_h, avx2); 187 c->warp8x8 = BF(dav1d_warp_affine_8x8, avx2); 188 c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx2); 189 c->emu_edge = BF(dav1d_emu_edge, avx2); 190 c->resize = BF(dav1d_resize, avx2); 191 192 if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) 193 return; 194 195 init_8tap_fns(avx512icl); 196 197 init_mc_fn (FILTER_2D_BILINEAR, bilin, avx512icl); 198 init_mct_fn(FILTER_2D_BILINEAR, bilin, avx512icl); 199 200 c->avg = BF(dav1d_avg, avx512icl); 201 c->w_avg = BF(dav1d_w_avg, avx512icl); 202 c->mask = BF(dav1d_mask, avx512icl); 203 c->w_mask[0] = 
BF(dav1d_w_mask_444, avx512icl); 204 c->w_mask[1] = BF(dav1d_w_mask_422, avx512icl); 205 c->w_mask[2] = BF(dav1d_w_mask_420, avx512icl); 206 c->blend = BF(dav1d_blend, avx512icl); 207 c->blend_v = BF(dav1d_blend_v, avx512icl); 208 c->blend_h = BF(dav1d_blend_h, avx512icl); 209 210 if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) { 211 c->resize = BF(dav1d_resize, avx512icl); 212 c->warp8x8 = BF(dav1d_warp_affine_8x8, avx512icl); 213 c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx512icl); 214 } 215 #endif 216 }