tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pixelutils_init.c (3994B)


      1 /*
      2 * This file is part of FFmpeg.
      3 *
      4 * FFmpeg is free software; you can redistribute it and/or
      5 * modify it under the terms of the GNU Lesser General Public
      6 * License as published by the Free Software Foundation; either
      7 * version 2.1 of the License, or (at your option) any later version.
      8 *
      9 * FFmpeg is distributed in the hope that it will be useful,
     10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12 * Lesser General Public License for more details.
     13 *
     14 * You should have received a copy of the GNU Lesser General Public
     15 * License along with FFmpeg; if not, write to the Free Software
     16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     17 */
     18 
     19 #include "config.h"
     20 
     21 #include "pixelutils.h"
     22 #include "cpu.h"
     23 
     /*
      * Prototypes of the SAD kernels that ff_pixelutils_sad_init_x86() installs.
      * Only the declarations live in this file; the definitions are elsewhere
      * (presumably in the x86 assembly sources -- verify against the build).
      *
      * Naming: ff_pixelutils_sad_[<variant>_]<W>x<H>_<isa>. Going by the
      * selection logic in ff_pixelutils_sad_init_x86(), the unprefixed variant
      * is used when neither source is aligned, "u" when only src1 is aligned,
      * and "a" when both sources are aligned.
      */
     #define DECLARE_SAD_FN(name)                                   \
         int name(const uint8_t *src1, ptrdiff_t stride1,           \
                  const uint8_t *src2, ptrdiff_t stride2)

     /* 8x8, MMXEXT */
     DECLARE_SAD_FN(ff_pixelutils_sad_8x8_mmxext);

     /* 16x16, SSE2 */
     DECLARE_SAD_FN(ff_pixelutils_sad_16x16_sse2);
     DECLARE_SAD_FN(ff_pixelutils_sad_a_16x16_sse2);
     DECLARE_SAD_FN(ff_pixelutils_sad_u_16x16_sse2);

     /* 32x32, SSE2 */
     DECLARE_SAD_FN(ff_pixelutils_sad_32x32_sse2);
     DECLARE_SAD_FN(ff_pixelutils_sad_a_32x32_sse2);
     DECLARE_SAD_FN(ff_pixelutils_sad_u_32x32_sse2);

     /* 32x32, AVX2 */
     DECLARE_SAD_FN(ff_pixelutils_sad_32x32_avx2);
     DECLARE_SAD_FN(ff_pixelutils_sad_a_32x32_avx2);
     DECLARE_SAD_FN(ff_pixelutils_sad_u_32x32_avx2);

     #undef DECLARE_SAD_FN
     47 
     48 void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
     49 {
     50    int cpu_flags = av_get_cpu_flags();
     51 
     52    // The best way to use SSE2 would be to do 2 SADs in parallel,
     53    // but we'd have to modify the pixelutils API to return SIMD functions.
     54 
     55    // It's probably not faster to shuffle data around
     56    // to get two lines of 8 pixels into a single 16byte register,
     57    // so just use the MMX 8x8 version even when SSE2 is available.
     58    if (EXTERNAL_MMXEXT(cpu_flags)) {
     59        sad[2] = ff_pixelutils_sad_8x8_mmxext;
     60    }
     61 
     62    if (EXTERNAL_SSE2(cpu_flags)) {
     63        switch (aligned) {
     64        case 0: sad[3] = ff_pixelutils_sad_16x16_sse2;   break; // src1 unaligned, src2 unaligned
     65        case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1   aligned, src2 unaligned
     66        case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1   aligned, src2   aligned
     67        }
     68    }
     69 
     70    if (EXTERNAL_SSE2(cpu_flags)) {
     71        switch (aligned) {
     72        case 0: sad[4] = ff_pixelutils_sad_32x32_sse2;   break; // src1 unaligned, src2 unaligned
     73        case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1   aligned, src2 unaligned
     74        case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1   aligned, src2   aligned
     75        }
     76    }
     77 
     78    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
     79        switch (aligned) {
     80        case 0: sad[4] = ff_pixelutils_sad_32x32_avx2;   break; // src1 unaligned, src2 unaligned
     81        case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1   aligned, src2 unaligned
     82        case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1   aligned, src2   aligned
     83        }
     84    }
     85 }