tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

vc1dsp_init_aarch64.c (5602B)


      1 /*
      2 * This file is part of FFmpeg.
      3 *
      4 * FFmpeg is free software; you can redistribute it and/or
      5 * modify it under the terms of the GNU Lesser General Public
      6 * License as published by the Free Software Foundation; either
      7 * version 2.1 of the License, or (at your option) any later version.
      8 *
      9 * FFmpeg is distributed in the hope that it will be useful,
     10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12 * Lesser General Public License for more details.
     13 *
     14 * You should have received a copy of the GNU Lesser General Public
     15 * License along with FFmpeg; if not, write to the Free Software
     16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     17 */
     18 
     19 #include <stdint.h>
     20 
     21 #include "libavutil/attributes.h"
     22 #include "libavutil/cpu.h"
     23 #include "libavutil/aarch64/cpu.h"
     24 #include "libavutil/intreadwrite.h"
     25 #include "libavcodec/vc1dsp.h"
     26 
     27 #include "config.h"
     28 
     29 void ff_vc1_inv_trans_8x8_neon(int16_t *block);
     30 void ff_vc1_inv_trans_8x4_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
     31 void ff_vc1_inv_trans_4x8_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
     32 void ff_vc1_inv_trans_4x4_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
     33 
     34 void ff_vc1_inv_trans_8x8_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
     35 void ff_vc1_inv_trans_8x4_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
     36 void ff_vc1_inv_trans_4x8_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
     37 void ff_vc1_inv_trans_4x4_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
     38 
     39 void ff_vc1_v_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq);
     40 void ff_vc1_h_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq);
     41 void ff_vc1_v_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq);
     42 void ff_vc1_h_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq);
     43 void ff_vc1_v_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq);
     44 void ff_vc1_h_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq);
     45 
     46 void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
     47                                int h, int x, int y);
     48 void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
     49                                int h, int x, int y);
     50 void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
     51                                int h, int x, int y);
     52 void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
     53                                int h, int x, int y);
     54 
     55 int ff_vc1_unescape_buffer_helper_neon(const uint8_t *src, int size, uint8_t *dst);
     56 
     57 static int vc1_unescape_buffer_neon(const uint8_t *src, int size, uint8_t *dst)
     58 {
     59    /* Dealing with starting and stopping, and removing escape bytes, are
     60     * comparatively less time-sensitive, so are more clearly expressed using
     61     * a C wrapper around the assembly inner loop. Note that we assume a
     62     * little-endian machine that supports unaligned loads. */
     63    int dsize = 0;
     64    while (size >= 4)
     65    {
     66        int found = 0;
     67        while (!found && (((uintptr_t) dst) & 7) && size >= 4)
     68        {
     69            found = (AV_RL32(src) &~ 0x03000000) == 0x00030000;
     70            if (!found)
     71            {
     72                *dst++ = *src++;
     73                --size;
     74                ++dsize;
     75            }
     76        }
     77        if (!found)
     78        {
     79            int skip = size - ff_vc1_unescape_buffer_helper_neon(src, size, dst);
     80            dst += skip;
     81            src += skip;
     82            size -= skip;
     83            dsize += skip;
     84            while (!found && size >= 4)
     85            {
     86                found = (AV_RL32(src) &~ 0x03000000) == 0x00030000;
     87                if (!found)
     88                {
     89                    *dst++ = *src++;
     90                    --size;
     91                    ++dsize;
     92                }
     93            }
     94        }
     95        if (found)
     96        {
     97            *dst++ = *src++;
     98            *dst++ = *src++;
     99            ++src;
    100            size -= 3;
    101            dsize += 2;
    102        }
    103    }
    104    while (size > 0)
    105    {
    106        *dst++ = *src++;
    107        --size;
    108        ++dsize;
    109    }
    110    return dsize;
    111 }
    112 
    113 av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
    114 {
    115    int cpu_flags = av_get_cpu_flags();
    116 
    117    if (have_neon(cpu_flags)) {
    118        dsp->vc1_inv_trans_8x8 = ff_vc1_inv_trans_8x8_neon;
    119        dsp->vc1_inv_trans_8x4 = ff_vc1_inv_trans_8x4_neon;
    120        dsp->vc1_inv_trans_4x8 = ff_vc1_inv_trans_4x8_neon;
    121        dsp->vc1_inv_trans_4x4 = ff_vc1_inv_trans_4x4_neon;
    122        dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_neon;
    123        dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_neon;
    124        dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_neon;
    125        dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_neon;
    126 
    127        dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_neon;
    128        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_neon;
    129        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_neon;
    130        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_neon;
    131        dsp->vc1_v_loop_filter16 = ff_vc1_v_loop_filter16_neon;
    132        dsp->vc1_h_loop_filter16 = ff_vc1_h_loop_filter16_neon;
    133 
    134        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon;
    135        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon;
    136        dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = ff_put_vc1_chroma_mc4_neon;
    137        dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = ff_avg_vc1_chroma_mc4_neon;
    138 
    139        dsp->vc1_unescape_buffer = vc1_unescape_buffer_neon;
    140    }
    141 }