tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

idctdsp_neon.S (4944B)


      1 /*
      2 * ARM-NEON-optimized IDCT functions
      3 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
      4 *
      5 * This file is part of FFmpeg.
      6 *
      7 * FFmpeg is free software; you can redistribute it and/or
      8 * modify it under the terms of the GNU Lesser General Public
      9 * License as published by the Free Software Foundation; either
     10 * version 2.1 of the License, or (at your option) any later version.
     11 *
     12 * FFmpeg is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15 * Lesser General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU Lesser General Public
     18 * License along with FFmpeg; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     20 */
     21 
     22 #include "libavutil/arm/asm.S"
     23 
     24 function ff_put_pixels_clamped_neon, export=1  /* Reads 64 signed 16-bit values from [r0], clamps each to 0..255 and writes them as 8 rows of 8 bytes at [r1], stepping r1 by r2 per row. NOTE(review): presumably r0/r1/r2 are the C arguments (coefficient block, destination pixels, line stride) - confirm against the C prototype. */
     25        vld1.16         {d16-d19}, [r0,:128]!  /* q8-q9 = input rows 0-1 (8 x s16 each); :128 asserts r0 is 16-byte aligned, '!' advances r0 past the 32 bytes read */
     26        vqmovun.s16     d0, q8  /* row 0: narrow s16 -> u8 with saturation to 0..255 */
     27        vld1.16         {d20-d23}, [r0,:128]!  /* q10-q11 = input rows 2-3 */
     28        vqmovun.s16     d1, q9  /* row 1: narrow with unsigned saturation */
     29        vld1.16         {d24-d27}, [r0,:128]!  /* q12-q13 = input rows 4-5 */
     30        vqmovun.s16     d2, q10  /* row 2: narrow with unsigned saturation */
     31        vld1.16         {d28-d31}, [r0,:128]!  /* q14-q15 = input rows 6-7; all 128 input bytes are now in registers */
     32        vqmovun.s16     d3, q11  /* row 3: narrow with unsigned saturation */
     33        vst1.8          {d0},      [r1,:64], r2  /* store row 0, then r1 += r2; :64 asserts an 8-byte-aligned address, and since every store carries it each row address must be a multiple of 8 */
     34        vqmovun.s16     d4, q12  /* row 4: narrowing of later rows is interleaved with stores of earlier rows */
     35        vst1.8          {d1},      [r1,:64], r2  /* store row 1 */
     36        vqmovun.s16     d5, q13  /* row 5: narrow with unsigned saturation */
     37        vst1.8          {d2},      [r1,:64], r2  /* store row 2 */
     38        vqmovun.s16     d6, q14  /* row 6: narrow with unsigned saturation */
     39        vst1.8          {d3},      [r1,:64], r2  /* store row 3 */
     40        vqmovun.s16     d7, q15  /* row 7: narrow with unsigned saturation */
     41        vst1.8          {d4},      [r1,:64], r2  /* store row 4 */
     42        vst1.8          {d5},      [r1,:64], r2  /* store row 5 */
     43        vst1.8          {d6},      [r1,:64], r2  /* store row 6 */
     44        vst1.8          {d7},      [r1,:64], r2  /* store row 7 */
     45        bx              lr  /* return; registers written: d0-d7, d16-d31, r0 and r1 (d8-d15 are not touched) */
     46 endfunc
     47 
     48 function ff_put_signed_pixels_clamped_neon, export=1  /* Reads 64 signed 16-bit values from [r0], clamps each to -128..127, adds 128 and writes the resulting bytes as 8 rows of 8 at [r1], stepping r1 by r2 per row. NOTE(review): presumably r0/r1/r2 are the C arguments (coefficient block, destination pixels, line stride) - confirm against the C prototype. */
     49        vmov.u8         d31, #128  /* d31 = 128 in every byte lane: the bias that maps -128..127 onto 0..255 */
     50        vld1.16         {d16-d17}, [r0,:128]!  /* q8 = input row 0 (8 x s16); :128 asserts r0 is 16-byte aligned, '!' advances r0 by the 16 bytes read */
     51        vqmovn.s16      d0, q8  /* row 0: narrow s16 -> s8 with saturation to -128..127 */
     52        vld1.16         {d18-d19}, [r0,:128]!  /* q9 = input row 1 */
     53        vqmovn.s16      d1, q9  /* row 1: narrow with signed saturation */
     54        vld1.16         {d16-d17}, [r0,:128]!  /* q8 reused for input row 2 (row 0 is already narrowed into d0) */
     55        vqmovn.s16      d2, q8  /* row 2: narrow with signed saturation */
     56        vld1.16         {d18-d19}, [r0,:128]!  /* q9 reused for input row 3 (row 1 is already narrowed into d1) */
     57        vadd.u8         d0, d0, d31  /* row 0 += 128 (per-byte modular add) */
     58        vld1.16         {d20-d21}, [r0,:128]!  /* q10 = input row 4 */
     59        vadd.u8         d1, d1, d31  /* row 1 += 128 */
     60        vld1.16         {d22-d23}, [r0,:128]!  /* q11 = input row 5 */
     61        vadd.u8         d2, d2, d31  /* row 2 += 128 */
     62        vst1.8          {d0},      [r1,:64], r2  /* store row 0, then r1 += r2; :64 asserts an 8-byte-aligned address, and since every store carries it each row address must be a multiple of 8 */
     63        vqmovn.s16      d3, q9  /* row 3: narrow with signed saturation */
     64        vst1.8          {d1},      [r1,:64], r2  /* store row 1 */
     65        vqmovn.s16      d4, q10  /* row 4: narrow with signed saturation */
     66        vst1.8          {d2},      [r1,:64], r2  /* store row 2 */
     67        vqmovn.s16      d5, q11  /* row 5: narrow with signed saturation */
     68        vld1.16         {d24-d25}, [r0,:128]!  /* q12 = input row 6 */
     69        vadd.u8         d3, d3, d31  /* row 3 += 128 */
     70        vld1.16         {d26-d27}, [r0,:128]!  /* q13 = input row 7; all 128 input bytes have now been read */
     71        vadd.u8         d4, d4, d31  /* row 4 += 128 */
     72        vadd.u8         d5, d5, d31  /* row 5 += 128 */
     73        vst1.8          {d3},      [r1,:64], r2  /* store row 3 */
     74        vqmovn.s16      d6, q12  /* row 6: narrow with signed saturation */
     75        vst1.8          {d4},      [r1,:64], r2  /* store row 4 */
     76        vqmovn.s16      d7, q13  /* row 7: narrow with signed saturation */
     77        vst1.8          {d5},      [r1,:64], r2  /* store row 5 */
     78        vadd.u8         d6, d6, d31  /* row 6 += 128 */
     79        vadd.u8         d7, d7, d31  /* row 7 += 128 */
     80        vst1.8          {d6},      [r1,:64], r2  /* store row 6 */
     81        vst1.8          {d7},      [r1,:64], r2  /* store row 7 */
     82        bx              lr  /* return; registers written: d0-d7, d16-d27, d31, r0 and r1 (d8-d15 are not touched) */
     83 endfunc
     84 
     85 function ff_add_pixels_clamped_neon, export=1  /* Adds 64 signed 16-bit values read from [r0] to the 8 rows of 8 unsigned bytes at [r1] (rows r2 bytes apart) and writes the sums, clamped to 0..255, back to the same rows. NOTE(review): presumably r0/r1/r2 are the C arguments (coefficient block, pixels, line stride) - confirm against the C prototype. */
     86        mov             r3, r1  /* r3 = store pointer; r1 runs ahead of it as the load pointer over the same rows */
     87        vld1.8          {d16},   [r1,:64], r2  /* d16 = pixel row 0, then r1 += r2; :64 asserts an 8-byte-aligned address, and since every row access carries it each row address must be a multiple of 8 */
     88        vld1.16         {d0-d1}, [r0,:128]!  /* q0 = s16 row 0; :128 asserts r0 is 16-byte aligned, '!' advances r0 by the 16 bytes read */
     89        vaddw.u8        q0, q0, d16  /* row 0: q0 += zero-extended pixels, in 16-bit lanes; the add itself does not saturate - NOTE(review): assumes value + 255 fits in s16, confirm the input range */
     90        vld1.8          {d17},   [r1,:64], r2  /* d17 = pixel row 1 */
     91        vld1.16         {d2-d3}, [r0,:128]!  /* q1 = s16 row 1 */
     92        vqmovun.s16     d0, q0  /* row 0: narrow the sum s16 -> u8 with saturation to 0..255 */
     93        vld1.8          {d18},   [r1,:64], r2  /* d18 = pixel row 2 */
     94        vaddw.u8        q1, q1, d17  /* row 1: add pixels to coefficients */
     95        vld1.16         {d4-d5}, [r0,:128]!  /* q2 = s16 row 2 */
     96        vaddw.u8        q2, q2, d18  /* row 2: add pixels to coefficients */
     97        vst1.8          {d0},    [r3,:64], r2  /* store row 0 through r3, then r3 += r2 */
     98        vqmovun.s16     d2, q1  /* row 1: narrow with unsigned saturation */
     99        vld1.8          {d19},   [r1,:64], r2  /* d19 = pixel row 3 */
    100        vld1.16         {d6-d7}, [r0,:128]!  /* q3 = s16 row 3 */
    101        vaddw.u8        q3, q3, d19  /* row 3: add pixels to coefficients */
    102        vqmovun.s16     d4, q2  /* row 2: narrow with unsigned saturation */
    103        vst1.8          {d2},    [r3,:64], r2  /* store row 1 */
    104        vld1.8          {d16},   [r1,:64], r2  /* d16 reused for pixel row 4 (row 0 pixels already consumed) */
    105        vqmovun.s16     d6, q3  /* row 3: narrow with unsigned saturation */
    106        vld1.16         {d0-d1}, [r0,:128]!  /* q0 reused for s16 row 4 (row 0 result in d0 was already stored) */
    107        vaddw.u8        q0, q0, d16  /* row 4: add pixels to coefficients */
    108        vst1.8          {d4},    [r3,:64], r2  /* store row 2 */
    109        vld1.8          {d17},   [r1,:64], r2  /* d17 reused for pixel row 5 */
    110        vld1.16         {d2-d3}, [r0,:128]!  /* q1 reused for s16 row 5 (row 1 result in d2 was already stored) */
    111        vaddw.u8        q1, q1, d17  /* row 5: add pixels to coefficients */
    112        vst1.8          {d6},    [r3,:64], r2  /* store row 3 */
    113        vqmovun.s16     d0, q0  /* row 4: narrow with unsigned saturation */
    114        vld1.8          {d18},   [r1,:64], r2  /* d18 reused for pixel row 6 */
    115        vld1.16         {d4-d5}, [r0,:128]!  /* q2 reused for s16 row 6 (row 2 result in d4 was already stored) */
    116        vaddw.u8        q2, q2, d18  /* row 6: add pixels to coefficients */
    117        vst1.8          {d0},    [r3,:64], r2  /* store row 4 */
    118        vqmovun.s16     d2, q1  /* row 5: narrow with unsigned saturation */
    119        vld1.8          {d19},   [r1,:64], r2  /* d19 reused for pixel row 7; last pixel load */
    120        vqmovun.s16     d4, q2  /* row 6: narrow with unsigned saturation */
    121        vld1.16         {d6-d7}, [r0,:128]!  /* q3 reused for s16 row 7 (row 3 result in d6 was already stored); all 128 input bytes have now been read */
    122        vaddw.u8        q3, q3, d19  /* row 7: add pixels to coefficients */
    123        vst1.8          {d2},    [r3,:64], r2  /* store row 5 */
    124        vqmovun.s16     d6, q3  /* row 7: narrow with unsigned saturation */
    125        vst1.8          {d4},    [r3,:64], r2  /* store row 6 */
    126        vst1.8          {d6},    [r3,:64], r2  /* store row 7 */
    127        bx              lr  /* return; registers written: d0-d7, d16-d19, r0, r1 and r3 (d8-d15 are not touched) */
    128 endfunc