tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hpeldsp_neon.S (13431B)


      1 /*
      2 * ARM NEON optimised DSP functions
      3 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
      4 *
      5 * This file is part of FFmpeg.
      6 *
      7 * FFmpeg is free software; you can redistribute it and/or
      8 * modify it under the terms of the GNU Lesser General Public
      9 * License as published by the Free Software Foundation; either
     10 * version 2.1 of the License, or (at your option) any later version.
     11 *
     12 * FFmpeg is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15 * Lesser General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU Lesser General Public
     18 * License along with FFmpeg; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     20 */
     21 
     22 #include "libavutil/arm/asm.S"
     23 
     24 .macro  pixels16        rnd=1, avg=0
        @ Copy a 16-pixel-wide block.
        @   r0 = dst (16-byte aligned), r1 = src, r2 = line stride in bytes,
        @   r3 = height in rows (multiple of 4; four rows per iteration).
        @ \avg=1: store the rounded byte-average of src and the current dst
        @ contents instead of a plain copy.  \rnd is accepted for interface
        @ symmetry with the other pixelsNN macros but is not used here
        @ (a plain copy has no rounding step).
     25  .if \avg
        @ r12 walks the dst rows being read back for the blend.
     26        mov             r12, r0
     27  .endif
     28 1:      vld1.8          {q0},     [r1], r2
     29        vld1.8          {q1},     [r1], r2
     30        vld1.8          {q2},     [r1], r2
     31        pld             [r1, r2, lsl #2]
     32        vld1.8          {q3},     [r1], r2
        @ Prefetch upcoming source rows.
     33        pld             [r1]
     34        pld             [r1, r2]
     35        pld             [r1, r2, lsl #1]
     36  .if \avg
        @ Blend each loaded row with the existing dst row (rounded average).
     37        vld1.8          {q8},     [r12,:128], r2
     38        vrhadd.u8       q0,  q0,  q8
     39        vld1.8          {q9},     [r12,:128], r2
     40        vrhadd.u8       q1,  q1,  q9
     41        vld1.8          {q10},    [r12,:128], r2
     42        vrhadd.u8       q2,  q2,  q10
     43        vld1.8          {q11},    [r12,:128], r2
     44        vrhadd.u8       q3,  q3,  q11
     45  .endif
     46        subs            r3,  r3,  #4
     47        vst1.64         {q0},     [r0,:128], r2
     48        vst1.64         {q1},     [r0,:128], r2
     49        vst1.64         {q2},     [r0,:128], r2
     50        vst1.64         {q3},     [r0,:128], r2
     51        bne             1b
     52        bx              lr
     53 .endm
     54 
     55 .macro  pixels16_x2     rnd=1, avg=0
        @ Horizontal half-pel: out[x] = average(src[x], src[x+1]) over a
        @ 16-pixel-wide block.  r0 = dst (16-byte aligned), r1 = src,
        @ r2 = stride, r3 = height (multiple of 2; two rows per iteration).
        @ Each row load grabs 24 bytes (d0-d2) so byte 16 is available for
        @ the one-byte shift.  "avg" below is the helper installed by
        @ pixfunc: vrhadd.u8 when \rnd=1, truncating vhadd.u8 when \rnd=0.
        @ \avg=1: additionally blend the result with the existing dst rows.
     56 1:      vld1.8          {d0-d2},  [r1], r2
     57        vld1.8          {d4-d6},  [r1], r2
     58        pld             [r1]
     59        pld             [r1, r2]
     60        subs            r3,  r3,  #2
        @ q1/q3 = the same rows shifted left one byte (i.e. src[x+1]).
     61        vext.8          q1,  q0,  q1,  #1
     62        avg             q0,  q0,  q1
     63        vext.8          q3,  q2,  q3,  #1
     64        avg             q2,  q2,  q3
     65  .if \avg
        @ Read back both dst rows, blend, then rewind r0 by one row.
     66        vld1.8          {q1},     [r0,:128], r2
     67        vld1.8          {q3},     [r0,:128]
     68        vrhadd.u8       q0,  q0,  q1
     69        vrhadd.u8       q2,  q2,  q3
     70        sub             r0,  r0,  r2
     71  .endif
     72        vst1.8          {q0},     [r0,:128], r2
     73        vst1.8          {q2},     [r0,:128], r2
     74        bne             1b
     75        bx              lr
     76 .endm
     77 
     78 .macro  pixels16_y2     rnd=1, avg=0
        @ Vertical half-pel: out row n = average(src row n, src row n+1)
        @ over a 16-pixel-wide block.  r0 = dst (16-byte aligned), r1 = src,
        @ r2 = stride, r3 = output height (multiple of 2); reads r3+1 source
        @ rows in total.  "avg" is the pixfunc helper (vrhadd/vhadd by \rnd).
        @ \avg=1: blend results with existing dst.  The loop produces two
        @ rows per iteration; the last pair is peeled off after the loop,
        @ hence the initial "sub r3, r3, #2".
     79        sub             r3,  r3,  #2
     80        vld1.8          {q0},     [r1], r2
     81        vld1.8          {q1},     [r1], r2
     82 1:      subs            r3,  r3,  #2
        @ q0/q1 always hold the two most recently loaded source rows.
     83        avg             q2,  q0,  q1
     84        vld1.8          {q0},     [r1], r2
     85        avg             q3,  q0,  q1
     86        vld1.8          {q1},     [r1], r2
     87        pld             [r1]
     88        pld             [r1, r2]
     89  .if \avg
     90        vld1.8          {q8},     [r0,:128], r2
     91        vld1.8          {q9},     [r0,:128]
     92        vrhadd.u8       q2,  q2,  q8
     93        vrhadd.u8       q3,  q3,  q9
     94        sub             r0,  r0,  r2
     95  .endif
     96        vst1.8          {q2},     [r0,:128], r2
     97        vst1.8          {q3},     [r0,:128], r2
     98        bne             1b
     99 
        @ Tail: final two output rows need only one more source row.
    100        avg             q2,  q0,  q1
    101        vld1.8          {q0},     [r1], r2
    102        avg             q3,  q0,  q1
    103  .if \avg
    104        vld1.8          {q8},     [r0,:128], r2
    105        vld1.8          {q9},     [r0,:128]
    106        vrhadd.u8       q2,  q2,  q8
    107        vrhadd.u8       q3,  q3,  q9
    108        sub             r0,  r0,  r2
    109  .endif
    110        vst1.8          {q2},     [r0,:128], r2
    111        vst1.8          {q3},     [r0,:128], r2
    112 
    113        bx              lr
    114 .endm
    115 
    116 .macro  pixels16_xy2    rnd=1, avg=0
        @ 2D half-pel: each output byte is the average of the 2x2 source
        @ neighborhood, i.e. (a + b + c + d + bias) >> 2, over a 16-wide
        @ block.  r0 = dst (16-byte aligned), r1 = src, r2 = stride,
        @ r3 = output height (multiple of 2); reads r3+1 source rows.
        @ Rounding: with \rnd=1, "shrn" is vrshrn.u16 (adds 2 before the
        @ shift); with \rnd=0 the NRND-prefixed lines are assembled instead,
        @ adding a bias of 1 (q13) before a truncating vshrn.u16.
        @ Software-pipelined: q8/q10 hold the widened horizontal pair-sums
        @ (src[x] + src[x+1], low/high halves) of the newest row, q9/q11
        @ those of the previous row; each iteration only computes one new
        @ row of pair-sums per output row.
    117        sub             r3,  r3,  #2
    118        vld1.8          {d0-d2},  [r1], r2
    119        vld1.8          {d4-d6},  [r1], r2
    120 NRND    vmov.i16        q13, #1
    121        pld             [r1]
    122        pld             [r1, r2]
        @ Prime the pipeline: pair-sums of the first two source rows.
    123        vext.8          q1,  q0,  q1,  #1
    124        vext.8          q3,  q2,  q3,  #1
    125        vaddl.u8        q8,  d0,  d2
    126        vaddl.u8        q10, d1,  d3
    127        vaddl.u8        q9,  d4,  d6
    128        vaddl.u8        q11, d5,  d7
    129 1:      subs            r3,  r3,  #2
        @ First output row of the pair: sum the two rows' pair-sums,
        @ apply bias if \rnd=0, narrow with >>2 into q14.
    130        vld1.8          {d0-d2},  [r1], r2
    131        vadd.u16        q12, q8,  q9
    132        pld             [r1]
    133 NRND    vadd.u16        q12, q12, q13
    134        vext.8          q15, q0,  q1,  #1
    135        vadd.u16        q1 , q10, q11
    136        shrn            d28, q12, #2
    137 NRND    vadd.u16        q1,  q1,  q13
    138        shrn            d29, q1,  #2
    139  .if \avg
    140        vld1.8          {q8},     [r0,:128]
    141        vrhadd.u8       q14, q14, q8
    142  .endif
    143        vaddl.u8        q8,  d0,  d30
    144        vld1.8          {d2-d4},  [r1], r2
    145        vaddl.u8        q10, d1,  d31
    146        vst1.8          {q14},    [r0,:128], r2
        @ Second output row of the pair, same scheme into q15.
    147        vadd.u16        q12, q8,  q9
    148        pld             [r1, r2]
    149 NRND    vadd.u16        q12, q12, q13
    150        vext.8          q2,  q1,  q2,  #1
    151        vadd.u16        q0,  q10, q11
    152        shrn            d30, q12, #2
    153 NRND    vadd.u16        q0,  q0,  q13
    154        shrn            d31, q0,  #2
    155  .if \avg
    156        vld1.8          {q9},     [r0,:128]
    157        vrhadd.u8       q15, q15, q9
    158  .endif
    159        vaddl.u8        q9,  d2,  d4
    160        vaddl.u8        q11, d3,  d5
    161        vst1.8          {q15},    [r0,:128], r2
    162        bgt             1b
    163 
        @ Tail: last two output rows, needing one final source row.
    164        vld1.8          {d0-d2},  [r1], r2
    165        vadd.u16        q12, q8,  q9
    166 NRND    vadd.u16        q12, q12, q13
    167        vext.8          q15, q0,  q1,  #1
    168        vadd.u16        q1 , q10, q11
    169        shrn            d28, q12, #2
    170 NRND    vadd.u16        q1,  q1,  q13
    171        shrn            d29, q1,  #2
    172  .if \avg
    173        vld1.8          {q8},     [r0,:128]
    174        vrhadd.u8       q14, q14, q8
    175  .endif
    176        vaddl.u8        q8,  d0,  d30
    177        vaddl.u8        q10, d1,  d31
    178        vst1.8          {q14},    [r0,:128], r2
    179        vadd.u16        q12, q8,  q9
    180 NRND    vadd.u16        q12, q12, q13
    181        vadd.u16        q0,  q10, q11
    182        shrn            d30, q12, #2
    183 NRND    vadd.u16        q0,  q0,  q13
    184        shrn            d31, q0,  #2
    185  .if \avg
    186        vld1.8          {q9},     [r0,:128]
    187        vrhadd.u8       q15, q15, q9
    188  .endif
    189        vst1.8          {q15},    [r0,:128], r2
    190 
    191        bx              lr
    192 .endm
    193 
    194 .macro  pixels8         rnd=1, avg=0
        @ 8-pixel-wide analog of pixels16: copy (or, with \avg=1, blend into)
        @ an 8xN block.  r0 = dst (8-byte aligned), r1 = src, r2 = stride,
        @ r3 = height (multiple of 4; four rows per iteration).  \rnd is
        @ unused (plain copy).  Unlike pixels16, the \avg path reads dst
        @ back through r0 itself and rewinds it by four rows afterwards.
    195 1:      vld1.8          {d0},     [r1], r2
    196        vld1.8          {d1},     [r1], r2
    197        vld1.8          {d2},     [r1], r2
    198        pld             [r1, r2, lsl #2]
    199        vld1.8          {d3},     [r1], r2
        @ Prefetch upcoming source rows.
    200        pld             [r1]
    201        pld             [r1, r2]
    202        pld             [r1, r2, lsl #1]
    203  .if \avg
    204        vld1.8          {d4},     [r0,:64], r2
    205        vrhadd.u8       d0,  d0,  d4
    206        vld1.8          {d5},     [r0,:64], r2
    207        vrhadd.u8       d1,  d1,  d5
    208        vld1.8          {d6},     [r0,:64], r2
    209        vrhadd.u8       d2,  d2,  d6
    210        vld1.8          {d7},     [r0,:64], r2
    211        vrhadd.u8       d3,  d3,  d7
        @ Rewind dst by the four rows just read for blending.
    212        sub             r0,  r0,  r2,  lsl #2
    213  .endif
    214        subs            r3,  r3,  #4
    215        vst1.8          {d0},     [r0,:64], r2
    216        vst1.8          {d1},     [r0,:64], r2
    217        vst1.8          {d2},     [r0,:64], r2
    218        vst1.8          {d3},     [r0,:64], r2
    219        bne             1b
    220        bx              lr
    221 .endm
    222 
    223 .macro  pixels8_x2      rnd=1, avg=0
        @ Horizontal half-pel for an 8-wide block: out[x] =
        @ average(src[x], src[x+1]).  r0 = dst (8-byte aligned), r1 = src,
        @ r2 = stride, r3 = height (multiple of 2; two rows per iteration).
        @ Each 16-byte load (q) provides the 9th byte needed for the shift.
        @ "avg" is the pixfunc helper (vrhadd/vhadd by \rnd); \avg=1 also
        @ blends with the existing dst rows.
    224 1:      vld1.8          {q0},     [r1], r2
    225        vext.8          d1,  d0,  d1,  #1
    226        vld1.8          {q1},     [r1], r2
    227        vext.8          d3,  d2,  d3,  #1
    228        pld             [r1]
    229        pld             [r1, r2]
    230        subs            r3,  r3,  #2
        @ After the swap: q0 = {row0, row1}, q1 = {row0>>8, row1>>8},
        @ so one q-wide "avg" handles both rows at once.
    231        vswp            d1,  d2
    232        avg             q0,  q0,  q1
    233  .if \avg
    234        vld1.8          {d4},     [r0,:64], r2
    235        vld1.8          {d5},     [r0,:64]
    236        vrhadd.u8       q0,  q0,  q2
    237        sub             r0,  r0,  r2
    238  .endif
    239        vst1.8          {d0},     [r0,:64], r2
    240        vst1.8          {d1},     [r0,:64], r2
    241        bne             1b
    242        bx              lr
    243 .endm
    244 
    245 .macro  pixels8_y2      rnd=1, avg=0
        @ Vertical half-pel for an 8-wide block: out row n =
        @ average(src row n, src row n+1).  r0 = dst (8-byte aligned),
        @ r1 = src, r2 = stride, r3 = output height (multiple of 2); reads
        @ r3+1 source rows.  "avg" is the pixfunc helper (vrhadd/vhadd by
        @ \rnd); \avg=1 blends with existing dst.  The final row pair is
        @ peeled off after the loop, hence the initial "sub r3, r3, #2".
    246        sub             r3,  r3,  #2
    247        vld1.8          {d0},     [r1], r2
    248        vld1.8          {d1},     [r1], r2
    249 1:      subs            r3,  r3,  #2
        @ d0/d1 always hold the two most recently loaded source rows.
    250        avg             d4,  d0,  d1
    251        vld1.8          {d0},     [r1], r2
    252        avg             d5,  d0,  d1
    253        vld1.8          {d1},     [r1], r2
    254        pld             [r1]
    255        pld             [r1, r2]
    256  .if \avg
    257        vld1.8          {d2},     [r0,:64], r2
    258        vld1.8          {d3},     [r0,:64]
    259        vrhadd.u8       q2,  q2,  q1
    260        sub             r0,  r0,  r2
    261  .endif
    262        vst1.8          {d4},     [r0,:64], r2
    263        vst1.8          {d5},     [r0,:64], r2
    264        bne             1b
    265 
        @ Tail: last two output rows from one more source row.
    266        avg             d4,  d0,  d1
    267        vld1.8          {d0},     [r1], r2
    268        avg             d5,  d0,  d1
    269  .if \avg
    270        vld1.8          {d2},     [r0,:64], r2
    271        vld1.8          {d3},     [r0,:64]
    272        vrhadd.u8       q2,  q2,  q1
    273        sub             r0,  r0,  r2
    274  .endif
    275        vst1.8          {d4},     [r0,:64], r2
    276        vst1.8          {d5},     [r0,:64], r2
    277 
    278        bx              lr
    279 .endm
    280 
    281 .macro  pixels8_xy2     rnd=1, avg=0
        @ 2D half-pel for an 8-wide block: each output byte is
        @ (a + b + c + d + bias) >> 2 over the 2x2 source neighborhood.
        @ r0 = dst (8-byte aligned), r1 = src, r2 = stride, r3 = output
        @ height (multiple of 2); reads r3+1 source rows.  With \rnd=1,
        @ "shrn" is vrshrn.u16 (adds 2 before shifting); with \rnd=0 the
        @ NRND lines add a bias of 1 (q11) before a truncating vshrn.u16.
        @ Pipelined: q8 holds the widened horizontal pair-sums
        @ (src[x] + src[x+1]) of the newest row, q9 those of the previous
        @ row, so each output row costs one new vaddl.u8.
    282        sub             r3,  r3,  #2
    283        vld1.8          {q0},     [r1], r2
    284        vld1.8          {q1},     [r1], r2
    285 NRND    vmov.i16        q11, #1
    286        pld             [r1]
    287        pld             [r1, r2]
        @ Prime the pipeline with the first two rows' pair-sums.
    288        vext.8          d4,  d0,  d1,  #1
    289        vext.8          d6,  d2,  d3,  #1
    290        vaddl.u8        q8,  d0,  d4
    291        vaddl.u8        q9,  d2,  d6
    292 1:      subs            r3,  r3,  #2
        @ First output row of the pair -> d5.
    293        vld1.8          {q0},     [r1], r2
    294        pld             [r1]
    295        vadd.u16        q10, q8,  q9
    296        vext.8          d4,  d0,  d1,  #1
    297 NRND    vadd.u16        q10, q10, q11
    298        vaddl.u8        q8,  d0,  d4
    299        shrn            d5,  q10, #2
        @ Second output row of the pair -> d7.
    300        vld1.8          {q1},     [r1], r2
    301        vadd.u16        q10, q8,  q9
    302        pld             [r1, r2]
    303  .if \avg
    304        vld1.8          {d7},     [r0,:64]
    305        vrhadd.u8       d5,  d5,  d7
    306  .endif
    307 NRND    vadd.u16        q10, q10, q11
    308        vst1.8          {d5},     [r0,:64], r2
    309        shrn            d7,  q10, #2
    310  .if \avg
    311        vld1.8          {d5},     [r0,:64]
    312        vrhadd.u8       d7,  d7,  d5
    313  .endif
    314        vext.8          d6,  d2,  d3,  #1
    315        vaddl.u8        q9,  d2,  d6
    316        vst1.8          {d7},     [r0,:64], r2
    317        bgt             1b
    318 
        @ Tail: last two output rows from one final source row.
    319        vld1.8          {q0},     [r1], r2
    320        vadd.u16        q10, q8,  q9
    321        vext.8          d4,  d0,  d1,  #1
    322 NRND    vadd.u16        q10, q10, q11
    323        vaddl.u8        q8,  d0,  d4
    324        shrn            d5,  q10, #2
    325        vadd.u16        q10, q8,  q9
    326  .if \avg
    327        vld1.8          {d7},     [r0,:64]
    328        vrhadd.u8       d5,  d5,  d7
    329  .endif
    330 NRND    vadd.u16        q10, q10, q11
    331        vst1.8          {d5},     [r0,:64], r2
    332        shrn            d7,  q10, #2
    333  .if \avg
    334        vld1.8          {d5},     [r0,:64]
    335        vrhadd.u8       d7,  d7,  d5
    336  .endif
    337        vst1.8          {d7},     [r0,:64], r2
    338 
    339        bx              lr
    340 .endm
    341 
    342 .macro  pixfunc         pfx, name, suf, rnd=1, avg=0
        @ Emit one exported function ff_<pfx><name><suf>_neon whose body is
        @ the \name macro expanded with the given \rnd/\avg flags.  Before
        @ expansion, three helper macros are (re)defined to select the
        @ rounding behavior, and purged again afterwards:
        @   avg  - vrhadd.u8 (round-to-nearest) when \rnd=1,
        @          vhadd.u8 (truncating) when \rnd=0
        @   shrn - vrshrn.u16 (rounding narrow shift) vs. vshrn.u16
        @   NRND - swallows its line when \rnd=1, assembles it when \rnd=0
        @          (used for the "+1 bias" instructions of the xy2 kernels)
    343  .if \rnd
    344    .macro avg  rd, rn, rm
    345        vrhadd.u8       \rd, \rn, \rm
    346    .endm
    347    .macro shrn rd, rn, rm
    348        vrshrn.u16      \rd, \rn, \rm
    349    .endm
    350    .macro NRND insn:vararg
    351    .endm
    352  .else
    353    .macro avg  rd, rn, rm
    354        vhadd.u8        \rd, \rn, \rm
    355    .endm
    356    .macro shrn rd, rn, rm
    357        vshrn.u16       \rd, \rn, \rm
    358    .endm
    359    .macro NRND insn:vararg
    360        \insn
    361    .endm
    362  .endif
    363 function ff_\pfx\name\suf\()_neon, export=1
    364        \name           \rnd, \avg
    365 endfunc
        @ Purge so the next pixfunc invocation can redefine them.
    366        .purgem         avg
    367        .purgem         shrn
    368        .purgem         NRND
    369 .endm
    370 
    371 .macro  pixfunc2        pfx, name, avg=0
        @ Emit both rounding variants of a kernel: the plain rounded one
        @ (no suffix, rnd=1) and the truncating "_no_rnd" one (rnd=0).
    372        pixfunc         \pfx, \name,          rnd=1, avg=\avg
    373        pixfunc         \pfx, \name, _no_rnd, rnd=0, avg=\avg
    374 .endm
    375 
        @ qpel16 mc00 (full-pel "copy" case) has no body of its own: it
        @ loads the fixed height 16 into r3 and falls through into
        @ ff_put_pixels16_neon, emitted immediately below.
    376 function ff_put_h264_qpel16_mc00_neon, export=1
    377        mov             r3,  #16
    378 endfunc
    379 
        @ Instantiate the exported 16-wide "put" functions; pixfunc2 also
        @ emits the _no_rnd variant of each interpolating kernel.
    380        pixfunc         put_, pixels16,     avg=0
    381        pixfunc2        put_, pixels16_x2,  avg=0
    382        pixfunc2        put_, pixels16_y2,  avg=0
    383        pixfunc2        put_, pixels16_xy2, avg=0
    384 
        @ Same fall-through trick for the averaging variant: set r3 = 16
        @ and continue into ff_avg_pixels16_neon emitted just below.
    385 function ff_avg_h264_qpel16_mc00_neon, export=1
    386        mov             r3,  #16
    387 endfunc
    388 
        @ Instantiate the exported 16-wide "avg" (blend-with-dst) functions.
    389        pixfunc         avg_, pixels16,     avg=1
    390        pixfunc2        avg_, pixels16_x2,  avg=1
    391        pixfunc2        avg_, pixels16_y2,  avg=1
    392        pixfunc2        avg_, pixels16_xy2, avg=1
    393 
        @ 8-wide full-pel copy: set r3 = 8 and fall through into
        @ ff_put_pixels8_neon, emitted immediately below.
    394 function ff_put_h264_qpel8_mc00_neon, export=1
    395        mov             r3,  #8
    396 endfunc
    397 
        @ Instantiate the exported 8-wide "put" functions (rounded and,
        @ via pixfunc2, _no_rnd variants of the interpolating kernels).
    398        pixfunc         put_, pixels8,     avg=0
    399        pixfunc2        put_, pixels8_x2,  avg=0
    400        pixfunc2        put_, pixels8_y2,  avg=0
    401        pixfunc2        put_, pixels8_xy2, avg=0
    402 
        @ 8-wide full-pel averaging: set r3 = 8 and fall through into
        @ ff_avg_pixels8_neon, emitted immediately below.
    403 function ff_avg_h264_qpel8_mc00_neon, export=1
    404        mov             r3,  #8
    405 endfunc
    406 
        @ Note: plain pixfunc (not pixfunc2) here — only the rounded
        @ variants of the 8-wide "avg" kernels are emitted.
    407        pixfunc         avg_, pixels8,     avg=1
    408        pixfunc         avg_, pixels8_x2,  avg=1
    409        pixfunc         avg_, pixels8_y2,  avg=1
    410        pixfunc         avg_, pixels8_xy2, avg=1