tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hpeldsp_arm.S (21480B)


      1 @
      2 @ ARMv4-optimized halfpel functions
      3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
      4 @
      5 @ This file is part of FFmpeg.
      6 @
      7 @ FFmpeg is free software; you can redistribute it and/or
      8 @ modify it under the terms of the GNU Lesser General Public
      9 @ License as published by the Free Software Foundation; either
     10 @ version 2.1 of the License, or (at your option) any later version.
     11 @
     12 @ FFmpeg is distributed in the hope that it will be useful,
     13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15 @ Lesser General Public License for more details.
     16 @
     17 @ You should have received a copy of the GNU Lesser General Public
     18 @ License along with FFmpeg; if not, write to the Free Software
     19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     20 @
     21 
     22 #include "config.h"
     23 #include "libavutil/arm/asm.S"
     24 
     25 #if !HAVE_ARMV5TE_EXTERNAL
               @ With this flag off, the preload mnemonic is defined as the comment
               @ character, so every preload line below assembles to nothing.
               @ NOTE(review): presumably the flag is provided by config.h - confirm.
     26 #define pld @
     27 #endif
     28 
     29 .macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
               @ Funnel-shift five source words into four destination words:
               @   Rd[i] = (Rn[i] >> (shift * 8)) | (Rn[i+1] << (32 - shift * 8))
               @ shift is a byte offset (the callers in this file pass 1, 2 or 3).
               @ The Rn registers are only read, the Rd registers are overwritten.
               @ All four right shifts are issued before the merges, so no Rd may
               @ alias an Rn that is still needed by a later instruction.
     30        mov             \Rd0, \Rn0, lsr #(\shift * 8)
     31        mov             \Rd1, \Rn1, lsr #(\shift * 8)
     32        mov             \Rd2, \Rn2, lsr #(\shift * 8)
     33        mov             \Rd3, \Rn3, lsr #(\shift * 8)
     34        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
     35        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
     36        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
     37        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
     38 .endm
     39 .macro  ALIGN_DWORD shift, R0, R1, R2
               @ In-place variant: R0 and R1 are replaced by the two words found
               @ shift bytes into the R0:R1:R2 sequence, R2 is only read.
               @ R0 is completed from the original R1 before R1 itself is shifted,
               @ so the instruction order below must not be changed.
     40        mov             \R0, \R0, lsr #(\shift * 8)
     41        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
     42        mov             \R1, \R1, lsr #(\shift * 8)
     43        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
     44 .endm
     45 .macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
               @ Two-word funnel shift into separate destination registers:
               @   Rdst0 = (Rsrc0 >> (shift * 8)) | (Rsrc1 << (32 - shift * 8))
               @   Rdst1 = (Rsrc1 >> (shift * 8)) | (Rsrc2 << (32 - shift * 8))
               @ Rsrc0 is read only by the first instruction, so Rdst1 may alias Rsrc0
               @ (one caller in this file does that). Rsrc1 and Rsrc2 are read after
               @ both destinations have been written and must stay distinct from them.
     46        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
     47        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
     48        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
     49        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
     50 .endm
     51 
     52 .macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
               @ Byte-wise average of two word pairs, rounding halves up:
               @ each byte of Rd becomes (n + m + 1) >> 1 of the matching bytes.
     53        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
     54        @ Rmask = 0xFEFEFEFE
     55        @ Rn = destroy
               @ Rm is only read. The mask clears bit 0 of every byte so that the
               @ shift right by one cannot leak a bit into the byte below.
     56        eor             \Rd0, \Rn0, \Rm0
     57        eor             \Rd1, \Rn1, \Rm1
     58        orr             \Rn0, \Rn0, \Rm0
     59        orr             \Rn1, \Rn1, \Rm1
     60        and             \Rd0, \Rd0, \Rmask
     61        and             \Rd1, \Rd1, \Rmask
     62        sub             \Rd0, \Rn0, \Rd0, lsr #1
     63        sub             \Rd1, \Rn1, \Rd1, lsr #1
     64 .endm
     65 
     66 .macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
               @ Byte-wise average of two word pairs, rounding halves down:
               @ each byte of Rd becomes (n + m) >> 1 of the matching bytes.
     67        @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
     68        @ Rmask = 0xFEFEFEFE
     69        @ Rn = destroy
               @ Rm is only read. The mask clears bit 0 of every byte so that the
               @ shift right by one cannot leak a bit into the byte below.
     70        eor             \Rd0, \Rn0, \Rm0
     71        eor             \Rd1, \Rn1, \Rm1
     72        and             \Rn0, \Rn0, \Rm0
     73        and             \Rn1, \Rn1, \Rm1
     74        and             \Rd0, \Rd0, \Rmask
     75        and             \Rd1, \Rd1, \Rmask
     76        add             \Rd0, \Rn0, \Rd0, lsr #1
     77        add             \Rd1, \Rn1, \Rd1, lsr #1
     78 .endm
     79 
     80 .macro  JMP_ALIGN tmp, reg
               @ Dispatch on the low two bits of reg, then round reg down to a word
               @ boundary. Control continues at the next local label 1, 2, 3 or 4
               @ for a byte offset of 0, 1, 2 or 3 respectively.
               @ tmp and the condition flags are clobbered.
     81        ands            \tmp, \reg, #3
     82        bic             \reg, \reg, #3         @ no S suffix: flags from ands survive
     83        beq             1f
     84        subs            \tmp, \tmp, #1
     85        beq             2f
     86        subs            \tmp, \tmp, #1
     87        beq             3f
     88        b    4f
     89 .endm
     90 
     91 @ ----------------------------------------------------------------
     92 function ff_put_pixels16_arm, export=1, align=5
     93        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
     94        @ block = word aligned, pixels = unaligned
               @ Copies 16 bytes per row. In the code below r0 is the store pointer,
               @ r1 the load pointer, r2 is added to both after each row and r3 is
               @ the row counter. r12 is used as scratch without being saved.
               @ NOTE(review): the counter is tested after a row has been written,
               @ so h is assumed to be at least 1 - confirm against the callers.
     95        pld             [r1]
     96        push            {r4-r11, lr}
     97        JMP_ALIGN       r5,  r1
               @ source offset 0: four words are copied unchanged
     98 1:
     99        ldm             r1,  {r4-r7}
    100        add             r1,  r1,  r2
    101        stm             r0,  {r4-r7}
    102        pld             [r1]
    103        subs            r3,  r3,  #1
    104        add             r0,  r0,  r2
    105        bne             1b
    106        pop             {r4-r11, pc}
    107        .align 5
               @ source offset 1: load five words and realign them into r9-r12
    108 2:
    109        ldm             r1,  {r4-r8}
    110        add             r1,  r1,  r2
    111        ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
    112        pld             [r1]
    113        subs            r3,  r3,  #1
    114        stm             r0,  {r9-r12}
    115        add             r0,  r0,  r2
    116        bne             2b
    117        pop             {r4-r11, pc}
    118        .align 5
               @ source offset 2
    119 3:
    120        ldm             r1,  {r4-r8}
    121        add             r1,  r1,  r2
    122        ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
    123        pld             [r1]
    124        subs            r3,  r3,  #1
    125        stm             r0,  {r9-r12}
    126        add             r0,  r0,  r2
    127        bne             3b
    128        pop             {r4-r11, pc}
    129        .align 5
               @ source offset 3
    130 4:
    131        ldm             r1,  {r4-r8}
    132        add             r1,  r1,  r2
    133        ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
    134        pld             [r1]
    135        subs            r3,  r3,  #1
    136        stm             r0,  {r9-r12}
    137        add             r0,  r0,  r2
    138        bne             4b
    139        pop             {r4-r11,pc}
    140 endfunc
    141 
    142 @ ----------------------------------------------------------------
    143 function ff_put_pixels8_arm, export=1, align=5
    144        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    145        @ block = word aligned, pixels = unaligned
               @ Copies 8 bytes per row. In the code below r0 is the store pointer,
               @ r1 the load pointer, r2 is added to both after each row and r3 is
               @ the row counter. r12 holds the third source word in the unaligned
               @ paths and is not saved.
               @ NOTE(review): the counter is tested after a row has been written,
               @ so h is assumed to be at least 1 - confirm against the callers.
    146        pld             [r1]
    147        push            {r4-r5,lr}
    148        JMP_ALIGN       r5,  r1
               @ source offset 0: two words are copied unchanged
    149 1:
    150        ldm             r1,  {r4-r5}
    151        add             r1,  r1,  r2
    152        subs            r3,  r3,  #1
    153        pld             [r1]
    154        stm             r0,  {r4-r5}
    155        add             r0,  r0,  r2
    156        bne             1b
    157        pop             {r4-r5,pc}
    158        .align 5
               @ source offset 1: load three words and realign r4-r5 in place
    159 2:
    160        ldm             r1,  {r4-r5, r12}
    161        add             r1,  r1,  r2
    162        ALIGN_DWORD     1,   r4,  r5,  r12
    163        pld             [r1]
    164        subs            r3,  r3,  #1
    165        stm             r0,  {r4-r5}
    166        add             r0,  r0,  r2
    167        bne             2b
    168        pop             {r4-r5,pc}
    169        .align 5
               @ source offset 2
    170 3:
    171        ldm             r1,  {r4-r5, r12}
    172        add             r1,  r1,  r2
    173        ALIGN_DWORD     2,   r4,  r5,  r12
    174        pld             [r1]
    175        subs            r3,  r3,  #1
    176        stm             r0,  {r4-r5}
    177        add             r0,  r0,  r2
    178        bne             3b
    179        pop             {r4-r5,pc}
    180        .align 5
               @ source offset 3
    181 4:
    182        ldm             r1,  {r4-r5, r12}
    183        add             r1,  r1,  r2
    184        ALIGN_DWORD     3,   r4,  r5,  r12
    185        pld             [r1]
    186        subs            r3,  r3,  #1
    187        stm             r0,  {r4-r5}
    188        add             r0,  r0,  r2
    189        bne             4b
    190        pop             {r4-r5,pc}
    191 endfunc
    192 
    193 @ ----------------------------------------------------------------
    194 function ff_put_pixels8_x2_arm, export=1, align=5
    195        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    196        @ block = word aligned, pixels = unaligned
               @ Horizontal half-pel: each of the 8 output bytes in a row is the
               @ rounded-up average of a source byte and the byte one position
               @ further along in the loaded words. Three words are loaded per row
               @ and two windows one byte apart are fed to RND_AVG32.
               @ r0 = store pointer, r1 = load pointer, r2 = row step, r3 = row counter,
               @ r12 = 0xfefefefe mask for RND_AVG32.
    197        pld             [r1]
    198        push            {r4-r10,lr}
    199        ldr             r12, =0xfefefefe
    200        JMP_ALIGN       r5,  r1
               @ source offset 0: windows are r4-r5 as loaded and the offset 1 window
    201 1:
    202        ldm             r1,  {r4-r5, r10}
    203        add             r1,  r1,  r2
    204        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
    205        pld             [r1]
    206        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
    207        subs            r3,  r3,  #1
    208        stm             r0,  {r8-r9}
    209        add             r0,  r0,  r2
    210        bne             1b
    211        pop             {r4-r10,pc}
    212        .align 5
               @ source offset 1: windows at offsets 1 and 2, result reuses r4-r5
    213 2:
    214        ldm             r1,  {r4-r5, r10}
    215        add             r1,  r1,  r2
    216        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
    217        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
    218        pld             [r1]
    219        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
    220        subs            r3,  r3,  #1
    221        stm             r0,  {r4-r5}
    222        add             r0,  r0,  r2
    223        bne             2b
    224        pop             {r4-r10,pc}
    225        .align 5
               @ source offset 2: windows at offsets 2 and 3
    226 3:
    227        ldm             r1,  {r4-r5, r10}
    228        add             r1,  r1,  r2
    229        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
    230        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
    231        pld             [r1]
    232        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
    233        subs            r3,  r3,  #1
    234        stm             r0,  {r4-r5}
    235        add             r0,  r0,  r2
    236        bne             3b
    237        pop             {r4-r10,pc}
    238        .align 5
               @ source offset 3: the offset 4 window is simply r5 and r10 as loaded
    239 4:
    240        ldm             r1,  {r4-r5, r10}
    241        add             r1,  r1,  r2
    242        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
    243        pld             [r1]
    244        RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
    245        subs            r3,  r3,  #1
    246        stm             r0,  {r8-r9}
    247        add             r0,  r0,  r2
    248        bne             4b
    249        pop             {r4-r10,pc}
    250 endfunc
    251 
    252 function ff_put_no_rnd_pixels8_x2_arm, export=1, align=5
    253        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    254        @ block = word aligned, pixels = unaligned
               @ Horizontal half-pel without rounding: each of the 8 output bytes in
               @ a row is the rounded-down average of a source byte and the byte one
               @ position further along in the loaded words. Same structure as the
               @ rounding variant, with NO_RND_AVG32 in place of RND_AVG32.
               @ r0 = store pointer, r1 = load pointer, r2 = row step, r3 = row counter,
               @ r12 = 0xfefefefe mask for NO_RND_AVG32.
    255        pld             [r1]
    256        push            {r4-r10,lr}
    257        ldr             r12, =0xfefefefe
    258        JMP_ALIGN       r5,  r1
               @ source offset 0: windows are r4-r5 as loaded and the offset 1 window
    259 1:
    260        ldm             r1,  {r4-r5, r10}
    261        add             r1,  r1,  r2
    262        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
    263        pld             [r1]
    264        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
    265        subs            r3,  r3,  #1
    266        stm             r0,  {r8-r9}
    267        add             r0,  r0,  r2
    268        bne             1b
    269        pop             {r4-r10,pc}
    270        .align 5
               @ source offset 1: windows at offsets 1 and 2, result reuses r4-r5
    271 2:
    272        ldm             r1,  {r4-r5, r10}
    273        add             r1,  r1,  r2
    274        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
    275        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
    276        pld             [r1]
    277        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
    278        subs            r3,  r3,  #1
    279        stm             r0,  {r4-r5}
    280        add             r0,  r0,  r2
    281        bne             2b
    282        pop             {r4-r10,pc}
    283        .align 5
               @ source offset 2: windows at offsets 2 and 3
    284 3:
    285        ldm             r1,  {r4-r5, r10}
    286        add             r1,  r1,  r2
    287        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
    288        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
    289        pld             [r1]
    290        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
    291        subs            r3,  r3,  #1
    292        stm             r0,  {r4-r5}
    293        add             r0,  r0,  r2
    294        bne             3b
    295        pop             {r4-r10,pc}
    296        .align 5
               @ source offset 3: the offset 4 window is simply r5 and r10 as loaded
    297 4:
    298        ldm             r1,  {r4-r5, r10}
    299        add             r1,  r1,  r2
    300        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
    301        pld             [r1]
    302        NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
    303        subs            r3,  r3,  #1
    304        stm             r0,  {r8-r9}
    305        add             r0,  r0,  r2
    306        bne             4b
    307        pop             {r4-r10,pc}
    308 endfunc
    309 
    310 
    311 @ ----------------------------------------------------------------
    312 function ff_put_pixels8_y2_arm, export=1, align=5
    313        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    314        @ block = word aligned, pixels = unaligned
               @ Vertical half-pel: every output row is the rounded-up byte average
               @ of a source row and the source row that follows it.
               @ r0 = store pointer, r1 = load pointer, r2 = row step,
               @ r12 = 0xfefefefe mask for RND_AVG32.
               @ The first source row is loaded ahead of the loop (label 6), and each
               @ loop pass loads two more rows and stores two output rows. The two
               @ averages swap the Rn and Rm roles, so the row that RND_AVG32 destroys
               @ is always the older one and the newer row survives for the next step.
               @ NOTE(review): r3 is halved to count row pairs, so an odd h would
               @ produce one row fewer and h below 2 is not handled - confirm callers.
    315        pld             [r1]
    316        push            {r4-r11,lr}
    317        mov             r3,  r3,  lsr #1
    318        ldr             r12, =0xfefefefe
    319        JMP_ALIGN       r5,  r1
               @ source offset 0: rows alternate between r4-r5 and r6-r7
    320 1:
    321        ldm             r1,  {r4-r5}
    322        add             r1,  r1,  r2
    323 6:      ldm             r1,  {r6-r7}
    324        add             r1,  r1,  r2
    325        pld             [r1]
    326        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
    327        ldm             r1,  {r4-r5}
    328        add             r1,  r1,  r2
    329        stm             r0,  {r8-r9}
    330        add             r0,  r0,  r2
    331        pld             [r1]
    332        RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
    333        subs            r3,  r3,  #1
    334        stm             r0,  {r8-r9}
    335        add             r0,  r0,  r2
    336        bne             6b
    337        pop             {r4-r11,pc}
    338        .align 5
               @ source offset 1: rows alternate between r4-r5 and r7-r8 after realignment
    339 2:
    340        ldm             r1,  {r4-r6}
    341        add             r1,  r1,  r2
    342        pld             [r1]
    343        ALIGN_DWORD     1,   r4,  r5,  r6
    344 6:      ldm             r1,  {r7-r9}
    345        add             r1,  r1,  r2
    346        pld             [r1]
    347        ALIGN_DWORD     1,   r7,  r8,  r9
    348        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
    349        stm             r0,  {r10-r11}
    350        add             r0,  r0,  r2
    351        ldm             r1,  {r4-r6}
    352        add             r1,  r1,  r2
    353        pld             [r1]
    354        ALIGN_DWORD     1,   r4,  r5,  r6
    355        subs            r3,  r3,  #1
    356        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
    357        stm             r0,  {r10-r11}
    358        add             r0,  r0,  r2
    359        bne             6b
    360        pop             {r4-r11,pc}
    361        .align 5
               @ source offset 2
    362 3:
    363        ldm             r1,  {r4-r6}
    364        add             r1,  r1,  r2
    365        pld             [r1]
    366        ALIGN_DWORD     2,   r4,  r5,  r6
    367 6:      ldm             r1,  {r7-r9}
    368        add             r1,  r1,  r2
    369        pld             [r1]
    370        ALIGN_DWORD     2,   r7,  r8,  r9
    371        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
    372        stm             r0,  {r10-r11}
    373        add             r0,  r0,  r2
    374        ldm             r1,  {r4-r6}
    375        add             r1,  r1,  r2
    376        pld             [r1]
    377        ALIGN_DWORD     2,   r4,  r5,  r6
    378        subs            r3,  r3,  #1
    379        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
    380        stm             r0,  {r10-r11}
    381        add             r0,  r0,  r2
    382        bne             6b
    383        pop             {r4-r11,pc}
    384        .align 5
               @ source offset 3
    385 4:
    386        ldm             r1,  {r4-r6}
    387        add             r1,  r1,  r2
    388        pld             [r1]
    389        ALIGN_DWORD     3,   r4,  r5,  r6
    390 6:      ldm             r1,  {r7-r9}
    391        add             r1,  r1,  r2
    392        pld             [r1]
    393        ALIGN_DWORD     3,   r7,  r8,  r9
    394        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
    395        stm             r0,  {r10-r11}
    396        add             r0,  r0,  r2
    397        ldm             r1,  {r4-r6}
    398        add             r1,  r1,  r2
    399        pld             [r1]
    400        ALIGN_DWORD     3,   r4,  r5,  r6
    401        subs            r3,  r3,  #1
    402        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
    403        stm             r0,  {r10-r11}
    404        add             r0,  r0,  r2
    405        bne             6b
    406        pop             {r4-r11,pc}
    407 endfunc
    408 
    409 function ff_put_no_rnd_pixels8_y2_arm, export=1, align=5
    410        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    411        @ block = word aligned, pixels = unaligned
               @ Vertical half-pel without rounding: every output row is the
               @ rounded-down byte average of a source row and the row that follows it.
               @ r0 = store pointer, r1 = load pointer, r2 = row step,
               @ r12 = 0xfefefefe mask for NO_RND_AVG32.
               @ The first source row is loaded ahead of the loop (label 6), and each
               @ loop pass loads two more rows and stores two output rows. The two
               @ averages swap the Rn and Rm roles, so the row that the macro destroys
               @ is always the older one and the newer row survives for the next step.
               @ NOTE(review): r3 is halved to count row pairs, so an odd h would
               @ produce one row fewer and h below 2 is not handled - confirm callers.
    412        pld             [r1]
    413        push            {r4-r11,lr}
    414        mov             r3,  r3,  lsr #1
    415        ldr             r12, =0xfefefefe
    416        JMP_ALIGN       r5,  r1
               @ source offset 0: rows alternate between r4-r5 and r6-r7
    417 1:
    418        ldm             r1,  {r4-r5}
    419        add             r1,  r1,  r2
    420 6:      ldm             r1,  {r6-r7}
    421        add             r1,  r1,  r2
    422        pld             [r1]
    423        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
    424        ldm             r1,  {r4-r5}
    425        add             r1,  r1,  r2
    426        stm             r0,  {r8-r9}
    427        add             r0,  r0,  r2
    428        pld             [r1]
    429        NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
    430        subs            r3,  r3,  #1
    431        stm             r0,  {r8-r9}
    432        add             r0,  r0,  r2
    433        bne             6b
    434        pop             {r4-r11,pc}
    435        .align 5
               @ source offset 1: rows alternate between r4-r5 and r7-r8 after realignment
    436 2:
    437        ldm             r1,  {r4-r6}
    438        add             r1,  r1,  r2
    439        pld             [r1]
    440        ALIGN_DWORD     1,   r4,  r5,  r6
    441 6:      ldm             r1,  {r7-r9}
    442        add             r1,  r1,  r2
    443        pld             [r1]
    444        ALIGN_DWORD     1,   r7,  r8,  r9
    445        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
    446        stm             r0,  {r10-r11}
    447        add             r0,  r0,  r2
    448        ldm             r1,  {r4-r6}
    449        add             r1,  r1,  r2
    450        pld             [r1]
    451        ALIGN_DWORD     1,   r4,  r5,  r6
    452        subs            r3,  r3,  #1
    453        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
    454        stm             r0,  {r10-r11}
    455        add             r0,  r0,  r2
    456        bne             6b
    457        pop             {r4-r11,pc}
    458        .align 5
               @ source offset 2
    459 3:
    460        ldm             r1,  {r4-r6}
    461        add             r1,  r1,  r2
    462        pld             [r1]
    463        ALIGN_DWORD     2,   r4,  r5,  r6
    464 6:      ldm             r1,  {r7-r9}
    465        add             r1,  r1,  r2
    466        pld             [r1]
    467        ALIGN_DWORD     2,   r7,  r8,  r9
    468        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
    469        stm             r0,  {r10-r11}
    470        add             r0,  r0,  r2
    471        ldm             r1,  {r4-r6}
    472        add             r1,  r1,  r2
    473        pld             [r1]
    474        ALIGN_DWORD     2,   r4,  r5,  r6
    475        subs            r3,  r3,  #1
    476        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
    477        stm             r0,  {r10-r11}
    478        add             r0,  r0,  r2
    479        bne             6b
    480        pop             {r4-r11,pc}
    481        .align 5
               @ source offset 3
    482 4:
    483        ldm             r1,  {r4-r6}
    484        add             r1,  r1,  r2
    485        pld             [r1]
    486        ALIGN_DWORD     3,   r4,  r5,  r6
    487 6:      ldm             r1,  {r7-r9}
    488        add             r1,  r1,  r2
    489        pld             [r1]
    490        ALIGN_DWORD     3,   r7,  r8,  r9
    491        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
    492        stm             r0,  {r10-r11}
    493        add             r0,  r0,  r2
    494        ldm             r1,  {r4-r6}
    495        add             r1,  r1,  r2
    496        pld             [r1]
    497        ALIGN_DWORD     3,   r4,  r5,  r6
    498        subs            r3,  r3,  #1
    499        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
    500        stm             r0,  {r10-r11}
    501        add             r0,  r0,  r2
    502        bne             6b
    503        pop             {r4-r11,pc}
    504 endfunc
    505 
    506        .ltorg
    507 
    508 @ ----------------------------------------------------------------
    509 .macro  RND_XY2_IT align, rnd
               @ Processes one source row for the xy2 (2x2 average) functions.
               @ Loads three words at r1, steps r1 by r2, and builds two 8-byte
               @ windows one byte apart: a in r4-r5 and b in r6-r7.
               @ Results: r8-r9  = per-byte low parts  (a & 3) + (b & 3) (+ bias)
               @          r10-r11 = per-byte high parts (a >> 2) + (b >> 2)
               @ The bias is added only when r3 is even, i.e. on every other row.
               @ With rnd = lsl it is 0x02020202, with rnd = lsr it is 0x01010101.
               @ r3 is decremented last and the flags of that subs are left for the
               @ caller. Clobbers r4-r12 and r14.
    510        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
    511        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
    512 .if \align == 0
    513        ldm             r1,  {r6-r8}
    514 .elseif \align == 3
    515        ldm             r1,  {r5-r7}
    516 .else
    517        ldm             r1,  {r8-r10}
    518 .endif
    519        add             r1,  r1,  r2
    520        pld             [r1]
               @ align 0: b = loaded r6-r7 as is, a... see below. The window at the
               @ lower offset and the one a byte later are produced per alignment.
               @ For align 0 and align 3 one window is the loaded register pair itself.
    521 .if \align == 0
    522        ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
    523 .elseif \align == 1
    524        ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
    525        ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
    526 .elseif \align == 2
    527        ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
    528        ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
    529 .elseif \align == 3
               @ second destination aliases the first source here, which is safe
               @ because that source is consumed by the first instruction only
    530        ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
    531 .endif
    532        ldr             r14, =0x03030303
    533        tst             r3,  #1                @ Z set when r3 is even
    534        and             r8,  r4,  r14
    535        and             r9,  r5,  r14
    536        and             r10, r6,  r14
    537        and             r11, r7,  r14
    538        it              eq
    539        andeq           r14, r14, r14, \rnd #1 @ turn the 0x03 mask into the bias
    540        add             r8,  r8,  r10
    541        add             r9,  r9,  r11
    542        ldr             r12, =0xfcfcfcfc >> 2  @ 0x3f3f3f3f, applied after the shift
    543        itt             eq
    544        addeq           r8,  r8,  r14
    545        addeq           r9,  r9,  r14
    546        and             r4,  r12, r4,  lsr #2
    547        and             r5,  r12, r5,  lsr #2
    548        and             r6,  r12, r6,  lsr #2
    549        and             r7,  r12, r7,  lsr #2
    550        add             r10, r4,  r6
    551        add             r11, r5,  r7
    552        subs            r3,  r3,  #1           @ flags consumed by the caller
    553 .endm
    554 
    555 .macro RND_XY2_EXPAND align, rnd
               @ Complete row loop for one source alignment of the xy2 functions.
               @ The first RND_XY2_IT primes r8-r11 with the sums of the first row.
               @ Each loop pass saves those on the stack, computes the sums of the
               @ next row, reloads the saved ones into r4-r7 and combines both rows:
               @   out = (((low_prev + low_cur) >> 2) & 0x0f0f0f0f) + high_prev + high_cur
               @ Two words are stored at r0 and r0 is stepped by r2.
               @ The bge uses the flags of the subs at the end of RND_XY2_IT (the
               @ instructions in between do not set flags), so the loop runs until
               @ r3 drops below zero.
               @ The final pop matches the push of r4-r11 and lr done by the
               @ functions that expand this macro, and returns from them.
    556        RND_XY2_IT      \align, \rnd
    557 6:      push            {r8-r11}
    558        RND_XY2_IT      \align, \rnd
    559        pop             {r4-r7}
    560        add             r4,  r4,  r8
    561        add             r5,  r5,  r9
    562        ldr             r14, =0x0f0f0f0f
    563        add             r6,  r6,  r10
    564        add             r7,  r7,  r11
    565        and             r4,  r14, r4,  lsr #2  @ drop bits shifted in from the next byte
    566        and             r5,  r14, r5,  lsr #2
    567        add             r4,  r4,  r6
    568        add             r5,  r5,  r7
    569        stm             r0,  {r4-r5}
    570        add             r0,  r0,  r2
    571        bge             6b
    572        pop             {r4-r11,pc}
    573 .endm
    574 
    575 function ff_put_pixels8_xy2_arm, export=1, align=5
    576        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    577        @ block = word aligned, pixels = unaligned
               @ Diagonal half-pel with rounding: the whole body is RND_XY2_EXPAND,
               @ expanded once per source alignment with rnd = lsl (bias 0x02020202).
               @ Each expansion ends with its own pop of r4-r11 and pc, so control
               @ never falls through from one label into the next.
    578        pld             [r1]
    579        push            {r4-r11,lr} @ R14 is also called LR
    580        JMP_ALIGN       r5,  r1
    581 1:      RND_XY2_EXPAND  0, lsl
    582        .align 5
    583 2:      RND_XY2_EXPAND  1, lsl
    584        .align 5
    585 3:      RND_XY2_EXPAND  2, lsl
    586        .align 5
    587 4:      RND_XY2_EXPAND  3, lsl
    588 endfunc
    589 
    590 function ff_put_no_rnd_pixels8_xy2_arm, export=1, align=5
    591        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
    592        @ block = word aligned, pixels = unaligned
               @ Diagonal half-pel, no-rounding variant: the whole body is
               @ RND_XY2_EXPAND, expanded once per source alignment with rnd = lsr
               @ (bias 0x01010101 instead of 0x02020202).
               @ Each expansion ends with its own pop of r4-r11 and pc, so control
               @ never falls through from one label into the next.
    593        pld             [r1]
    594        push            {r4-r11,lr}
    595        JMP_ALIGN       r5,  r1
    596 1:      RND_XY2_EXPAND  0, lsr
    597        .align 5
    598 2:      RND_XY2_EXPAND  1, lsr
    599        .align 5
    600 3:      RND_XY2_EXPAND  2, lsr
    601        .align 5
    602 4:      RND_XY2_EXPAND  3, lsr
    603 endfunc
    603 endfunc