tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

simple_idct_armv5te.S (17043B)


      1 /*
      2 * Simple IDCT
      3 *
      4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
      5 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
      6 *
      7 * This file is part of FFmpeg.
      8 *
      9 * FFmpeg is free software; you can redistribute it and/or
     10 * modify it under the terms of the GNU Lesser General Public
     11 * License as published by the Free Software Foundation; either
     12 * version 2.1 of the License, or (at your option) any later version.
     13 *
     14 * FFmpeg is distributed in the hope that it will be useful,
     15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17 * Lesser General Public License for more details.
     18 *
     19 * You should have received a copy of the GNU Lesser General Public
     20 * License along with FFmpeg; if not, write to the Free Software
     21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     22 */
     23 
     24 #include "libavutil/arm/asm.S"
     25 
     26 #define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
     27 #define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
     28 #define W3  19266   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
     29 #define W4  16383   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
     30 #define W5  12873   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
     31 #define W6  8867    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
     32 #define W7  4520    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
     33 #define ROW_SHIFT 11
     34 #define COL_SHIFT 20
     35 
     36 #define W13 (W1 | (W3 << 16))
     37 #define W26 (W2 | (W6 << 16))
     38 #define W57 (W5 | (W7 << 16))
     39 
     40 function idct_row_armv5te
/*
 * idct_row_armv5te: in-place one-dimensional inverse DCT of a single
 * row of eight 16-bit coefficients.
 *
 * In:      a1 (r0) = pointer to the row (8 x int16, packed two per word)
 * Clobbers a2-a4, v1-v7, fp, ip, lr, flags
 *
 * APCS register aliases: a1-a4 = r0-r3, v1-v7 = r4-r10, fp = r11,
 * ip = r12, lr = r14.
 *
 * Uses the ARMv5TE 16x16 DSP multiplies (smulxy/smlaxy).  The even
 * part accumulates in v1..v4 (a0..a3), the odd part in v5,v6,v7,fp
 * (b0, -b1, b2, b3); outputs are (a +/- b) >> ROW_SHIFT.
 */
     41        str    lr, [sp, #-4]!
     42 
     43        ldrd   v1, v2, [a1, #8]      /* v1 = row[5:4], v2 = row[7:6] */
     44        ldrd   a3, a4, [a1]          /* a3 = row[1:0], a4 = row[3:2] */
     45        orrs   v1, v1, v2            /* row[4..7] all zero ... */
     46        itt    eq
     47        cmpeq  v1, a4                /* ... and row[3:2] zero ... */
     48        cmpeq  v1, a3, lsr #16       /* ... and row[1] zero? */
     49        beq    row_dc_only           /* yes: only the DC term is set */
     50 
     51        mov    v1, #(1<<(ROW_SHIFT-1)) /* rounding bias */
     52        mov    ip, #16384
     53        sub    ip, ip, #1            /* ip = W4 */
     54        smlabb v1, ip, a3, v1        /* v1 = W4*row[0]+(1<<(RS-1)) */
     55        ldr    ip, =W26              /* ip = W2 | (W6 << 16) */
     56        smultb a2, ip, a4            /* a2 = W6*row[2] */
     57        smulbb lr, ip, a4            /* lr = W2*row[2] */
     58        add    v2, v1, a2            /* v2 = a1 */
     59        sub    v3, v1, a2            /* v3 = a2 */
     60        sub    v4, v1, lr            /* v4 = a3 */
     61        add    v1, v1, lr            /* v1 = a0 */
     62 
     63        ldr    ip, =W13              /* ip = W1 | (W3 << 16) */
     64        ldr    lr, =W57              /* lr = W5 | (W7 << 16) */
     65        smulbt v5, ip, a3            /* v5 = W1*row[1] */
     66        smultt v6, lr, a4            /* v6 = W7*row[3] */
     67        smlatt v5, ip, a4, v5        /* v5 += W3*row[3] */
     68        smultt a2, ip, a3            /* a2 = W3*row[1] */
     69        smulbt v7, lr, a3            /* v7 = W5*row[1] */
     70        sub    v6, v6, a2            /* v6 = W7*row[3] - W3*row[1] = -b1 */
     71        smulbt a2, ip, a4            /* a2 = W1*row[3] */
     72        smultt fp, lr, a3            /* fp = W7*row[1] */
     73        sub    v7, v7, a2            /* v7 = W5*row[1] - W1*row[3] */
     74        smulbt a2, lr, a4            /* a2 = W5*row[3] */
     75        ldrd   a3, a4, [a1, #8]     /* a3=row[5:4] a4=row[7:6] */
     76        sub    fp, fp, a2            /* fp = W7*row[1] - W5*row[3] */
     77 
     78        orrs   a2, a3, a4            /* row[4..7] all zero? */
     79        beq    1f                    /* then skip their contribution */
     80 
     81        smlabt v5, lr, a3, v5        /* v5 += W5*row[5] */
     82        smlabt v6, ip, a3, v6        /* v6 += W1*row[5] */
     83        smlatt v5, lr, a4, v5        /* v5 += W7*row[7]; v5 = b0 */
     84        smlabt v6, lr, a4, v6        /* v6 += W5*row[7]; v6 = -b1 */
     85        smlatt v7, lr, a3, v7        /* v7 += W7*row[5] */
     86        smlatt fp, ip, a3, fp        /* fp += W3*row[5] */
     87        smulbt a2, ip, a4            /* a2 = W1*row[7] */
     88        smlatt v7, ip, a4, v7        /* v7 += W3*row[7]; v7 = b2 */
     89        sub    fp, fp, a2            /* fp -= W1*row[7]; fp = b3 */
     90 
     91        ldr    ip, =W26              /* ip = W2 | (W6 << 16) */
     92        mov    a2, #16384
     93        sub    a2, a2, #1            /* a2 =  W4 */
     94        smulbb a2, a2, a3            /* a2 =  W4*row[4] */
     95        smultb lr, ip, a4            /* lr =  W6*row[6] */
     96        add    v1, v1, a2            /* v1 += W4*row[4] */
     97        add    v1, v1, lr            /* v1 += W6*row[6] */
     98        add    v4, v4, a2            /* v4 += W4*row[4] */
     99        sub    v4, v4, lr            /* v4 -= W6*row[6] */
    100        smulbb lr, ip, a4            /* lr =  W2*row[6] */
    101        sub    v2, v2, a2            /* v2 -= W4*row[4] */
    102        sub    v2, v2, lr            /* v2 -= W2*row[6] */
    103        sub    v3, v3, a2            /* v3 -= W4*row[4] */
    104        add    v3, v3, lr            /* v3 += W2*row[6] */
    105 
    106 1:      add    a2, v1, v5            /* row[0] = (a0 + b0) >> 11 */
    107        mov    a3, a2, lsr #11
    108        bic    a3, a3, #0x1f0000     /* clear shifted-in sign bits 16-20 */
    109        sub    a2, v2, v6            /* row[1] = (a1 + b1) >> 11 */
    110        mov    a2, a2, lsr #11
    111        add    a3, a3, a2, lsl #16   /* pack row[1] into the high half */
    112        add    a2, v3, v7            /* row[2] = (a2 + b2) >> 11 */
    113        mov    a4, a2, lsr #11
    114        bic    a4, a4, #0x1f0000
    115        add    a2, v4, fp            /* row[3] = (a3 + b3) >> 11 */
    116        mov    a2, a2, lsr #11
    117        add    a4, a4, a2, lsl #16
    118        strd   a3, a4, [a1]          /* store row[0..3] */
    119 
    120        sub    a2, v4, fp            /* row[4] = (a3 - b3) >> 11 */
    121        mov    a3, a2, lsr #11
    122        bic    a3, a3, #0x1f0000
    123        sub    a2, v3, v7            /* row[5] = (a2 - b2) >> 11 */
    124        mov    a2, a2, lsr #11
    125        add    a3, a3, a2, lsl #16
    126        add    a2, v2, v6            /* row[6] = (a1 - b1) >> 11 */
    127        mov    a4, a2, lsr #11
    128        bic    a4, a4, #0x1f0000
    129        sub    a2, v1, v5            /* row[7] = (a0 - b0) >> 11 */
    130        mov    a2, a2, lsr #11
    131        add    a4, a4, a2, lsl #16
    132        strd   a3, a4, [a1, #8]      /* store row[4..7] */
    133 
    134        ldr    pc, [sp], #4          /* return */
    135 
    136 row_dc_only:
        /* All AC terms zero: each of the eight outputs is row[0] << 3. */
    137        orr    a3, a3, a3, lsl #16   /* duplicate row[0] into both halves */
    138        bic    a3, a3, #0xe000       /* drop bits 13-15 so the shift below */
    139        mov    a3, a3, lsl #3        /* cannot spill into the high half   */
    140        mov    a4, a3
    141        strd   a3, a4, [a1]
    142        strd   a3, a4, [a1, #8]
    143 
    144        ldr    pc, [sp], #4          /* return */
    145 endfunc
    146 
    147        .macro idct_col
/*
 * idct_col: one-dimensional inverse DCT over two adjacent columns at
 * once.  Each 32-bit load picks up a horizontally adjacent pair of
 * 16-bit coefficients: the bottom halfwords form one column, the top
 * halfwords the other.
 *
 * In:  a1 = pointer to the top of the column pair (row stride 16 bytes)
 * Out: the even-part accumulators a0..a3 of both columns are pushed on
 *      the stack as {v1,v2, v3,v4, v5,v6, v7,fp} (bottom/top pairs);
 *      the odd-part values are left in v1..fp as {b0, -b1, b2, b3},
 *      again bottom/top interleaved.
 * Clobbers a2-a4, v1-v7, fp, ip, lr.
 */
    148        ldr    a4, [a1]              /* a4 = col[1:0] */
    149        mov    ip, #16384
    150        sub    ip, ip, #1            /* ip = W4 */
    151        mov    v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
    152        add    v2, v1, a4, asr #16   /* v2 = bias + col[0] (top column) */
    153        rsb    v2, v2, v2, lsl #14   /* v2 *= (1<<14)-1, i.e. *W4, no mul */
    154        mov    a4, a4, lsl #16
    155        add    v1, v1, a4, asr #16   /* v1 = bias + col[0] (bottom column) */
    156        ldr    a4, [a1, #(16*4)]     /* a4 = row 4 coefficient pair */
    157        rsb    v1, v1, v1, lsl #14   /* v1 *= W4 */
    158 
        /* Even part: v1,v3,v5,v7 = a0..a3 of the bottom column,
         * v2,v4,v6,fp = a0..a3 of the top column. */
    159        smulbb lr, ip, a4            /* lr = W4*col[4] (bottom) */
    160        smulbt a3, ip, a4            /* a3 = W4*col[4] (top) */
    161        sub    v3, v1, lr
    162        sub    v5, v1, lr
    163        add    v7, v1, lr
    164        add    v1, v1, lr
    165        sub    v4, v2, a3
    166        sub    v6, v2, a3
    167        add    fp, v2, a3
    168        ldr    ip, =W26              /* ip = W2 | (W6 << 16) */
    169        ldr    a4, [a1, #(16*2)]     /* a4 = row 2 coefficient pair */
    170        add    v2, v2, a3
    171 
    172        smulbb lr, ip, a4            /* lr = W2*col[2] (bottom) */
    173        smultb a3, ip, a4            /* a3 = W6*col[2] (bottom) */
    174        add    v1, v1, lr
    175        sub    v7, v7, lr
    176        add    v3, v3, a3
    177        sub    v5, v5, a3
    178        smulbt lr, ip, a4            /* lr = W2*col[2] (top) */
    179        smultt a3, ip, a4            /* a3 = W6*col[2] (top) */
    180        add    v2, v2, lr
    181        sub    fp, fp, lr
    182        add    v4, v4, a3
    183        ldr    a4, [a1, #(16*6)]     /* a4 = row 6 coefficient pair */
    184        sub    v6, v6, a3
    185 
    186        smultb lr, ip, a4            /* lr = W6*col[6] (bottom) */
    187        smulbb a3, ip, a4            /* a3 = W2*col[6] (bottom) */
    188        add    v1, v1, lr
    189        sub    v7, v7, lr
    190        sub    v3, v3, a3
    191        add    v5, v5, a3
    192        smultt lr, ip, a4            /* lr = W6*col[6] (top) */
    193        smulbt a3, ip, a4            /* a3 = W2*col[6] (top) */
    194        add    v2, v2, lr
    195        sub    fp, fp, lr
    196        sub    v4, v4, a3
    197        add    v6, v6, a3
    198 
    199        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp} /* save even part */
    200 
        /* Odd part: rebuild v1..fp as b0, -b1, b2, b3 (bottom/top pairs). */
    201        ldr    ip, =W13              /* ip = W1 | (W3 << 16) */
    202        ldr    a4, [a1, #(16*1)]     /* a4 = row 1 coefficient pair */
    203        ldr    lr, =W57              /* lr = W5 | (W7 << 16) */
    204        smulbb v1, ip, a4            /* v1 = W1*col[1] (bottom) */
    205        smultb v3, ip, a4            /* v3 = W3*col[1] (bottom) */
    206        smulbb v5, lr, a4            /* v5 = W5*col[1] (bottom) */
    207        smultb v7, lr, a4            /* v7 = W7*col[1] (bottom) */
    208        smulbt v2, ip, a4            /* v2 = W1*col[1] (top) */
    209        smultt v4, ip, a4            /* v4 = W3*col[1] (top) */
    210        smulbt v6, lr, a4            /* v6 = W5*col[1] (top) */
    211        smultt fp, lr, a4            /* fp = W7*col[1] (top) */
    212        rsb    v4, v4, #0            /* negate: b1 terms accumulate as -b1 */
    213        ldr    a4, [a1, #(16*3)]     /* a4 = row 3 coefficient pair */
    214        rsb    v3, v3, #0
    215 
    216        smlatb v1, ip, a4, v1        /* v1 += W3*col[3] */
    217        smlatb v3, lr, a4, v3        /* v3 += W7*col[3] */
    218        smulbb a3, ip, a4            /* a3 = W1*col[3] */
    219        smulbb a2, lr, a4            /* a2 = W5*col[3] */
    220        sub    v5, v5, a3            /* v5 -= W1*col[3] */
    221        sub    v7, v7, a2            /* v7 -= W5*col[3] */
    222        smlatt v2, ip, a4, v2        /* same again for the top column */
    223        smlatt v4, lr, a4, v4
    224        smulbt a3, ip, a4
    225        smulbt a2, lr, a4
    226        sub    v6, v6, a3
    227        ldr    a4, [a1, #(16*5)]     /* a4 = row 5 coefficient pair */
    228        sub    fp, fp, a2
    229 
    230        smlabb v1, lr, a4, v1        /* v1 += W5*col[5] */
    231        smlabb v3, ip, a4, v3        /* v3 += W1*col[5] */
    232        smlatb v5, lr, a4, v5        /* v5 += W7*col[5] */
    233        smlatb v7, ip, a4, v7        /* v7 += W3*col[5] */
    234        smlabt v2, lr, a4, v2        /* top column */
    235        smlabt v4, ip, a4, v4
    236        smlatt v6, lr, a4, v6
    237        ldr    a3, [a1, #(16*7)]     /* a3 = row 7 coefficient pair */
    238        smlatt fp, ip, a4, fp
    239 
    240        smlatb v1, lr, a3, v1        /* v1 += W7*col[7]; v1 = b0 */
    241        smlabb v3, lr, a3, v3        /* v3 += W5*col[7]; v3 = -b1 */
    242        smlatb v5, ip, a3, v5        /* v5 += W3*col[7]; v5 = b2 */
    243        smulbb a4, ip, a3            /* a4 = W1*col[7] */
    244        smlatt v2, lr, a3, v2        /* top column */
    245        sub    v7, v7, a4            /* v7 -= W1*col[7]; v7 = b3 */
    246        smlabt v4, lr, a3, v4
    247        smulbt a4, ip, a3
    248        smlatt v6, ip, a3, v6
    249        sub    fp, fp, a4
    250        .endm
    251 
    252 function idct_col_armv5te
/*
 * idct_col_armv5te: full column pass for two adjacent columns.
 * Runs idct_col, then combines the stacked even parts (a0..a3) with
 * the odd parts (b0, -b1, b2, b3) left in v1..fp, arithmetic-shifts by
 * COL_SHIFT (20) and stores the 16-bit results back with a row stride
 * of 16 bytes.
 *
 * In:      a1 = pointer to the column pair, transformed in place
 * Clobbers a2-a4, v1-v7, fp, ip, lr, flags
 *
 * The bottom-column result uses "lsr #20" plus a conditional orr of
 * 0xf000 on negative sums: this sign-extends the value within its
 * 16-bit lane while leaving the (still empty) upper halfword clear.
 */
    253        str    lr, [sp, #-4]!
    254 
    255        idct_col
    256 
    257        ldmfd  sp!, {a3, a4}         /* a3/a4 = a0 (bottom/top) */
    258        adds   a2, a3, v1            /* col[0] = (a0 + b0) >> 20 */
    259        mov    a2, a2, lsr #20
    260        it     mi
    261        orrmi  a2, a2, #0xf000       /* sign bits within the 16-bit lane */
    262        add    ip, a4, v2
    263        mov    ip, ip, asr #20
    264        orr    a2, a2, ip, lsl #16   /* pack the top-column result */
    265        str    a2, [a1]
    266        subs   a3, a3, v1            /* col[7] = (a0 - b0) >> 20 */
    267        mov    a2, a3, lsr #20
    268        it     mi
    269        orrmi  a2, a2, #0xf000
    270        sub    a4, a4, v2
    271        mov    a4, a4, asr #20
    272        orr    a2, a2, a4, lsl #16
    273        ldmfd  sp!, {a3, a4}         /* a3/a4 = a1 (bottom/top) */
    274        str    a2, [a1, #(16*7)]
    275 
    276        subs   a2, a3, v3            /* col[1] = (a1 + b1) >> 20; v3 = -b1 */
    277        mov    a2, a2, lsr #20
    278        it     mi
    279        orrmi  a2, a2, #0xf000
    280        sub    ip, a4, v4
    281        mov    ip, ip, asr #20
    282        orr    a2, a2, ip, lsl #16
    283        str    a2, [a1, #(16*1)]
    284        adds   a3, a3, v3            /* col[6] = (a1 - b1) >> 20 */
    285        mov    a2, a3, lsr #20
    286        it     mi
    287        orrmi  a2, a2, #0xf000
    288        add    a4, a4, v4
    289        mov    a4, a4, asr #20
    290        orr    a2, a2, a4, lsl #16
    291        ldmfd  sp!, {a3, a4}         /* a3/a4 = a2 (bottom/top) */
    292        str    a2, [a1, #(16*6)]
    293 
    294        adds   a2, a3, v5            /* col[2] = (a2 + b2) >> 20 */
    295        mov    a2, a2, lsr #20
    296        it     mi
    297        orrmi  a2, a2, #0xf000
    298        add    ip, a4, v6
    299        mov    ip, ip, asr #20
    300        orr    a2, a2, ip, lsl #16
    301        str    a2, [a1, #(16*2)]
    302        subs   a3, a3, v5            /* col[5] = (a2 - b2) >> 20 */
    303        mov    a2, a3, lsr #20
    304        it     mi
    305        orrmi  a2, a2, #0xf000
    306        sub    a4, a4, v6
    307        mov    a4, a4, asr #20
    308        orr    a2, a2, a4, lsl #16
    309        ldmfd  sp!, {a3, a4}         /* a3/a4 = a3 (bottom/top) */
    310        str    a2, [a1, #(16*5)]
    311 
    312        adds   a2, a3, v7            /* col[3] = (a3 + b3) >> 20 */
    313        mov    a2, a2, lsr #20
    314        it     mi
    315        orrmi  a2, a2, #0xf000
    316        add    ip, a4, fp
    317        mov    ip, ip, asr #20
    318        orr    a2, a2, ip, lsl #16
    319        str    a2, [a1, #(16*3)]
    320        subs   a3, a3, v7            /* col[4] = (a3 - b3) >> 20 */
    321        mov    a2, a3, lsr #20
    322        it     mi
    323        orrmi  a2, a2, #0xf000
    324        sub    a4, a4, fp
    325        mov    a4, a4, asr #20
    326        orr    a2, a2, a4, lsl #16
    327        str    a2, [a1, #(16*4)]
    328 
    329        ldr    pc, [sp], #4          /* return */
    330 endfunc
    331 
    332 .macro  clip   dst, src:vararg
/* Clamp the signed value \src into \dst, range [0, 255] (clobbers flags). */
    333        movs   \dst, \src
    334        it     mi
    335        movmi  \dst, #0              /* negative -> 0 */
    336        cmp    \dst, #255
    337        it     gt
    338        movgt  \dst, #255            /* > 255 -> 255 */
    339 .endm
    340 
    341 .macro  aclip  dst, src:vararg
/* \dst += \src, then clamp the sum to [0, 255] (clobbers flags). */
    342        adds   \dst, \src
    343        it     mi
    344        movmi  \dst, #0              /* negative -> 0 */
    345        cmp    \dst, #255
    346        it     gt
    347        movgt  \dst, #255            /* > 255 -> 255 */
    348 .endm
    349 
    350 function idct_col_put_armv5te
/*
 * idct_col_put_armv5te: column pass for two adjacent columns, writing
 * the clipped 8-bit results to a separate destination ("put" version).
 *
 * In:      a1 = pointer to the coefficient column pair
 *          [sp, #28] = destination pointer, [sp, #32] = line size
 *          (the a1/a2 arguments spilled by ff_simple_idct_put_armv5te;
 *          offsets are relative to sp after the first ldmfd below).
 * Out:     the saved destination at [sp, #28] is advanced by 2 for the
 *          next column pair.
 * Clobbers a2-a4, v1-v7, fp, ip, lr, flags
 *
 * strh_pre/strh_dpre are helper macros from libavutil/arm/asm.S —
 * pre-indexed halfword stores with writeback (incrementing resp.
 * decrementing the base).  Rows are stored in the order
 * 0, 7, 1, 6, 2, 5, 3, 4: v1 walks down from the first line while v2
 * walks up from the last.
 */
    351        str    lr, [sp, #-4]!
    352 
    353        idct_col
    354 
    355        ldmfd  sp!, {a3, a4}         /* a3/a4 = a0 (bottom/top) */
    356        ldr    lr, [sp, #32]         /* lr = line size */
    357        add    a2, a3, v1            /* row 0: a0 + b0 */
    358        clip   a2, a2, asr #20
    359        add    ip, a4, v2
    360        clip   ip, ip, asr #20
    361        orr    a2, a2, ip, lsl #8    /* two clipped pixels in one halfword */
    362        sub    a3, a3, v1            /* row 7: a0 - b0 */
    363        clip   a3, a3, asr #20
    364        sub    a4, a4, v2
    365        clip   a4, a4, asr #20
    366        ldr    v1, [sp, #28]         /* v1 = destination pointer */
    367        strh   a2, [v1]              /* store row 0 */
    368        add    a2, v1, #2
    369        str    a2, [sp, #28]         /* advance dest for the next pair */
    370        orr    a2, a3, a4, lsl #8
    371        rsb    v2, lr, lr, lsl #3    /* v2 = 7 * line size */
    372        ldmfd  sp!, {a3, a4}         /* a3/a4 = a1 (bottom/top) */
    373        strh_pre a2, v2, v1          /* store row 7 */
    374 
    375        sub    a2, a3, v3            /* row 1: a1 + b1 (v3 = -b1) */
    376        clip   a2, a2, asr #20
    377        sub    ip, a4, v4
    378        clip   ip, ip, asr #20
    379        orr    a2, a2, ip, lsl #8
    380        strh_pre a2, v1, lr          /* store row 1 */
    381        add    a3, a3, v3            /* row 6: a1 - b1 */
    382        clip   a2, a3, asr #20
    383        add    a4, a4, v4
    384        clip   a4, a4, asr #20
    385        orr    a2, a2, a4, lsl #8
    386        ldmfd  sp!, {a3, a4}         /* a3/a4 = a2 (bottom/top) */
    387        strh_dpre a2, v2, lr         /* store row 6 */
    388 
    389        add    a2, a3, v5            /* row 2: a2 + b2 */
    390        clip   a2, a2, asr #20
    391        add    ip, a4, v6
    392        clip   ip, ip, asr #20
    393        orr    a2, a2, ip, lsl #8
    394        strh_pre a2, v1, lr          /* store row 2 */
    395        sub    a3, a3, v5            /* row 5: a2 - b2 */
    396        clip   a2, a3, asr #20
    397        sub    a4, a4, v6
    398        clip   a4, a4, asr #20
    399        orr    a2, a2, a4, lsl #8
    400        ldmfd  sp!, {a3, a4}         /* a3/a4 = a3 (bottom/top) */
    401        strh_dpre a2, v2, lr         /* store row 5 */
    402 
    403        add    a2, a3, v7            /* row 3: a3 + b3 */
    404        clip   a2, a2, asr #20
    405        add    ip, a4, fp
    406        clip   ip, ip, asr #20
    407        orr    a2, a2, ip, lsl #8
    408        strh   a2, [v1, lr]          /* store row 3 (no writeback needed) */
    409        sub    a3, a3, v7            /* row 4: a3 - b3 */
    410        clip   a2, a3, asr #20
    411        sub    a4, a4, fp
    412        clip   a4, a4, asr #20
    413        orr    a2, a2, a4, lsl #8
    414        strh_dpre a2, v2, lr         /* store row 4 */
    415 
    416        ldr    pc, [sp], #4          /* return */
    417 endfunc
    418 
    419 function idct_col_add_armv5te
/*
 * idct_col_add_armv5te: column pass for two adjacent columns, adding
 * the IDCT result to the existing 8-bit pixels and clipping to
 * [0, 255] ("add" version).
 *
 * In:      a1 = pointer to the coefficient column pair
 *          [sp, #36] = destination pointer, [sp, #40] = line size
 *          (the a1/a2 arguments spilled by ff_simple_idct_add_armv5te;
 *          offsets relative to sp before the pops below).
 * Out:     the saved destination (at [sp, #28] after the first pop) is
 *          advanced by 2 for the next column pair.
 * Clobbers a2-a4, v1-v7, fp, ip, lr, flags
 *
 * ldrh_pre/ldrh_dpre are helper macros from libavutil/arm/asm.S —
 * pre-indexed halfword loads with writeback.  lr walks down from
 * row 0 while v2 walks up from row 7, processing rows in the order
 * 0, 7, 1, 6, 2, 5, 3, 4.
 */
    420        str    lr, [sp, #-4]!
    421 
    422        idct_col
    423 
    424        ldr    lr, [sp, #36]         /* lr = destination pointer */
    425 
    426        ldmfd  sp!, {a3, a4}         /* a3/a4 = a0 (bottom/top) */
    427        ldrh   ip, [lr]              /* ip = two pixels of row 0 */
    428        add    a2, a3, v1            /* row 0: a0 + b0 */
    429        sub    a3, a3, v1            /* row 7: a0 - b0 */
    430        and    v1, ip, #255          /* bottom pixel */
    431        aclip  a2, v1, a2, asr #20   /* pixel + result, clipped */
    432        add    v1, a4, v2
    433        mov    v1, v1, asr #20
    434        aclip  v1, v1, ip, lsr #8    /* top pixel */
    435        orr    a2, a2, v1, lsl #8
    436        ldr    v1, [sp, #32]         /* v1 = line size */
    437        sub    a4, a4, v2
    438        rsb    v2, v1, v1, lsl #3    /* v2 = 7 * line size */
    439        ldrh_pre ip, v2, lr          /* ip = row 7 pixels; v2 = &row 7 */
    440        strh   a2, [lr]              /* store row 0 */
    441        and    a2, ip, #255
    442        aclip  a3, a2, a3, asr #20
    443        mov    a4, a4, asr #20
    444        aclip  a4, a4, ip, lsr #8
    445        add    a2, lr, #2
    446        str    a2, [sp, #28]         /* advance dest for the next pair */
    447        orr    a2, a3, a4, lsl #8
    448        strh   a2, [v2]              /* store row 7 */
    449 
    450        ldmfd  sp!, {a3, a4}         /* a3/a4 = a1 (bottom/top) */
    451        ldrh_pre ip, lr, v1          /* ip = row 1 pixels; lr = &row 1 */
    452        sub    a2, a3, v3            /* row 1: a1 + b1 (v3 = -b1) */
    453        add    a3, a3, v3            /* row 6: a1 - b1 */
    454        and    v3, ip, #255
    455        aclip  a2, v3, a2, asr #20
    456        sub    v3, a4, v4
    457        mov    v3, v3, asr #20
    458        aclip  v3, v3, ip, lsr #8
    459        orr    a2, a2, v3, lsl #8
    460        add    a4, a4, v4
    461        ldrh_dpre ip, v2, v1         /* ip = row 6 pixels; v2 = &row 6 */
    462        strh   a2, [lr]              /* store row 1 */
    463        and    a2, ip, #255
    464        aclip  a3, a2, a3, asr #20
    465        mov    a4, a4, asr #20
    466        aclip  a4, a4, ip, lsr #8
    467        orr    a2, a3, a4, lsl #8
    468        strh   a2, [v2]              /* store row 6 */
    469 
    470        ldmfd  sp!, {a3, a4}         /* a3/a4 = a2 (bottom/top) */
    471        ldrh_pre ip, lr, v1          /* ip = row 2 pixels; lr = &row 2 */
    472        add    a2, a3, v5            /* row 2: a2 + b2 */
    473        sub    a3, a3, v5            /* row 5: a2 - b2 */
    474        and    v3, ip, #255
    475        aclip  a2, v3, a2, asr #20
    476        add    v3, a4, v6
    477        mov    v3, v3, asr #20
    478        aclip  v3, v3, ip, lsr #8
    479        orr    a2, a2, v3, lsl #8
    480        sub    a4, a4, v6
    481        ldrh_dpre ip, v2, v1         /* ip = row 5 pixels; v2 = &row 5 */
    482        strh   a2, [lr]              /* store row 2 */
    483        and    a2, ip, #255
    484        aclip  a3, a2, a3, asr #20
    485        mov    a4, a4, asr #20
    486        aclip  a4, a4, ip, lsr #8
    487        orr    a2, a3, a4, lsl #8
    488        strh   a2, [v2]              /* store row 5 */
    489 
    490        ldmfd  sp!, {a3, a4}         /* a3/a4 = a3 (bottom/top) */
    491        ldrh_pre ip, lr, v1          /* ip = row 3 pixels; lr = &row 3 */
    492        add    a2, a3, v7            /* row 3: a3 + b3 */
    493        sub    a3, a3, v7            /* row 4: a3 - b3 */
    494        and    v3, ip, #255
    495        aclip  a2, v3, a2, asr #20
    496        add    v3, a4, fp
    497        mov    v3, v3, asr #20
    498        aclip  v3, v3, ip, lsr #8
    499        orr    a2, a2, v3, lsl #8
    500        sub    a4, a4, fp
    501        ldrh_dpre ip, v2, v1         /* ip = row 4 pixels; v2 = &row 4 */
    502        strh   a2, [lr]              /* store row 3 */
    503        and    a2, ip, #255
    504        aclip  a3, a2, a3, asr #20
    505        mov    a4, a4, asr #20
    506        aclip  a4, a4, ip, lsr #8
    507        orr    a2, a3, a4, lsl #8
    508        strh   a2, [v2]              /* store row 4 */
    509 
    510        ldr    pc, [sp], #4          /* return */
    511 endfunc
    512 
    513 function ff_simple_idct_armv5te, export=1
/*
 * In-place 8x8 inverse DCT of the int16 block pointed to by a1
 * (C equivalent: void ff_simple_idct_armv5te(int16_t *data)).
 * Eight row passes (row stride 16 bytes) followed by four column
 * passes, each column call transforming two adjacent columns.
 * Callee-saved v1-v7/fp are preserved across the helpers.
 */
    514        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
    515 
    516        bl     idct_row_armv5te
    517        add    a1, a1, #16           /* next row */
    518        bl     idct_row_armv5te
    519        add    a1, a1, #16
    520        bl     idct_row_armv5te
    521        add    a1, a1, #16
    522        bl     idct_row_armv5te
    523        add    a1, a1, #16
    524        bl     idct_row_armv5te
    525        add    a1, a1, #16
    526        bl     idct_row_armv5te
    527        add    a1, a1, #16
    528        bl     idct_row_armv5te
    529        add    a1, a1, #16
    530        bl     idct_row_armv5te
    531 
    532        sub    a1, a1, #(16*7)       /* rewind to the first row */
    533 
    534        bl     idct_col_armv5te
    535        add    a1, a1, #4            /* next pair of columns */
    536        bl     idct_col_armv5te
    537        add    a1, a1, #4
    538        bl     idct_col_armv5te
    539        add    a1, a1, #4
    540        bl     idct_col_armv5te
    541 
    542        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
    543 endfunc
    544 
    545 function ff_simple_idct_add_armv5te, export=1
/*
 * IDCT of the 8x8 int16 block at a3; the result is added to the 8-bit
 * pixels at a1 (dest, line stride a2) and clipped to [0, 255].
 * a1 and a2 are spilled to the stack so idct_col_add_armv5te can reach
 * them at fixed sp offsets; the spill slots are dropped on return.
 */
    546        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
    547 
    548        mov    a1, a3                /* a1 = coefficient block */
    549 
    550        bl     idct_row_armv5te
    551        add    a1, a1, #16           /* next row */
    552        bl     idct_row_armv5te
    553        add    a1, a1, #16
    554        bl     idct_row_armv5te
    555        add    a1, a1, #16
    556        bl     idct_row_armv5te
    557        add    a1, a1, #16
    558        bl     idct_row_armv5te
    559        add    a1, a1, #16
    560        bl     idct_row_armv5te
    561        add    a1, a1, #16
    562        bl     idct_row_armv5te
    563        add    a1, a1, #16
    564        bl     idct_row_armv5te
    565 
    566        sub    a1, a1, #(16*7)       /* rewind to the first row */
    567 
    568        bl     idct_col_add_armv5te
    569        add    a1, a1, #4            /* next pair of columns */
    570        bl     idct_col_add_armv5te
    571        add    a1, a1, #4
    572        bl     idct_col_add_armv5te
    573        add    a1, a1, #4
    574        bl     idct_col_add_armv5te
    575 
    576        add    sp, sp, #8            /* drop the spilled a1/a2 */
    577        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
    578 endfunc
    579 
    580 function ff_simple_idct_put_armv5te, export=1
/*
 * IDCT of the 8x8 int16 block at a3; the clipped 8-bit result is
 * written to a1 (dest, line stride a2), overwriting the pixels there.
 * a1 and a2 are spilled to the stack so idct_col_put_armv5te can reach
 * them at fixed sp offsets; the spill slots are dropped on return.
 */
    581        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
    582 
    583        mov    a1, a3                /* a1 = coefficient block */
    584 
    585        bl     idct_row_armv5te
    586        add    a1, a1, #16           /* next row */
    587        bl     idct_row_armv5te
    588        add    a1, a1, #16
    589        bl     idct_row_armv5te
    590        add    a1, a1, #16
    591        bl     idct_row_armv5te
    592        add    a1, a1, #16
    593        bl     idct_row_armv5te
    594        add    a1, a1, #16
    595        bl     idct_row_armv5te
    596        add    a1, a1, #16
    597        bl     idct_row_armv5te
    598        add    a1, a1, #16
    599        bl     idct_row_armv5te
    600 
    601        sub    a1, a1, #(16*7)       /* rewind to the first row */
    602 
    603        bl     idct_col_put_armv5te
    604        add    a1, a1, #4            /* next pair of columns */
    605        bl     idct_col_put_armv5te
    606        add    a1, a1, #4
    607        bl     idct_col_put_armv5te
    608        add    a1, a1, #4
    609        bl     idct_col_put_armv5te
    610 
    611        add    sp, sp, #8            /* drop the spilled a1/a2 */
    612        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
    613 endfunc