tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jrevdct_arm.S (13553B)


      1 /*
      2   C-like prototype :
      3        void ff_j_rev_dct_arm(DCTBLOCK data)
      4 
      5   With DCTBLOCK being a pointer to an array of 64 'signed shorts'
      6 
      7   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
      8 
      9   Permission is hereby granted, free of charge, to any person obtaining a copy
     10   of this software and associated documentation files (the "Software"), to deal
     11   in the Software without restriction, including without limitation the rights
     12   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     13   copies of the Software, and to permit persons to whom the Software is
     14   furnished to do so, subject to the following conditions:
     15 
     16   The above copyright notice and this permission notice shall be included in
     17   all copies or substantial portions of the Software.
     18 
     19   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     20   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     21   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
     22   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
     23   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     24   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25 
     26 */
     27 
     28 #include "libavutil/arm/asm.S"
     29 
     30 #define FIX_0_298631336 2446   /* FIX_<x> = <x> * 2^13 rounded to an integer; FIX_M_<x> is the negated value */
     31 #define FIX_0_541196100 4433
     32 #define FIX_0_765366865 6270
     33 #define FIX_1_175875602 9633
     34 #define FIX_1_501321110 12299
     35 #define FIX_2_053119869 16819
     36 #define FIX_3_072711026 25172
     37 #define FIX_M_0_390180644 -3196
     38 #define FIX_M_0_899976223 -7373
     39 #define FIX_M_1_847759065 -15137
     40 #define FIX_M_1_961570560 -16069
     41 #define FIX_M_2_562915447 -20995
     42 #define FIX_0xFFFF 0xFFFF      /* 16-bit mask (not a scaled constant), used by the empty_row path */
     43 
        /* FIX_*_ID: byte offset of the matching constant inside const_array  */
        /* (one 4-byte word per entry, so the offsets must follow the order   */
        /* of the .word lines in const_array).                                */
     44 #define FIX_0_298631336_ID      0
     45 #define FIX_0_541196100_ID      4
     46 #define FIX_0_765366865_ID      8
     47 #define FIX_1_175875602_ID     12
     48 #define FIX_1_501321110_ID     16
     49 #define FIX_2_053119869_ID     20
     50 #define FIX_3_072711026_ID     24
     51 #define FIX_M_0_390180644_ID   28
     52 #define FIX_M_0_899976223_ID   32
     53 #define FIX_M_1_847759065_ID   36
     54 #define FIX_M_1_961570560_ID   40
     55 #define FIX_M_2_562915447_ID   44
     56 #define FIX_0xFFFF_ID          48
     57 
     58 function ff_j_rev_dct_arm, export=1
               @ In-place 8x8 reverse DCT: r0 points to 64 signed 16-bit values.
               @ Pass 1 (row_loop) processes the 8 rows of 16 bytes each and
               @ stores results descaled by 11 bits; pass 2 (column_loop)
               @ processes the 8 columns and stores results descaled by 18 bits.
               @ Within a row, pass 1 reads d0,d2,d4,d6 from byte offsets
               @ 0,2,4,6 and d1,d3,d5,d7 from byte offsets 8,10,12,14.
               @ Registers: lr = current row/column pointer, r12 = loop counter,
               @ r11 = const_array base, r0-r10 = scratch.
     59        push {r0, r4 - r11, lr}         @ r0 (block pointer) is saved too: it is popped back into lr before the column pass
     60 
     61        mov lr, r0                      @ lr = pointer to the current row
     62        mov r12, #8                     @ r12 = row-counter
     63        movrel r11, const_array         @ r11 = base pointer to the constants array
     64 row_loop:
     65        ldrsh r0, [lr, # 0]             @ r0 = 'd0'
     66        ldrsh r2, [lr, # 2]             @ r2 = 'd2'
     67 
     68        @ Optimization for rows that have all items except the first set to 0
     69        @ (testing pairs of int16_t with word loads assumes the rows are 4-byte aligned)
     70        ldr r5, [lr, # 0]               @ r5 = items at byte offsets 0 and 2
     71        ldr r6, [lr, # 4]               @ r6 = items at byte offsets 4 and 6
     72        ldr r3, [lr, # 8]               @ r3 = items at byte offsets 8 and 10
     73        ldr r4, [lr, #12]               @ r4 = items at byte offsets 12 and 14
     74        orr r3, r3, r4
     75        orr r3, r3, r6                  @ r3 = OR of the items at byte offsets 4..14
     76        orrs r5, r3, r5                 @ r5 = OR of the whole row
     77        beq end_of_row_loop             @ nothing to be done as ALL of them are '0'
     78        orrs r3, r3, r2                 @ r3 = OR of every item except the first
     79        beq empty_row                   @ only the first item may be non-zero
     80 
     81        ldrsh r1, [lr, # 8]             @ r1 = 'd1'
     82        ldrsh r4, [lr, # 4]             @ r4 = 'd4'
     83        ldrsh r6, [lr, # 6]             @ r6 = 'd6'
     84 
               @ Terms d0, d2, d4, d6: compute tmp10..tmp13
               @ (constant loads are interleaved with the multiplies)
     85        ldr r3, [r11, #FIX_0_541196100_ID]
     86        add r7, r2, r6
     87        ldr r5, [r11, #FIX_M_1_847759065_ID]
     88        mul r7, r3, r7                      @ r7 = z1
     89        ldr r3, [r11, #FIX_0_765366865_ID]
     90        mla r6, r5, r6, r7                  @ r6 = tmp2
     91        add r5, r0, r4                      @ r5 = tmp0
     92        mla r2, r3, r2, r7                  @ r2 = tmp3
     93        sub r3, r0, r4                      @ r3 = tmp1
     94 
     95        add r0, r2, r5, lsl #13             @ r0 = tmp10
     96        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
     97        add r4, r6, r3, lsl #13             @ r4 = tmp11
     98        rsb r3, r6, r3, lsl #13             @ r3 = tmp12
     99 
    100        push {r0, r2, r3, r4} @ save on the stack tmp10, tmp13, tmp12, tmp11
    101 
               @ Terms d1, d3, d5, d7: compute tmp0..tmp3
    102        ldrsh r3, [lr, #10]             @ r3 = 'd3'
    103        ldrsh r5, [lr, #12]             @ r5 = 'd5'
    104        ldrsh r7, [lr, #14]             @ r7 = 'd7'
    105 
    106        add r0, r3, r5                        @ r0 = 'z2'
    107        add r2, r1, r7                  @ r2 = 'z1'
    108        add r4, r3, r7                  @ r4 = 'z3'
    109        add r6, r1, r5                  @ r6 = 'z4'
    110        ldr r9, [r11, #FIX_1_175875602_ID]
    111        add r8, r4, r6                  @ r8 = z3 + z4
    112        ldr r10, [r11, #FIX_M_0_899976223_ID]
    113        mul r8, r9, r8                  @ r8 = 'z5'
    114        ldr r9, [r11, #FIX_M_2_562915447_ID]
    115        mul r2, r10, r2                 @ r2 = 'z1'
    116        ldr r10, [r11, #FIX_M_1_961570560_ID]
    117        mul r0, r9, r0                  @ r0 = 'z2'
    118        ldr r9, [r11, #FIX_M_0_390180644_ID]
    119        mla r4, r10, r4, r8             @ r4 = 'z3'
    120        ldr r10, [r11, #FIX_0_298631336_ID]
    121        mla r6, r9, r6, r8              @ r6 = 'z4'
    122        ldr r9, [r11, #FIX_2_053119869_ID]
    123        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
    124        ldr r10, [r11, #FIX_3_072711026_ID]
    125        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
    126        ldr r9, [r11, #FIX_1_501321110_ID]
    127        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
    128        add r7, r7, r4                  @ r7 = tmp0
    129        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
    130        add r5,        r5, r6                  @ r5 = tmp1
    131        add r3, r3, r4                  @ r3 = tmp2
    132        add r1, r1, r6                  @ r1 = tmp3
    133 
    134        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
    135                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
    136 
               @ DESCALE(x, n) is coded below as (x + (1 << (n-1))) >> n with an
               @ arithmetic shift; n = 11 in this pass. The sums are stored at byte
               @ offsets 0,2,4,6 and the matching differences at 14,12,10,8.
    137        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
    138        add r8, r0, r1
    139        add r8, r8, #(1<<10)
    140        mov r8, r8, asr #11
    141        strh r8, [lr, # 0]
    142 
    143        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
    144        sub r8, r0, r1
    145        add r8, r8, #(1<<10)
    146        mov r8, r8, asr #11
    147        strh r8, [lr, #14]
    148 
    149        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
    150        add r8, r6, r3
    151        add r8, r8, #(1<<10)
    152        mov r8, r8, asr #11
    153        strh r8, [lr, # 2]
    154 
    155        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
    156        sub r8, r6, r3
    157        add r8, r8, #(1<<10)
    158        mov r8, r8, asr #11
    159        strh r8, [lr, #12]
    160 
    161        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
    162        add r8, r4, r5
    163        add r8, r8, #(1<<10)
    164        mov r8, r8, asr #11
    165        strh r8, [lr, # 4]
    166 
    167        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
    168        sub r8, r4, r5
    169        add r8, r8, #(1<<10)
    170        mov r8, r8, asr #11
    171        strh r8, [lr, #10]
    172 
    173        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
    174        add r8, r2, r7
    175        add r8, r8, #(1<<10)
    176        mov r8, r8, asr #11
    177        strh r8, [lr, # 6]
    178 
    179        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
    180        sub r8, r2, r7
    181        add r8, r8, #(1<<10)
    182        mov r8, r8, asr #11
    183        strh r8, [lr, # 8]
    184 
    185        @ End of row loop
    186        add lr, lr, #16                 @ advance to the next row (8 items of 2 bytes)
    187        subs r12, r12, #1
    188        bne row_loop
    189        beq start_column_loop           @ always taken here (Z is set once the bne falls through); skips the shortcut paths below
    190 
    191 empty_row:
               @ Only the first item of the row is non-zero: fill the whole row
               @ with (d0 << 2) truncated to 16 bits.
    192        ldr r1, [r11, #FIX_0xFFFF_ID]   @ r1 = 0xFFFF mask
    193        mov r0, r0, lsl #2              @ r0 = d0 << 2
    194        and r0, r0, r1                  @ keep only the low 16 bits
    195        add r0, r0, r0, lsl #16         @ replicate the halfword into both halves of the word
    196        str r0, [lr, # 0]               @ four word stores cover all 8 items of the row
    197        str r0, [lr, # 4]
    198        str r0, [lr, # 8]
    199        str r0, [lr, #12]
    200 
    201 end_of_row_loop:
    202        @ End of loop
    203        add lr, lr, #16
    204        subs r12, r12, #1
    205        bne row_loop
    206 
    207 start_column_loop:
    208        @ Start of column loop
    209        pop {lr}                        @ lr = block pointer (the r0 value pushed on entry) = first column
    210        mov r12, #8                     @ r12 = column-counter
    211 column_loop:
               @ Consecutive rows are 16 bytes apart; the offsets below are written
               @ as (2*row)*8, so d0..d7 come from rows 0..7 of the current column.
    212        ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
    213        ldrsh r2, [lr, #( 4*8)]             @ r2 = 'd2'
    214        ldrsh r4, [lr, #( 8*8)]             @ r4 = 'd4'
    215        ldrsh r6, [lr, #(12*8)]             @ r6 = 'd6'
    216 
    217        ldr r3, [r11, #FIX_0_541196100_ID]
    218        add r1, r2, r6
    219        ldr r5, [r11, #FIX_M_1_847759065_ID]
    220        mul r1, r3, r1                      @ r1 = z1
    221        ldr r3, [r11, #FIX_0_765366865_ID]
    222        mla r6, r5, r6, r1                  @ r6 = tmp2
    223        add r5, r0, r4                      @ r5 = tmp0
    224        mla r2, r3, r2, r1                  @ r2 = tmp3
    225        sub r3, r0, r4                      @ r3 = tmp1
    226 
    227        add r0, r2, r5, lsl #13             @ r0 = tmp10
    228        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
    229        add r4, r6, r3, lsl #13             @ r4 = tmp11
    230        rsb r6, r6, r3, lsl #13             @ r6 = tmp12
    231 
    232        ldrsh r1, [lr, #( 2*8)]             @ r1 = 'd1'
    233        ldrsh r3, [lr, #( 6*8)]             @ r3 = 'd3'
    234        ldrsh r5, [lr, #(10*8)]             @ r5 = 'd5'
    235        ldrsh r7, [lr, #(14*8)]             @ r7 = 'd7'
    236 
    237        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
    238        orr r9, r1, r3
    239        orr r10, r5, r7
    240        orrs r10, r9, r10
    241        beq empty_odd_column
    242 
    243        push {r0, r2, r4, r6} @ save on the stack tmp10, tmp13, tmp11, tmp12
    244 
    245        add r0, r3, r5                  @ r0 = 'z2'
    246        add r2, r1, r7                  @ r2 = 'z1'
    247        add r4, r3, r7                  @ r4 = 'z3'
    248        add r6, r1, r5                  @ r6 = 'z4'
    249        ldr r9, [r11, #FIX_1_175875602_ID]
    250        add r8, r4, r6
    251        ldr r10, [r11, #FIX_M_0_899976223_ID]
    252        mul r8, r9, r8                  @ r8 = 'z5'
    253        ldr r9, [r11, #FIX_M_2_562915447_ID]
    254        mul r2, r10, r2                 @ r2 = 'z1'
    255        ldr r10, [r11, #FIX_M_1_961570560_ID]
    256        mul r0, r9, r0                  @ r0 = 'z2'
    257        ldr r9, [r11, #FIX_M_0_390180644_ID]
    258        mla r4, r10, r4, r8             @ r4 = 'z3'
    259        ldr r10, [r11, #FIX_0_298631336_ID]
    260        mla r6, r9, r6, r8              @ r6 = 'z4'
    261        ldr r9, [r11, #FIX_2_053119869_ID]
    262        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
    263        ldr r10, [r11, #FIX_3_072711026_ID]
    264        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
    265        ldr r9, [r11, #FIX_1_501321110_ID]
    266        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
    267        add r7, r7, r4                  @ r7 = tmp0
    268        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
    269        add r5,        r5, r6                  @ r5 = tmp1
    270        add r3, r3, r4                  @ r3 = tmp2
    271        add r1, r1, r6                  @ r1 = tmp3
    272 
    273        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
    274                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
    275 
               @ Same DESCALE form as in the row pass, with n = 18:
               @ (x + (1 << 17)) >> 18, arithmetic shift.
    276        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
    277        add r8, r0, r1
    278        add r8, r8, #(1<<17)
    279        mov r8, r8, asr #18
    280        strh r8, [lr, #( 0*8)]
    281 
    282        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
    283        sub r8, r0, r1
    284        add r8, r8, #(1<<17)
    285        mov r8, r8, asr #18
    286        strh r8, [lr, #(14*8)]
    287 
    288        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
    289        add r8, r4, r3
    290        add r8, r8, #(1<<17)
    291        mov r8, r8, asr #18
    292        strh r8, [lr, #( 2*8)]
    293 
    294        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
    295        sub r8, r4, r3
    296        add r8, r8, #(1<<17)
    297        mov r8, r8, asr #18
    298        strh r8, [lr, #(12*8)]
    299 
    300        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
    301        add r8, r6, r5
    302        add r8, r8, #(1<<17)
    303        mov r8, r8, asr #18
    304        strh r8, [lr, #( 4*8)]
    305 
    306        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
    307        sub r8, r6, r5
    308        add r8, r8, #(1<<17)
    309        mov r8, r8, asr #18
    310        strh r8, [lr, #(10*8)]
    311 
    312        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
    313        add r8, r2, r7
    314        add r8, r8, #(1<<17)
    315        mov r8, r8, asr #18
    316        strh r8, [lr, #( 6*8)]
    317 
    318        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
    319        sub r8, r2, r7
    320        add r8, r8, #(1<<17)
    321        mov r8, r8, asr #18
    322        strh r8, [lr, #( 8*8)]
    323 
    324        @ End of column loop
    325        add lr, lr, #2                  @ advance to the next column (one 2-byte item)
    326        subs r12, r12, #1
    327        bne column_loop
    328        beq the_end                     @ always taken here (Z is set once the bne falls through); skips empty_odd_column
    329 
    330 empty_odd_column:
               @ d1 = d3 = d5 = d7 = 0, so tmp0..tmp3 are all 0 and each sum/difference
               @ pair below yields the same value, which is stored to both rows.
    331        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
    332        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
    333        add r0, r0, #(1<<17)
    334        mov r0, r0, asr #18
    335        strh r0, [lr, #( 0*8)]
    336        strh r0, [lr, #(14*8)]
    337 
    338        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
    339        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
    340        add r4, r4, #(1<<17)
    341        mov r4, r4, asr #18
    342        strh r4, [lr, #( 2*8)]
    343        strh r4, [lr, #(12*8)]
    344 
    345        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
    346        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
    347        add r6, r6, #(1<<17)
    348        mov r6, r6, asr #18
    349        strh r6, [lr, #( 4*8)]
    350        strh r6, [lr, #(10*8)]
    351 
    352        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
    353        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
    354        add r2, r2, #(1<<17)
    355        mov r2, r2, asr #18
    356        strh r2, [lr, #( 6*8)]
    357        strh r2, [lr, #( 8*8)]
    358 
    359        @ End of column loop
    360        add lr, lr, #2
    361        subs r12, r12, #1
    362        bne column_loop
    363 
    364 the_end:
    365        @ The end....
    366        pop {r4 - r11, pc}              @ restore r4-r11 and return by loading the lr saved on entry into pc
    367 endfunc
    368 
    369 const const_array
               @ Table of the multipliers loaded by ff_j_rev_dct_arm through r11.
               @ One word per entry; the order must match the FIX_*_ID byte offsets
               @ defined at the top of the file (entry k lives at offset 4*k).
    370        .word FIX_0_298631336
    371        .word FIX_0_541196100
    372        .word FIX_0_765366865
    373        .word FIX_1_175875602
    374        .word FIX_1_501321110
    375        .word FIX_2_053119869
    376        .word FIX_3_072711026
    377        .word FIX_M_0_390180644
    378        .word FIX_M_0_899976223
    379        .word FIX_M_1_847759065
    380        .word FIX_M_1_961570560
    381        .word FIX_M_2_562915447
    382        .word FIX_0xFFFF                @ 0xFFFF mask used by the empty_row path
    383 endconst