tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

dct_sse2.asm (2687B)


      1 ;
      2 ; Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 ;
      4 ; This source code is subject to the terms of the BSD 2 Clause License and
      5 ; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 ; was not distributed with this source code in the LICENSE file, you can
      7 ; obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 ; Media Patent License 1.0 was not distributed with this source code in the
      9 ; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 ;
     11 
     12 %define private_prefix av1
     13 
     14 %include "third_party/x86inc/x86inc.asm"
     15 
     16 SECTION .text
     17 
     18 %macro TRANSFORM_COLS 0
         ; One 1-D butterfly pass over four vectors of packed signed 16-bit words.
         ; In:      m0 = a, m1 = b, m2 = c, m3 = d
         ; Out:     m0 = a0, m1 = c1, m2 = d1, m3 = b1 (names as left by the SWAPs), with
         ;            e  = ((a + b) - (d - c)) >> 1    (arithmetic shift)
         ;            b1 = e - b
         ;            c1 = e - c
         ;            a0 = (a + b) - c1
         ;            d1 = (d - c) + b1
         ; Scratch: m4 and m5 are overwritten.
         ; The movq copies transfer only the low 64 bits, so only the low four
         ; word lanes of each output hold a complete result.
         ; SWAP comes from x86inc.asm (not visible here); it is expected to exchange
         ; register names at assembly time rather than emit moves - confirm there.
     19  paddw           m0,        m1 ; m0 = a + b
     20  movq            m4,        m0 ; m4 = a + b
     21  psubw           m3,        m2 ; m3 = d - c
     22  psubw           m4,        m3 ; m4 = (a + b) - (d - c)
     23  psraw           m4,        1  ; m4 = e
     24  movq            m5,        m4 ; m5 = e
     25  psubw           m5,        m1 ;b1 = e - b
     26  psubw           m4,        m2 ;c1 = e - c
     27  psubw           m0,        m4 ; a0 = (a + b) - c1
     28  paddw           m3,        m5 ; d1 = (d - c) + b1
     29                                ; m0 a0
     30  SWAP            1,         4  ; m1 c1
     31  SWAP            2,         3  ; m2 d1
     32  SWAP            3,         5  ; m3 b1
     33 %endmacro
     34 
     35 %macro TRANSPOSE_4X4 0
         ; Transposes a 4x4 block of 16-bit words held one row per register.
         ; In:  low four words of m0..m3 = rows 0..3 ("rc" below = row r, column c).
         ; Out: m0 = column 0 in its low half, column 1 in its high half
         ;      m1 = column 2 in its low half, column 3 in its high half
         ; m2 is left holding the interleaved rows 2/3; m3 is only read.
     36                                ; 00 01 02 03
     37                                ; 10 11 12 13
     38                                ; 20 21 22 23
     39                                ; 30 31 32 33
     40  punpcklwd       m0,        m1 ; 00 10 01 11  02 12 03 13
     41  punpcklwd       m2,        m3 ; 20 30 21 31  22 32 23 33
         ; Keep a copy of the row 0/1 interleave: the low-dword unpack below
         ; overwrites m0 and the high-dword unpack still needs the original.
     42  mova            m1,        m0
     43  punpckldq       m0,        m2 ; 00 10 20 30  01 11 21 31
     44  punpckhdq       m1,        m2 ; 02 12 22 32  03 13 23 33
     45 %endmacro
     46 
     47 INIT_XMM sse2
        ;-----------------------------------------------------------------------
        ; fwht4x4(input, output, stride)
        ; 4x4 forward transform (presumably Walsh-Hadamard, going by the name):
        ; two TRANSFORM_COLS passes, each followed by TRANSPOSE_4X4, then the
        ; result is scaled by 4 and widened from 16-bit to 32-bit.
        ;
        ; In:   input  - four rows of four 16-bit words, read with 8-byte loads
        ;                at byte offsets 0, stride*2, stride*4 and stride*6
        ;       output - receives sixteen 32-bit values (64 bytes) through four
        ;                mova stores; assumes 16-byte alignment if mova is an
        ;                aligned store under INIT_XMM - confirm in x86inc.asm
        ;       stride - row pitch; multiplied by 2 when forming addresses,
        ;                consistent with a pitch counted in 16-bit elements
        ; Uses: r3 as a row pointer, m0-m5 as vector registers.
        ;
        ; cglobal arguments "3, 4, 8" follow the x86inc convention of
        ; (argument count, general-purpose registers, vector registers).
        ; NOTE(review): only m0-m5 are referenced in the visible code although
        ; 8 vector registers are declared - confirm whether 6 would suffice.
        ; NOTE(review): strideq is used at full register width. If the C
        ; prototype declares stride as a 32-bit int, the upper half of the
        ; register may need explicit sign extension - verify against the caller.
        ;-----------------------------------------------------------------------
     48 cglobal fwht4x4, 3, 4, 8, input, output, stride
     49  lea             r3q,       [inputq + strideq*4] ; r3 = address of row 2
     50  movq            m0,        [inputq] ;a1
     51  movq            m1,        [inputq + strideq*2] ;b1
     52  movq            m2,        [r3q] ;c1
     53  movq            m3,        [r3q + strideq*2] ;d1
     54 
         ; First pass. TRANSFORM_COLS leaves its results in m0..m3 in the order
         ; a0, c1, d1, b1, and TRANSPOSE_4X4 treats them as rows 0..3 in that order.
     55  TRANSFORM_COLS
     56  TRANSPOSE_4X4
         ; After the transpose m0 = columns 0|1 and m1 = columns 2|3. Rename so
         ; that m2 holds columns 2|3, then split the high halves out so that the
         ; low halves of m0..m3 hold columns 0..3 for the second pass.
     57  SWAP            1,         2
     58  psrldq          m1,        m0, 8 ; m1 = m0 shifted right 8 bytes (column 1)
     59  psrldq          m3,        m2, 8 ; m3 = m2 shifted right 8 bytes (column 3)
     60  TRANSFORM_COLS
     61  TRANSPOSE_4X4
     62 
         ; m0 and m1 each hold eight 16-bit results; multiply every word by 4.
     63  psllw           m0,        2
     64  psllw           m1,        2
     65 
     66  ; sign extension
         ; Each word is duplicated into both halves of a dword by unpacking a
         ; register with itself; the arithmetic right shift by 16 then leaves
         ; the sign-extended 32-bit value.
     67  mova            m2,             m0
     68  mova            m3,             m1
     69  punpcklwd       m0,             m0 ; low four words of m0
     70  punpcklwd       m1,             m1 ; low four words of m1
     71  punpckhwd       m2,             m2 ; high four words of the original m0
     72  punpckhwd       m3,             m3 ; high four words of the original m1
     73  psrad           m0,             16
     74  psrad           m1,             16
     75  psrad           m2,             16
     76  psrad           m3,             16
         ; Store order m0, m2, m1, m3 = low/high halves of the first result
         ; register followed by low/high halves of the second.
     77  mova            [outputq],      m0
     78  mova            [outputq + 16], m2
     79  mova            [outputq + 32], m1
     80  mova            [outputq + 48], m3
     81 
     82  RET