tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

highbd_intrapred_asm_sse2.asm (7880B)


      1 ;
      2 ; Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 ;
      4 ; This source code is subject to the terms of the BSD 2 Clause License and
      5 ; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 ; was not distributed with this source code in the LICENSE file, you can
      7 ; obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 ; Media Patent License 1.0 was not distributed with this source code in the
      9 ; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 ;
     11 
     12 ;
     13 
     14 %include "third_party/x86inc/x86inc.asm"
     15 
     16 SECTION_RODATA
        ; Rounding constants added to the sample sum before the final shift in
        ; the DC predictors below (half of the divisor in each case).
     17 pw_4:  times 8 dw 4                     ; eight 16-bit words of 4; used with paddw before a shift by 3
     18 pw_8:  times 8 dw 8                     ; eight 16-bit words of 8; used with paddw before a shift by 4
        ; NOTE: despite the "pw_" prefix the next two are stored as 32-bit
        ; dwords (dd), and are only ever added with paddd.
     19 pw_16: times 4 dd 16                    ; four dwords of 16; used with paddd before a shift by 5
     20 pw_32: times 4 dd 32                    ; four dwords of 32; used with paddd before a shift by 6
     21 
     22 SECTION .text
     23 INIT_XMM sse2
        ; highbd_dc_predictor_4x4: fill a 4x4 block of 16-bit samples with
        ; (sum(above[0..3]) + sum(left[0..3]) + 4) >> 3.
        ;   dst    - destination; rows are written stride*2 bytes apart
        ;            (stride is presumably counted in 16-bit elements - confirm
        ;            against the caller)
        ;   above  - four 16-bit samples (8 bytes are read)
        ;   left   - four 16-bit samples (8 bytes are read)
        ;   goffset - extra scratch register handed to GET_GOT
        ; GET_GOT / RESTORE_GOT / GLOBAL() are defined outside this file
        ; (x86inc.asm); they bracket the access to the pw_4 constant.
     24 cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
     25  GET_GOT     goffsetq
     26 
     27  movq                  m0, [aboveq]         ; m0 = a0 a1 a2 a3 (low 64 bits)
     28  movq                  m2, [leftq]          ; m2 = l0 l1 l2 l3
     29  paddw                 m0, m2               ; word i = a[i] + l[i]
     30  pshuflw               m1, m0, 0xe          ; bring words 2,3 down to positions 0,1
     31  paddw                 m0, m1               ; word0 = s0+s2, word1 = s1+s3
     32  pshuflw               m1, m0, 0x1          ; bring word 1 down to position 0
     33  paddw                 m0, m1               ; word0 = sum of all eight samples
     34  paddw                 m0, [GLOBAL(pw_4)]   ; + 4 for rounding
        ; Arithmetic shift: correct only while sum+4 stays below 0x8000.
        ; NOTE(review): that holds for samples of up to 12 bits - confirm.
     35  psraw                 m0, 3
     36  pshuflw               m0, m0, 0x0          ; broadcast word 0 to the four low words
     37  movq    [dstq          ], m0               ; row 0
     38  movq    [dstq+strideq*2], m0               ; row 1
     39  lea                 dstq, [dstq+strideq*4] ; advance two rows
     40  movq    [dstq          ], m0               ; row 2
     41  movq    [dstq+strideq*2], m0               ; row 3
     42 
     43  RESTORE_GOT
     44  RET
     45 
     46 INIT_XMM sse2
        ; highbd_dc_predictor_8x8: fill an 8x8 block of 16-bit samples with
        ; (sum(above[0..7]) + sum(left[0..7]) + 8) >> 4.
        ;   dst    - destination; rows are stride*2 bytes apart and are written
        ;            with mova (x86inc's aligned 16-byte move)
        ;   above, left - eight 16-bit samples each, also read with mova
        ; The horizontal sum is built with pmaddwd against a vector of ones
        ; (adds adjacent word pairs into dwords) followed by packssdw to narrow
        ; back to words. Both instructions are signed, so every partial sum
        ; has to stay below 0x8000 until the last pmaddwd.
        ; NOTE(review): that holds for samples of up to 12 bits - confirm.
     47 cglobal highbd_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset
     48  GET_GOT     goffsetq
     49 
     50  pxor                  m1, m1               ; m1 = 0, used as the upper half for packssdw
     51  mova                  m0, [aboveq]
     52  mova                  m2, [leftq]
        ; above/left are consumed; re-alias the argument registers so two of
        ; them can serve as stride3 and the 'one' scratch value.
     53  DEFINE_ARGS dst, stride, stride3, one
     54  mov                 oned, 0x00010001       ; two 16-bit ones
     55  lea             stride3q, [strideq*3]
     56  movd                  m3, oned
     57  pshufd                m3, m3, 0x0          ; m3 = eight words of 1
     58  paddw                 m0, m2               ; word i = a[i] + l[i]
     59  pmaddwd               m0, m3               ; 4 dwords: sums of adjacent word pairs
     60  packssdw              m0, m1               ; narrow to 4 words in the low half, zeros above
     61  pmaddwd               m0, m3               ; 2 dwords of partial sums
     62  packssdw              m0, m1               ; narrow to 2 words
     63  pmaddwd               m0, m3               ; dword 0 = total of all 16 samples
     64  paddw                 m0, [GLOBAL(pw_8)]   ; + 8 for rounding (word 0 holds the total)
     65  psrlw                 m0, 4                ; logical shift: word 0 = DC value
     66  pshuflw               m0, m0, 0x0          ; broadcast word 0 across the low 4 words
     67  punpcklqdq            m0, m0               ; ...and across all 8 words
     68  mova   [dstq           ], m0               ; row 0
     69  mova   [dstq+strideq*2 ], m0               ; row 1
     70  mova   [dstq+strideq*4 ], m0               ; row 2
     71  mova   [dstq+stride3q*2], m0               ; row 3
     72  lea                 dstq, [dstq+strideq*8] ; advance four rows
     73  mova   [dstq           ], m0               ; row 4
     74  mova   [dstq+strideq*2 ], m0               ; row 5
     75  mova   [dstq+strideq*4 ], m0               ; row 6
     76  mova   [dstq+stride3q*2], m0               ; row 7
     77 
     78  RESTORE_GOT
     79  RET
     80 
     81 INIT_XMM sse2
        ; highbd_dc_predictor_16x16: fill a 16x16 block of 16-bit samples with
        ; (sum(above[0..15]) + sum(left[0..15]) + 16) >> 5.
        ;   dst    - destination; rows are stride*2 bytes apart, 32 bytes are
        ;            written per row with mova (aligned 16-byte moves)
        ;   above, left - sixteen 16-bit samples each (32 bytes read from each)
        ; The sum is folded in 16-bit lanes first, then widened to 32-bit
        ; lanes by interleaving with zero before the final additions.
     82 cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset
     83  GET_GOT     goffsetq
     84 
     85  pxor                  m1, m1               ; m1 = 0, zero source for the unpacks below
     86  mova                  m0, [aboveq]
     87  mova                  m3, [aboveq+16]
     88  mova                  m2, [leftq]
     89  mova                  m4, [leftq+16]
        ; above/left are consumed; reuse their registers as stride3 and the
        ; loop counter.
     90  DEFINE_ARGS dst, stride, stride3, lines4
     91  lea             stride3q, [strideq*3]
     92  mov              lines4d, 4                ; 4 iterations x 4 rows = 16 rows
     93  paddw                 m0, m2
     94  paddw                 m0, m3
     95  paddw                 m0, m4               ; 8 words, each the sum of four samples
     96  movhlps               m2, m0               ; low half of m2 = high half of m0
     97  paddw                 m0, m2               ; low 4 words: sums of eight samples each
     98  punpcklwd             m0, m1               ; zero-extend the low 4 words to dwords
     99  movhlps               m2, m0
    100  paddd                 m0, m2               ; low 2 dwords hold the partial sums
    101  punpckldq             m0, m1               ; spread them to dwords 0 and 2
    102  movhlps               m2, m0
    103  paddd                 m0, m2               ; dword 0 = total of all 32 samples
    104  paddd                 m0, [GLOBAL(pw_16)]  ; + 16 for rounding (pw_16 holds dwords)
    105  psrad                 m0, 5
    106  pshuflw               m0, m0, 0x0          ; broadcast the low word across the low 4 words
    107  punpcklqdq            m0, m0               ; ...and across all 8 words
    108 .loop:
        ; Each pass writes four rows of 16 samples (two 16-byte stores per row).
    109  mova   [dstq              ], m0
    110  mova   [dstq           +16], m0
    111  mova   [dstq+strideq*2    ], m0
    112  mova   [dstq+strideq*2 +16], m0
    113  mova   [dstq+strideq*4    ], m0
    114  mova   [dstq+strideq*4 +16], m0
    115  mova   [dstq+stride3q*2   ], m0
    116  mova   [dstq+stride3q*2+16], m0
    117  lea                 dstq, [dstq+strideq*8] ; advance four rows
    118  dec              lines4d
    119  jnz .loop
    120 
    121  RESTORE_GOT
    122  REP_RET
    123 
    124 INIT_XMM sse2
        ; highbd_dc_predictor_32x32: fill a 32x32 block of 16-bit samples with
        ; (sum(above[0..31]) + sum(left[0..31]) + 32) >> 6.
        ;   dst    - destination; rows are stride*2 bytes apart, 64 bytes are
        ;            written per row with mova (aligned 16-byte moves)
        ;   above, left - thirty-two 16-bit samples each (64 bytes read from each)
        ; Partial sums are kept in 16-bit lanes (paddw wraps modulo 2^16) until
        ; punpcklwd zero-extends them, so each lane total must fit in 16 bits
        ; unsigned. NOTE(review): that holds for samples of up to 12 bits -
        ; confirm.
    125 cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset
    126  GET_GOT     goffsetq
    127 
    128  mova                  m0, [aboveq]
    129  mova                  m2, [aboveq+16]
    130  mova                  m3, [aboveq+32]
    131  mova                  m4, [aboveq+48]
    132  paddw                 m0, m2
    133  paddw                 m3, m4
    134  mova                  m2, [leftq]
    135  mova                  m4, [leftq+16]
    136  mova                  m5, [leftq+32]
    137  mova                  m6, [leftq+48]
    138  paddw                 m2, m4
    139  paddw                 m5, m6
    140  paddw                 m0, m3               ; m0 = lane-wise sum of the four 'above' vectors
    141  paddw                 m2, m5               ; m2 = lane-wise sum of the four 'left' vectors
    142  pxor                  m1, m1               ; m1 = 0, zero source for the unpacks below
    143  paddw                 m0, m2               ; 8 words, each the sum of eight samples
        ; above/left are consumed; reuse their registers as stride3 and the
        ; loop counter.
    144  DEFINE_ARGS dst, stride, stride3, lines4
    145  lea             stride3q, [strideq*3]
    146  mov              lines4d, 8                ; 8 iterations x 4 rows = 32 rows
    147  movhlps               m2, m0               ; low half of m2 = high half of m0
    148  paddw                 m0, m2               ; low 4 words: sums of sixteen samples each
    149  punpcklwd             m0, m1               ; zero-extend the low 4 words to dwords
    150  movhlps               m2, m0
    151  paddd                 m0, m2               ; low 2 dwords hold the partial sums
    152  punpckldq             m0, m1               ; spread them to dwords 0 and 2
    153  movhlps               m2, m0
    154  paddd                 m0, m2               ; dword 0 = total of all 64 samples
    155  paddd                 m0, [GLOBAL(pw_32)]  ; + 32 for rounding (pw_32 holds dwords)
    156  psrad                 m0, 6
    157  pshuflw               m0, m0, 0x0          ; broadcast the low word across the low 4 words
    158  punpcklqdq            m0, m0               ; ...and across all 8 words
    159 .loop:
        ; Each pass writes four rows of 32 samples (four 16-byte stores per row).
    160  mova [dstq               ], m0
    161  mova [dstq          +16  ], m0
    162  mova [dstq          +32  ], m0
    163  mova [dstq          +48  ], m0
    164  mova [dstq+strideq*2     ], m0
    165  mova [dstq+strideq*2+16  ], m0
    166  mova [dstq+strideq*2+32  ], m0
    167  mova [dstq+strideq*2+48  ], m0
    168  mova [dstq+strideq*4     ], m0
    169  mova [dstq+strideq*4+16  ], m0
    170  mova [dstq+strideq*4+32  ], m0
    171  mova [dstq+strideq*4+48  ], m0
    172  mova [dstq+stride3q*2    ], m0
    173  mova [dstq+stride3q*2 +16], m0
    174  mova [dstq+stride3q*2 +32], m0
    175  mova [dstq+stride3q*2 +48], m0
    176  lea                 dstq, [dstq+strideq*8] ; advance four rows
    177  dec              lines4d
    178  jnz .loop
    179 
    180  RESTORE_GOT
    181  REP_RET
    182 
    183 INIT_XMM sse2
        ; highbd_v_predictor_4x4: copy the four 16-bit samples at 'above'
        ; (8 bytes) into each of the four rows of dst.
        ;   dst    - destination; rows are stride*2 bytes apart (stride is
        ;            presumably counted in 16-bit elements - confirm)
        ;   above  - source row, read once with movq
    184 cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above
    185  movq                  m0, [aboveq]
    186  movq    [dstq          ], m0               ; row 0
    187  movq    [dstq+strideq*2], m0               ; row 1
    188  lea                 dstq, [dstq+strideq*4] ; advance two rows
    189  movq    [dstq          ], m0               ; row 2
    190  movq    [dstq+strideq*2], m0               ; row 3
    191  RET
    192 
    193 INIT_XMM sse2
        ; highbd_v_predictor_8x8: copy the eight 16-bit samples at 'above'
        ; (16 bytes) into each of the eight rows of dst.
        ;   dst    - destination; rows are stride*2 bytes apart, written with
        ;            mova (aligned 16-byte moves)
        ;   above  - source row, read once with mova
    194 cglobal highbd_v_predictor_8x8, 3, 3, 1, dst, stride, above
    195  mova                  m0, [aboveq]
        ; 'above' is consumed; its register is re-aliased as stride3.
    196  DEFINE_ARGS dst, stride, stride3
    197  lea             stride3q, [strideq*3]
    198  mova   [dstq           ], m0               ; row 0
    199  mova   [dstq+strideq*2 ], m0               ; row 1
    200  mova   [dstq+strideq*4 ], m0               ; row 2
    201  mova   [dstq+stride3q*2], m0               ; row 3
    202  lea                 dstq, [dstq+strideq*8] ; advance four rows
    203  mova   [dstq           ], m0               ; row 4
    204  mova   [dstq+strideq*2 ], m0               ; row 5
    205  mova   [dstq+strideq*4 ], m0               ; row 6
    206  mova   [dstq+stride3q*2], m0               ; row 7
    207  RET
    208 
    209 INIT_XMM sse2
        ; highbd_v_predictor_16x16: copy the sixteen 16-bit samples at 'above'
        ; (32 bytes) into each of the sixteen rows of dst.
        ;   dst    - destination; rows are stride*2 bytes apart, written with
        ;            mova (aligned 16-byte moves)
        ;   above  - source row, read once into m0/m1
    210 cglobal highbd_v_predictor_16x16, 3, 4, 2, dst, stride, above
    211  mova                  m0, [aboveq]         ; samples 0..7
    212  mova                  m1, [aboveq+16]      ; samples 8..15
        ; 'above' is consumed; its register becomes stride3 and the fourth
        ; register is the loop counter.
    213  DEFINE_ARGS dst, stride, stride3, nlines4
    214  lea             stride3q, [strideq*3]
    215  mov              nlines4d, 4               ; 4 iterations x 4 rows = 16 rows
    216 .loop:
        ; Each pass writes four rows.
    217  mova    [dstq              ], m0
    218  mova    [dstq           +16], m1
    219  mova    [dstq+strideq*2    ], m0
    220  mova    [dstq+strideq*2 +16], m1
    221  mova    [dstq+strideq*4    ], m0
    222  mova    [dstq+strideq*4 +16], m1
    223  mova    [dstq+stride3q*2   ], m0
    224  mova    [dstq+stride3q*2+16], m1
    225  lea                 dstq, [dstq+strideq*8] ; advance four rows
    226  dec             nlines4d
    227  jnz .loop
    228  REP_RET
    229 
    230 INIT_XMM sse2
        ; highbd_v_predictor_32x32: copy the thirty-two 16-bit samples at
        ; 'above' (64 bytes) into each of the thirty-two rows of dst.
        ;   dst    - destination; rows are stride*2 bytes apart, written with
        ;            mova (aligned 16-byte moves)
        ;   above  - source row, read once into m0..m3
    231 cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
    232  mova                  m0, [aboveq]         ; samples 0..7
    233  mova                  m1, [aboveq+16]      ; samples 8..15
    234  mova                  m2, [aboveq+32]      ; samples 16..23
    235  mova                  m3, [aboveq+48]      ; samples 24..31
        ; 'above' is consumed; its register becomes stride3 and the fourth
        ; register is the loop counter.
    236  DEFINE_ARGS dst, stride, stride3, nlines4
    237  lea             stride3q, [strideq*3]
    238  mov              nlines4d, 8               ; 8 iterations x 4 rows = 32 rows
    239 .loop:
        ; Each pass writes four rows.
    240  mova [dstq               ], m0
    241  mova [dstq            +16], m1
    242  mova [dstq            +32], m2
    243  mova [dstq            +48], m3
    244  mova [dstq+strideq*2     ], m0
    245  mova [dstq+strideq*2  +16], m1
    246  mova [dstq+strideq*2  +32], m2
    247  mova [dstq+strideq*2  +48], m3
    248  mova [dstq+strideq*4     ], m0
    249  mova [dstq+strideq*4  +16], m1
    250  mova [dstq+strideq*4  +32], m2
    251  mova [dstq+strideq*4  +48], m3
    252  mova [dstq+stride3q*2    ], m0
    253  mova [dstq+stride3q*2 +16], m1
    254  mova [dstq+stride3q*2 +32], m2
    255  mova [dstq+stride3q*2 +48], m3
    256  lea                 dstq, [dstq+strideq*8] ; advance four rows
    257  dec             nlines4d
    258  jnz .loop
    259  REP_RET