tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

idctdsp.asm (3918B)


      1 ;******************************************************************************
      2 ;* SIMD-optimized IDCT-related routines
      3 ;* Copyright (c) 2008 Loren Merritt
      4 ;* Copyright (c) 2003-2013 Michael Niedermayer
      5 ;* Copyright (c) 2013 Daniel Kang
      6 ;*
      7 ;* This file is part of FFmpeg.
      8 ;*
      9 ;* FFmpeg is free software; you can redistribute it and/or
     10 ;* modify it under the terms of the GNU Lesser General Public
     11 ;* License as published by the Free Software Foundation; either
     12 ;* version 2.1 of the License, or (at your option) any later version.
     13 ;*
     14 ;* FFmpeg is distributed in the hope that it will be useful,
     15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17 ;* Lesser General Public License for more details.
     18 ;*
     19 ;* You should have received a copy of the GNU Lesser General Public
     20 ;* License along with FFmpeg; if not, write to the Free Software
     21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     22 ;******************************************************************************
     23 
     24 %include "libavutil/x86/x86util.asm"
     25 
     26 SECTION_RODATA
     27 
     28 cextern pb_80
     29 
     30 SECTION .text
     31 
     32 ;--------------------------------------------------------------------------
     33 ; void ff_put_signed_pixels_clamped(const int16_t *block, uint8_t *pixels,
     34 ;                                   ptrdiff_t line_size);
     35 ;--------------------------------------------------------------------------
     36 
     37 %macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1 ; %1 = byte offset into block; stores 4 rows of 8 bytes at pixelsq
     38    mova     m1, [blockq+mmsize*0+%1]   ; m1 = row 0 as 16-bit words (aligned load; mmsize = 16 under INIT_XMM)
     39    mova     m2, [blockq+mmsize*2+%1]   ; m2 = row 2 as 16-bit words
     40    packsswb m1, [blockq+mmsize*1+%1]   ; signed-saturate words to bytes: m1 = row 0 (low qword) | row 1 (high qword)
     41    packsswb m2, [blockq+mmsize*3+%1]   ; m2 = row 2 (low qword) | row 3 (high qword)
     42    paddb    m1, m0                     ; wrapping per-byte add of the bias in m0 (only read here; the invoker must load it)
     43    paddb    m2, m0
     44    movq     [pixelsq+lsizeq*0], m1     ; low 8 bytes  -> row 0
     45    movhps   [pixelsq+lsizeq*1], m1     ; high 8 bytes -> row 1
     46    movq     [pixelsq+lsizeq*2], m2     ; low 8 bytes  -> row 2
     47    movhps   [pixelsq+lsize3q ], m2     ; high 8 bytes -> row 3 (expects lsize3q == 3*lsizeq)
     48 %endmacro
     49 
     50 INIT_XMM sse2
     51 cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3 ; 3 args, 4 GPRs, 3 vector regs (m0-m2)
     52    mova     m0, [pb_80]                ; bias added to every packed byte; pb_80 is cextern - presumably 0x80 per byte, TODO confirm
     53    lea      lsize3q, [lsizeq*3]        ; lsize3 = 3 * line_size, addresses the 4th row of each group
     54    PUT_SIGNED_PIXELS_CLAMPED_HALF 0    ; block bytes 0..63   -> rows 0-3
     55    lea      pixelsq, [pixelsq+lsizeq*4] ; advance destination by four rows
     56    PUT_SIGNED_PIXELS_CLAMPED_HALF 64   ; block bytes 64..127 -> rows 4-7
     57    RET
     58 
     59 ;--------------------------------------------------------------------------
     60 ; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels,
     61 ;                            ptrdiff_t line_size);
     62 ;--------------------------------------------------------------------------
     63 ; %1 = byte offset into block; each expansion clamps 4 rows of int16 to 0..255 and stores 8 bytes per row
     64 %macro PUT_PIXELS_CLAMPED_HALF 1
     65    mova     m0, [blockq+mmsize*0+%1]   ; m0 = row 0 as 16-bit words (aligned load; mmsize = 16 under INIT_XMM)
     66    mova     m1, [blockq+mmsize*2+%1]   ; m1 = row 2 as 16-bit words
     67    packuswb m0, [blockq+mmsize*1+%1]   ; unsigned-saturate words to bytes: m0 = row 0 (low qword) | row 1 (high qword)
     68    packuswb m1, [blockq+mmsize*3+%1]   ; m1 = row 2 (low qword) | row 3 (high qword)
     69    movq           [pixelsq], m0        ; low 8 bytes  -> row 0
     70    movhps  [lsizeq+pixelsq], m0        ; high 8 bytes -> row 1
     71    movq  [2*lsizeq+pixelsq], m1        ; low 8 bytes  -> row 2
     72    movhps [lsize3q+pixelsq], m1        ; high 8 bytes -> row 3 (expects lsize3q == 3*lsizeq)
     73 %endmacro
     74 
     75 INIT_XMM sse2
     76 cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3 ; 3 args, 4 GPRs, 2 vector regs (m0-m1)
     77    lea lsize3q, [lsizeq*3]             ; lsize3 = 3 * line_size, addresses the 4th row of each group
     78    PUT_PIXELS_CLAMPED_HALF 0           ; block bytes 0..63   -> rows 0-3
     79    lea pixelsq, [pixelsq+lsizeq*4]     ; advance destination by four rows
     80    PUT_PIXELS_CLAMPED_HALF 64          ; block bytes 64..127 -> rows 4-7
     81    RET
     82 
     83 ;--------------------------------------------------------------------------
     84 ; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels,
     85 ;                            ptrdiff_t line_size);
     86 ;--------------------------------------------------------------------------
     87 ; %1 = byte offset into block; each expansion adds 2 rows of int16 to the existing pixels and stores them clamped to 0..255
     88 %macro ADD_PIXELS_CLAMPED 1
     89    mova       m0, [blockq+mmsize*0+%1] ; m0 = first row of 16-bit words (aligned load; mmsize = 16 under INIT_XMM)
     90    mova       m1, [blockq+mmsize*1+%1] ; m1 = second row of 16-bit words
     91    movq       m2, [pixelsq]            ; m2 = 8 existing bytes of the first row
     92    movq       m3, [pixelsq+lsizeq]     ; m3 = 8 existing bytes of the second row
     93    punpcklbw  m2, m4                   ; interleave with m4 to widen bytes to words (zero-extends when m4 == 0; invoker must clear m4)
     94    punpcklbw  m3, m4
     95    paddsw     m0, m2                   ; signed saturating 16-bit add: block + pixels
     96    paddsw     m1, m3
     97    packuswb   m0, m1                   ; unsigned-saturate to bytes: first row (low qword) | second row (high qword)
     98    movq       [pixelsq], m0            ; low 8 bytes  -> first row
     99    movhps     [pixelsq+lsizeq], m0     ; high 8 bytes -> second row
    100 %endmacro
    101 
    102 INIT_XMM sse2
    103 cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize ; 3 args, 3 GPRs, 5 vector regs (m0-m4)
    104    pxor       m4, m4                   ; m4 = 0, the zero source used by punpcklbw inside the macro
    105    ADD_PIXELS_CLAMPED 0                ; block bytes 0..31   -> rows 0-1
    106    lea        pixelsq, [pixelsq+lsizeq*2] ; advance destination by two rows
    107    ADD_PIXELS_CLAMPED 32               ; block bytes 32..63  -> rows 2-3
    108    lea        pixelsq, [pixelsq+lsizeq*2] ; advance destination by two rows
    109    ADD_PIXELS_CLAMPED 64               ; block bytes 64..95  -> rows 4-5
    110    lea        pixelsq, [pixelsq+lsizeq*2] ; advance destination by two rows
    111    ADD_PIXELS_CLAMPED 96               ; block bytes 96..127 -> rows 6-7
    112    RET