tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

intrapred_asm_sse2.asm (17735B)


      1 ;
      2 ; Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 ;
      4 ; This source code is subject to the terms of the BSD 2 Clause License and
      5 ; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 ; was not distributed with this source code in the LICENSE file, you can
      7 ; obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 ; Media Patent License 1.0 was not distributed with this source code in the
      9 ; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 ;
     11 
     12 ;
     13 
     14 %include "third_party/x86inc/x86inc.asm"
     15 
     16 SECTION_RODATA
        ; Broadcast constants used by the DC predictors in this file.
        ; Each pw_* / pw2_* entry is the rounding term that is added to a pixel
        ; sum immediately before the right shift that divides by the pixel count:
        ;   pw_N  holds N    and is paired with a shift by log2(2*N)
        ;         (above + left edges of an NxN block, 2*N pixels summed)
        ;   pw2_N holds N/2  and is paired with a shift by log2(N)
        ;         (a single edge of an NxN block, N pixels summed)
        ; Every entry is 16 bytes long.
     17 pb_1: times 16 db 1                 ; NOTE(review): not referenced in the visible listing
     18 pw_4:  times 8 dw 4
     19 pw_8:  times 8 dw 8
     20 pw_16: times 8 dw 16
     21 pw_32: times 8 dw 32
     22 dc_128: times 16 db 128             ; fill byte stored by the dc_128_* routines
     23 pw2_4:  times 8 dw 2
     24 pw2_8:  times 8 dw 4
     25 pw2_16:  times 8 dw 8
     26 pw2_32:  times 8 dw 16
     27 
     28 SECTION .text
     29 
     30 INIT_XMM sse2
        ; dc_predictor_4x4(dst, stride, above, left)
        ; Fills 4 rows of 4 bytes at dst (rows strideq bytes apart) with
        ;   dc = (sum(above[0..3]) + sum(left[0..3]) + 4) >> 3
        ; goffset is the scratch register handed to GET_GOT; GET_GOT, GLOBAL and
        ; RESTORE_GOT are macros defined outside this listing.
     31 cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
     32  GET_GOT     goffsetq
     33 
     34  movd                  m2, [leftq]              ; 4 left pixels
     35  movd                  m0, [aboveq]             ; 4 above pixels
     36  pxor                  m1, m1                   ; zero, reference operand for psadbw
     37  punpckldq             m0, m2                   ; low qword = above[0..3], left[0..3]
     38  psadbw                m0, m1                   ; low word = sum of those 8 bytes
     39  paddw                 m0, [GLOBAL(pw_4)]       ; rounding term
     40  psraw                 m0, 3                    ; divide by 8
     41  pshuflw               m0, m0, 0x0              ; broadcast dc to the 4 low words
     42  packuswb              m0, m0                   ; words -> bytes, low dword = dc x4
     43  movd      [dstq        ], m0
     44  movd      [dstq+strideq], m0
     45  lea                 dstq, [dstq+strideq*2]
     46  movd      [dstq        ], m0
     47  movd      [dstq+strideq], m0
     48 
     49  RESTORE_GOT
     50  RET
     51 
     52 INIT_XMM sse2
        ; dc_left_predictor_4x4(dst, stride, above, left)
        ; Fills 4 rows of 4 bytes at dst with
        ;   dc = (sum(left[0..3]) + 2) >> 2
        ; above is named in the signature but never read.
     53 cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset
        ; Only two arguments are requested from cglobal above; movifnidn is an
        ; x86inc helper - presumably it fetches left from its argument slot when
        ; it is not already in a register (confirm against x86inc.asm).
     54  movifnidn          leftq, leftmp
     55  GET_GOT     goffsetq
     56 
     57  pxor                  m1, m1                   ; zero, reference operand for psadbw
     58  movd                  m0, [leftq]              ; 4 left pixels, upper bytes zero
     59  psadbw                m0, m1                   ; low word = sum of the 4 bytes
     60  paddw                 m0, [GLOBAL(pw2_4)]      ; rounding term (2)
     61  psraw                 m0, 2                    ; divide by 4
     62  pshuflw               m0, m0, 0x0              ; broadcast dc to the 4 low words
     63  packuswb              m0, m0                   ; words -> bytes
     64  movd      [dstq        ], m0
     65  movd      [dstq+strideq], m0
     66  lea                 dstq, [dstq+strideq*2]
     67  movd      [dstq        ], m0
     68  movd      [dstq+strideq], m0
     69 
     70  RESTORE_GOT
     71  RET
     72 
     73 INIT_XMM sse2
        ; dc_top_predictor_4x4(dst, stride, above, left)
        ; Fills 4 rows of 4 bytes at dst with
        ;   dc = (sum(above[0..3]) + 2) >> 2
        ; left is named in the signature but never read.
     74 cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset
     75  GET_GOT     goffsetq
     76 
     77  pxor                  m1, m1                   ; zero, reference operand for psadbw
     78  movd                  m0, [aboveq]             ; 4 above pixels, upper bytes zero
     79  psadbw                m0, m1                   ; low word = sum of the 4 bytes
     80  paddw                 m0, [GLOBAL(pw2_4)]      ; rounding term (2)
     81  psraw                 m0, 2                    ; divide by 4
     82  pshuflw               m0, m0, 0x0              ; broadcast dc to the 4 low words
     83  packuswb              m0, m0                   ; words -> bytes
     84  movd      [dstq        ], m0
     85  movd      [dstq+strideq], m0
     86  lea                 dstq, [dstq+strideq*2]
     87  movd      [dstq        ], m0
     88  movd      [dstq+strideq], m0
     89 
     90  RESTORE_GOT
     91  RET
     92 
     93 INIT_XMM sse2
        ; dc_predictor_8x8(dst, stride, above, left)
        ; Fills 8 rows of 8 bytes at dst with
        ;   dc = (sum(above[0..7]) + sum(left[0..7]) + 8) >> 4
     94 cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
     95  GET_GOT     goffsetq
     96 
     97  pxor                  m1, m1                   ; zero, reference operand for psadbw
     98  movq                  m0, [aboveq]             ; 8 above pixels
     99  movq                  m2, [leftq]              ; 8 left pixels
        ; above/left have been consumed; the names are re-declared so that
        ; stride3 can be used as a scratch name from here on.
    100  DEFINE_ARGS dst, stride, stride3
    101  lea             stride3q, [strideq*3]
    102  psadbw                m0, m1                   ; sum of above
    103  psadbw                m2, m1                   ; sum of left
    104  paddw                 m0, m2
    105  paddw                 m0, [GLOBAL(pw_8)]       ; rounding term
    106  psraw                 m0, 4                    ; divide by 16
    107  punpcklbw             m0, m0                   ; low word = (dc, dc) as bytes
    108  pshuflw               m0, m0, 0x0              ; low qword = dc x8
    109  movq    [dstq          ], m0
    110  movq    [dstq+strideq  ], m0
    111  movq    [dstq+strideq*2], m0
    112  movq    [dstq+stride3q ], m0
    113  lea                 dstq, [dstq+strideq*4]     ; rows 4..7
    114  movq    [dstq          ], m0
    115  movq    [dstq+strideq  ], m0
    116  movq    [dstq+strideq*2], m0
    117  movq    [dstq+stride3q ], m0
    118 
    119  RESTORE_GOT
    120  RET
    121 
    122 INIT_XMM sse2
        ; dc_top_predictor_8x8(dst, stride, above, left)
        ; Fills 8 rows of 8 bytes at dst with
        ;   dc = (sum(above[0..7]) + 4) >> 3
        ; left is named in the signature but never read.
    123 cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset
    124  GET_GOT     goffsetq
    125 
    126  pxor                  m1, m1                   ; zero, reference operand for psadbw
    127  movq                  m0, [aboveq]             ; 8 above pixels
        ; above has been consumed; re-declare names so stride3 is available.
    128  DEFINE_ARGS dst, stride, stride3
    129  lea             stride3q, [strideq*3]
    130  psadbw                m0, m1                   ; low word = sum of the 8 bytes
    131  paddw                 m0, [GLOBAL(pw2_8)]      ; rounding term (4)
    132  psraw                 m0, 3                    ; divide by 8
    133  punpcklbw             m0, m0                   ; low word = (dc, dc) as bytes
    134  pshuflw               m0, m0, 0x0              ; low qword = dc x8
    135  movq    [dstq          ], m0
    136  movq    [dstq+strideq  ], m0
    137  movq    [dstq+strideq*2], m0
    138  movq    [dstq+stride3q ], m0
    139  lea                 dstq, [dstq+strideq*4]     ; rows 4..7
    140  movq    [dstq          ], m0
    141  movq    [dstq+strideq  ], m0
    142  movq    [dstq+strideq*2], m0
    143  movq    [dstq+stride3q ], m0
    144 
    145  RESTORE_GOT
    146  RET
    147 
    148 INIT_XMM sse2
        ; dc_left_predictor_8x8(dst, stride, above, left)
        ; Fills 8 rows of 8 bytes at dst with
        ;   dc = (sum(left[0..7]) + 4) >> 3
        ; above is named in the signature but never read.
    149 cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset
        ; Only two arguments are requested from cglobal; movifnidn presumably
        ; fetches left from its argument slot when needed (x86inc helper).
    150  movifnidn          leftq, leftmp
    151  GET_GOT     goffsetq
    152 
    153  pxor                  m1, m1                   ; zero, reference operand for psadbw
    154  movq                  m0, [leftq]              ; 8 left pixels
        ; left has been consumed; re-declare names so stride3 is available.
    155  DEFINE_ARGS dst, stride, stride3
    156  lea             stride3q, [strideq*3]
    157  psadbw                m0, m1                   ; low word = sum of the 8 bytes
    158  paddw                 m0, [GLOBAL(pw2_8)]      ; rounding term (4)
    159  psraw                 m0, 3                    ; divide by 8
    160  punpcklbw             m0, m0                   ; low word = (dc, dc) as bytes
    161  pshuflw               m0, m0, 0x0              ; low qword = dc x8
    162  movq    [dstq          ], m0
    163  movq    [dstq+strideq  ], m0
    164  movq    [dstq+strideq*2], m0
    165  movq    [dstq+stride3q ], m0
    166  lea                 dstq, [dstq+strideq*4]     ; rows 4..7
    167  movq    [dstq          ], m0
    168  movq    [dstq+strideq  ], m0
    169  movq    [dstq+strideq*2], m0
    170  movq    [dstq+stride3q ], m0
    171 
    172  RESTORE_GOT
    173  RET
    174 
    175 INIT_XMM sse2
        ; dc_128_predictor_4x4(dst, stride, above, left)
        ; Fills 4 rows of 4 bytes at dst with the constant 128.
        ; above and left are named in the signature but never read.
    176 cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset
    177  GET_GOT     goffsetq
    178 
    179  DEFINE_ARGS dst, stride, stride3
    180  lea             stride3q, [strideq*3]
    181  movd     m0,        [GLOBAL(dc_128)]           ; low dword = 128 x4
    182  movd    [dstq          ], m0
    183  movd    [dstq+strideq  ], m0
    184  movd    [dstq+strideq*2], m0
    185  movd    [dstq+stride3q ], m0
    186  RESTORE_GOT
    187  RET
    188 
    189 INIT_XMM sse2
        ; dc_128_predictor_8x8(dst, stride, above, left)
        ; Fills 8 rows of 8 bytes at dst with the constant 128.
        ; above and left are named in the signature but never read.
    190 cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset
    191  GET_GOT     goffsetq
    192 
    193  DEFINE_ARGS dst, stride, stride3
    194  lea             stride3q, [strideq*3]
    195  movq    m0,        [GLOBAL(dc_128)]            ; low qword = 128 x8
    196  movq    [dstq          ], m0
    197  movq    [dstq+strideq  ], m0
    198  movq    [dstq+strideq*2], m0
    199  movq    [dstq+stride3q ], m0
    200  lea                 dstq, [dstq+strideq*4]     ; rows 4..7
    201  movq    [dstq          ], m0
    202  movq    [dstq+strideq  ], m0
    203  movq    [dstq+strideq*2], m0
    204  movq    [dstq+stride3q ], m0
    205  RESTORE_GOT
    206  RET
    207 
    208 INIT_XMM sse2
        ; dc_predictor_16x16(dst, stride, above, left)
        ; Fills 16 rows of 16 bytes at dst with
        ;   dc = (sum(above[0..15]) + sum(left[0..15]) + 16) >> 5
        ; NOTE(review): loads and stores use mova (x86inc aligned move), so
        ; above, left and every dst row presumably must be 16-byte aligned -
        ; confirm against the callers.
    209 cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
    210  GET_GOT     goffsetq
    211 
    212  pxor                  m1, m1                   ; zero, reference operand for psadbw
    213  mova                  m0, [aboveq]
    214  mova                  m2, [leftq]
        ; above/left have been consumed; re-declare names for the store loop.
    215  DEFINE_ARGS dst, stride, stride3, lines4
    216  lea             stride3q, [strideq*3]
    217  mov              lines4d, 4                    ; 4 iterations x 4 rows
    218  psadbw                m0, m1                   ; two partial sums, one per 8-byte half
    219  psadbw                m2, m1
    220  paddw                 m0, m2
    221  movhlps               m2, m0                   ; bring the upper-half sum down
    222  paddw                 m0, m2                   ; low word = total of 32 pixels
    223  paddw                 m0, [GLOBAL(pw_16)]      ; rounding term
    224  psraw                 m0, 5                    ; divide by 32
    225  pshuflw               m0, m0, 0x0              ; dc in the 4 low words
    226  punpcklqdq            m0, m0                   ; dc in all 8 words
    227  packuswb              m0, m0                   ; dc in all 16 bytes
    228 .loop:
    229  mova    [dstq          ], m0
    230  mova    [dstq+strideq  ], m0
    231  mova    [dstq+strideq*2], m0
    232  mova    [dstq+stride3q ], m0
    233  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    234  dec              lines4d
    235  jnz .loop
    236 
    237  RESTORE_GOT
    238  REP_RET
    239 
    240 
    241 INIT_XMM sse2
        ; dc_top_predictor_16x16(dst, stride, above, left)
        ; Fills 16 rows of 16 bytes at dst with
        ;   dc = (sum(above[0..15]) + 8) >> 4
        ; left is named in the signature but never read, so only three arguments
        ; are requested from cglobal (matching dc_top_predictor_4x4 / _8x8).
        ; NOTE(review): mova is x86inc's aligned move, so above and every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    242 cglobal dc_top_predictor_16x16, 3, 5, 3, dst, stride, above, left, goffset
    243  GET_GOT     goffsetq
    244 
    245  pxor                  m1, m1                   ; zero, reference operand for psadbw
    246  mova                  m0, [aboveq]
        ; above has been consumed; re-declare names for the store loop.
    247  DEFINE_ARGS dst, stride, stride3, lines4
    248  lea             stride3q, [strideq*3]
    249  mov              lines4d, 4                    ; 4 iterations x 4 rows
    250  psadbw                m0, m1                   ; two partial sums, one per 8-byte half
    251  movhlps               m2, m0                   ; bring the upper-half sum down
    252  paddw                 m0, m2                   ; low word = total of 16 pixels
    253  paddw                 m0, [GLOBAL(pw2_16)]     ; rounding term (8)
    254  psraw                 m0, 4                    ; divide by 16
    255  pshuflw               m0, m0, 0x0              ; dc in the 4 low words
    256  punpcklqdq            m0, m0                   ; dc in all 8 words
    257  packuswb              m0, m0                   ; dc in all 16 bytes
    258 .loop:
    259  mova    [dstq          ], m0
    260  mova    [dstq+strideq  ], m0
    261  mova    [dstq+strideq*2], m0
    262  mova    [dstq+stride3q ], m0
    263  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    264  dec              lines4d
    265  jnz .loop
    266 
    267  RESTORE_GOT
    268  REP_RET
    269 
    270 INIT_XMM sse2
        ; dc_left_predictor_16x16(dst, stride, above, left)
        ; Fills 16 rows of 16 bytes at dst with
        ;   dc = (sum(left[0..15]) + 8) >> 4
        ; NOTE(review): above is requested from cglobal (4 args) but never read;
        ; the 4x4/8x8 variants request 2 args and fetch left via movifnidn.
        ; NOTE(review): mova is x86inc's aligned move, so left and every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    271 cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
    272  GET_GOT     goffsetq
    273 
    274  pxor                  m1, m1                   ; zero, reference operand for psadbw
    275  mova                  m0, [leftq]
        ; left has been consumed; re-declare names for the store loop.
    276  DEFINE_ARGS dst, stride, stride3, lines4
    277  lea             stride3q, [strideq*3]
    278  mov              lines4d, 4                    ; 4 iterations x 4 rows
    279  psadbw                m0, m1                   ; two partial sums, one per 8-byte half
    280  movhlps               m2, m0                   ; bring the upper-half sum down
    281  paddw                 m0, m2                   ; low word = total of 16 pixels
    282  paddw                 m0, [GLOBAL(pw2_16)]     ; rounding term (8)
    283  psraw                 m0, 4                    ; divide by 16
    284  pshuflw               m0, m0, 0x0              ; dc in the 4 low words
    285  punpcklqdq            m0, m0                   ; dc in all 8 words
    286  packuswb              m0, m0                   ; dc in all 16 bytes
    287 .loop:
    288  mova    [dstq          ], m0
    289  mova    [dstq+strideq  ], m0
    290  mova    [dstq+strideq*2], m0
    291  mova    [dstq+stride3q ], m0
    292  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    293  dec              lines4d
    294  jnz .loop
    295 
    296  RESTORE_GOT
    297  REP_RET
    298 
    299 INIT_XMM sse2
        ; dc_128_predictor_16x16(dst, stride, above, left)
        ; Fills 16 rows of 16 bytes at dst with the constant 128.
        ; above and left are never read and only m0 is used, so the prologue
        ; requests 2 arguments and 1 XMM register, matching
        ; dc_128_predictor_4x4 / _8x8.
        ; NOTE(review): stores use mova (x86inc aligned move), so every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    300 cglobal dc_128_predictor_16x16, 2, 5, 1, dst, stride, above, left, goffset
    301  GET_GOT     goffsetq
    302 
    303  DEFINE_ARGS dst, stride, stride3, lines4
    304  lea             stride3q, [strideq*3]
    305  mov              lines4d, 4                    ; 4 iterations x 4 rows
    306  mova    m0,        [GLOBAL(dc_128)]            ; 128 in all 16 bytes
    307 .loop:
    308  mova    [dstq          ], m0
    309  mova    [dstq+strideq  ], m0
    310  mova    [dstq+strideq*2], m0
    311  mova    [dstq+stride3q ], m0
    312  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    313  dec              lines4d
    314  jnz .loop
    315  RESTORE_GOT
        ; REP_RET, as used by the other looped routines in this file whose
        ; return can directly follow the jnz.
    316  REP_RET
    317 
    318 
    319 INIT_XMM sse2
        ; dc_predictor_32x32(dst, stride, above, left)
        ; Fills 32 rows of 32 bytes at dst with
        ;   dc = (sum(above[0..31]) + sum(left[0..31]) + 32) >> 6
        ; NOTE(review): loads and stores use mova (x86inc aligned move), so
        ; above, left and every dst row presumably must be 16-byte aligned -
        ; confirm against the callers.
    320 cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
    321  GET_GOT     goffsetq
    322 
    323  pxor                  m1, m1                   ; zero, reference operand for psadbw
    324  mova                  m0, [aboveq]
    325  mova                  m2, [aboveq+16]
    326  mova                  m3, [leftq]
    327  mova                  m4, [leftq+16]
        ; above/left have been consumed; re-declare names for the store loop.
    328  DEFINE_ARGS dst, stride, stride3, lines4
    329  lea             stride3q, [strideq*3]
    330  mov              lines4d, 8                    ; 8 iterations x 4 rows
    331  psadbw                m0, m1                   ; each psadbw: two partial sums
    332  psadbw                m2, m1
    333  psadbw                m3, m1
    334  psadbw                m4, m1
    335  paddw                 m0, m2
    336  paddw                 m0, m3
    337  paddw                 m0, m4
    338  movhlps               m2, m0                   ; bring the upper-half sum down
    339  paddw                 m0, m2                   ; low word = total of 64 pixels
    340  paddw                 m0, [GLOBAL(pw_32)]      ; rounding term
    341  psraw                 m0, 6                    ; divide by 64
    342  pshuflw               m0, m0, 0x0              ; dc in the 4 low words
    343  punpcklqdq            m0, m0                   ; dc in all 8 words
    344  packuswb              m0, m0                   ; dc in all 16 bytes
    345 .loop:
    346  mova [dstq             ], m0
    347  mova [dstq          +16], m0
    348  mova [dstq+strideq     ], m0
    349  mova [dstq+strideq  +16], m0
    350  mova [dstq+strideq*2   ], m0
    351  mova [dstq+strideq*2+16], m0
    352  mova [dstq+stride3q    ], m0
    353  mova [dstq+stride3q +16], m0
    354  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    355  dec              lines4d
    356  jnz .loop
    357 
    358  RESTORE_GOT
    359  REP_RET
    360 
    361 INIT_XMM sse2
        ; dc_top_predictor_32x32(dst, stride, above, left)
        ; Fills 32 rows of 32 bytes at dst with
        ;   dc = (sum(above[0..31]) + 16) >> 5
        ; left is never read and only m0-m2 are used, so the prologue requests
        ; 3 arguments and 3 XMM registers.
        ; NOTE(review): mova is x86inc's aligned move, so above and every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    362 cglobal dc_top_predictor_32x32, 3, 5, 3, dst, stride, above, left, goffset
    363  GET_GOT     goffsetq
    364 
    365  pxor                  m1, m1                   ; zero, reference operand for psadbw
    366  mova                  m0, [aboveq]
    367  mova                  m2, [aboveq+16]
        ; above has been consumed; re-declare names for the store loop.
    368  DEFINE_ARGS dst, stride, stride3, lines4
    369  lea             stride3q, [strideq*3]
    370  mov              lines4d, 8                    ; 8 iterations x 4 rows
    371  psadbw                m0, m1                   ; each psadbw: two partial sums
    372  psadbw                m2, m1
    373  paddw                 m0, m2
    374  movhlps               m2, m0                   ; bring the upper-half sum down
    375  paddw                 m0, m2                   ; low word = total of 32 pixels
    376  paddw                 m0, [GLOBAL(pw2_32)]     ; rounding term (16)
    377  psraw                 m0, 5                    ; divide by 32
    378  pshuflw               m0, m0, 0x0              ; dc in the 4 low words
    379  punpcklqdq            m0, m0                   ; dc in all 8 words
    380  packuswb              m0, m0                   ; dc in all 16 bytes
    381 .loop:
    382  mova [dstq             ], m0
    383  mova [dstq          +16], m0
    384  mova [dstq+strideq     ], m0
    385  mova [dstq+strideq  +16], m0
    386  mova [dstq+strideq*2   ], m0
    387  mova [dstq+strideq*2+16], m0
    388  mova [dstq+stride3q    ], m0
    389  mova [dstq+stride3q +16], m0
    390  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    391  dec              lines4d
    392  jnz .loop
    393 
    394  RESTORE_GOT
    395  REP_RET
    396 
    397 INIT_XMM sse2
        ; dc_left_predictor_32x32(dst, stride, above, left)
        ; Fills 32 rows of 32 bytes at dst with
        ;   dc = (sum(left[0..31]) + 16) >> 5
        ; NOTE(review): above is requested from cglobal (4 args) but never read,
        ; and 5 XMM registers are declared while only m0-m2 are used.
        ; NOTE(review): mova is x86inc's aligned move, so left and every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    398 cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
    399  GET_GOT     goffsetq
    400 
    401  pxor                  m1, m1                   ; zero, reference operand for psadbw
    402  mova                  m0, [leftq]
    403  mova                  m2, [leftq+16]
        ; left has been consumed; re-declare names for the store loop.
    404  DEFINE_ARGS dst, stride, stride3, lines4
    405  lea             stride3q, [strideq*3]
    406  mov              lines4d, 8                    ; 8 iterations x 4 rows
    407  psadbw                m0, m1                   ; each psadbw: two partial sums
    408  psadbw                m2, m1
    409  paddw                 m0, m2
    410  movhlps               m2, m0                   ; bring the upper-half sum down
    411  paddw                 m0, m2                   ; low word = total of 32 pixels
    412  paddw                 m0, [GLOBAL(pw2_32)]     ; rounding term (16)
    413  psraw                 m0, 5                    ; divide by 32
    414  pshuflw               m0, m0, 0x0              ; dc in the 4 low words
    415  punpcklqdq            m0, m0                   ; dc in all 8 words
    416  packuswb              m0, m0                   ; dc in all 16 bytes
    417 .loop:
    418  mova [dstq             ], m0
    419  mova [dstq          +16], m0
    420  mova [dstq+strideq     ], m0
    421  mova [dstq+strideq  +16], m0
    422  mova [dstq+strideq*2   ], m0
    423  mova [dstq+strideq*2+16], m0
    424  mova [dstq+stride3q    ], m0
    425  mova [dstq+stride3q +16], m0
    426  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    427  dec              lines4d
    428  jnz .loop
    429 
    430  RESTORE_GOT
    431  REP_RET
    432 
    433 INIT_XMM sse2
        ; dc_128_predictor_32x32(dst, stride, above, left)
        ; Fills 32 rows of 32 bytes at dst with the constant 128.
        ; above and left are never read and only m0 is used, so the prologue
        ; requests 2 arguments and 1 XMM register, matching
        ; dc_128_predictor_4x4 / _8x8.
        ; NOTE(review): stores use mova (x86inc aligned move), so every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    434 cglobal dc_128_predictor_32x32, 2, 5, 1, dst, stride, above, left, goffset
    435  GET_GOT     goffsetq
    436 
    437  DEFINE_ARGS dst, stride, stride3, lines4
    438  lea             stride3q, [strideq*3]
    439  mov              lines4d, 8                    ; 8 iterations x 4 rows
    440  mova    m0,        [GLOBAL(dc_128)]            ; 128 in all 16 bytes
    441 .loop:
    442  mova [dstq             ], m0
    443  mova [dstq          +16], m0
    444  mova [dstq+strideq     ], m0
    445  mova [dstq+strideq  +16], m0
    446  mova [dstq+strideq*2   ], m0
    447  mova [dstq+strideq*2+16], m0
    448  mova [dstq+stride3q    ], m0
    449  mova [dstq+stride3q +16], m0
    450  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    451  dec              lines4d
    452  jnz .loop
    453  RESTORE_GOT
        ; REP_RET, as used by the other looped routines in this file whose
        ; return can directly follow the jnz.
    454  REP_RET
    455 
    456 INIT_XMM sse2
        ; v_predictor_4x4(dst, stride, above)
        ; Copies the 4 bytes at above into each of 4 rows at dst
        ; (rows strideq bytes apart).
    457 cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
    458  movd                  m0, [aboveq]             ; the row to replicate
    459  movd      [dstq        ], m0
    460  movd      [dstq+strideq], m0
    461  lea                 dstq, [dstq+strideq*2]     ; rows 2..3
    462  movd      [dstq        ], m0
    463  movd      [dstq+strideq], m0
    464  RET
    465 
    466 INIT_XMM sse2
        ; v_predictor_8x8(dst, stride, above)
        ; Copies the 8 bytes at above into each of 8 rows at dst
        ; (rows strideq bytes apart).
    467 cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above
    468  movq                  m0, [aboveq]             ; the row to replicate
        ; above has been consumed; re-declare names so stride3 is available.
    469  DEFINE_ARGS dst, stride, stride3
    470  lea             stride3q, [strideq*3]
    471  movq    [dstq          ], m0
    472  movq    [dstq+strideq  ], m0
    473  movq    [dstq+strideq*2], m0
    474  movq    [dstq+stride3q ], m0
    475  lea                 dstq, [dstq+strideq*4]     ; rows 4..7
    476  movq    [dstq          ], m0
    477  movq    [dstq+strideq  ], m0
    478  movq    [dstq+strideq*2], m0
    479  movq    [dstq+stride3q ], m0
    480  RET
    481 
    482 INIT_XMM sse2
        ; v_predictor_16x16(dst, stride, above)
        ; Copies the 16 bytes at above into each of 16 rows at dst.
        ; NOTE(review): mova is x86inc's aligned move, so above and every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    483 cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above
    484  mova                  m0, [aboveq]             ; the row to replicate
        ; above has been consumed; re-declare names for the store loop.
    485  DEFINE_ARGS dst, stride, stride3, nlines4
    486  lea             stride3q, [strideq*3]
    487  mov              nlines4d, 4                   ; 4 iterations x 4 rows
    488 .loop:
    489  mova    [dstq          ], m0
    490  mova    [dstq+strideq  ], m0
    491  mova    [dstq+strideq*2], m0
    492  mova    [dstq+stride3q ], m0
    493  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    494  dec             nlines4d
    495  jnz .loop
    496  REP_RET
    497 
    498 INIT_XMM sse2
        ; v_predictor_32x32(dst, stride, above)
        ; Copies the 32 bytes at above into each of 32 rows at dst.
        ; NOTE(review): mova is x86inc's aligned move, so above and every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    499 cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above
    500  mova                  m0, [aboveq]             ; bytes 0..15 of the row
    501  mova                  m1, [aboveq+16]          ; bytes 16..31 of the row
        ; above has been consumed; re-declare names for the store loop.
    502  DEFINE_ARGS dst, stride, stride3, nlines4
    503  lea             stride3q, [strideq*3]
    504  mov              nlines4d, 8                   ; 8 iterations x 4 rows
    505 .loop:
    506  mova [dstq             ], m0
    507  mova [dstq          +16], m1
    508  mova [dstq+strideq     ], m0
    509  mova [dstq+strideq  +16], m1
    510  mova [dstq+strideq*2   ], m0
    511  mova [dstq+strideq*2+16], m1
    512  mova [dstq+stride3q    ], m0
    513  mova [dstq+stride3q +16], m1
    514  lea                 dstq, [dstq+strideq*4]     ; lea leaves flags untouched
    515  dec             nlines4d
    516  jnz .loop
    517  REP_RET
    518 
    519 INIT_XMM sse2
        ; h_predictor_4x4(dst, stride, line, left)
        ; Row r of the 4x4 block at dst is filled with left[r] repeated 4 times.
        ; The third argument slot (line) is never read.
    520 cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left
        ; Only two arguments are requested from cglobal; movifnidn presumably
        ; fetches left from its argument slot when needed (x86inc helper).
    521  movifnidn          leftq, leftmp
    522  movd                  m0, [leftq]              ; l1 l2 l3 l4
    523  punpcklbw             m0, m0                   ; each byte doubled
    524  punpcklbw             m0, m0                   ; dword k = l(k+1) x4
    525  pshufd                m1, m0, 0x1              ; low dword = l2 x4
    526  movd      [dstq        ], m0                   ; row 0 = l1 x4
    527  movd      [dstq+strideq], m1                   ; row 1
    528  pshufd                m2, m0, 0x2              ; low dword = l3 x4
    529  lea                 dstq, [dstq+strideq*2]
    530  pshufd                m3, m0, 0x3              ; low dword = l4 x4
    531  movd      [dstq        ], m2                   ; row 2
    532  movd      [dstq+strideq], m3                   ; row 3
    533  RET
    534 
    535 INIT_XMM sse2
        ; h_predictor_8x8(dst, stride, line, left)
        ; Row r of the 8x8 block at dst is filled with left[r] repeated 8 times.
        ; The incoming value of the third argument slot is ignored: its register
        ; is overwritten and used as the loop counter.
    536 cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left
        ; Only two arguments are requested from cglobal; movifnidn presumably
        ; fetches left from its argument slot when needed (x86inc helper).
    537  movifnidn          leftq, leftmp
    538  mov                lineq, -2                   ; counts -2 -> 0: 2 iterations x 4 rows
    539  DEFINE_ARGS  dst, stride, line, left, stride3
    540  lea             stride3q, [strideq*3]
    541  movq                  m0, [leftq    ]
    542  punpcklbw             m0, m0              ; l1 l1 l2 l2 ... l8 l8
    543 .loop:
    544  pshuflw               m1, m0, 0x0         ; l1 l1 l1 l1 l1 l1 l1 l1
    545  pshuflw               m2, m0, 0x55        ; l2 l2 l2 l2 l2 l2 l2 l2
    546  movq      [dstq        ], m1
    547  movq      [dstq+strideq], m2
    548  pshuflw               m1, m0, 0xaa        ; third word of the low qword x4
    549  pshuflw               m2, m0, 0xff        ; fourth word of the low qword x4
    550  movq    [dstq+strideq*2], m1
    551  movq    [dstq+stride3q ], m2
    552  pshufd                m0, m0, 0xe         ; [63:0] l5 l5 l6 l6 l7 l7 l8 l8
        ; inc sets ZF; the lea that follows does not modify flags, so jnz still
        ; tests the counter.
    553  inc                lineq
    554  lea                 dstq, [dstq+strideq*4]
    555  jnz .loop
    556  REP_RET
    557 
    558 INIT_XMM sse2
        ; h_predictor_16x16(dst, stride, line, left)
        ; Row r of the 16x16 block at dst is filled with left[r] repeated 16
        ; times. The incoming value of the third argument slot is ignored: its
        ; register is overwritten and used as the loop counter.
        ; NOTE(review): stores use mova (x86inc aligned move), so every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    559 cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left
        ; Only two arguments are requested from cglobal; movifnidn presumably
        ; fetches left from its argument slot when needed (x86inc helper).
    560  movifnidn          leftq, leftmp
    561  mov                lineq, -4                   ; counts -4 -> 0: 4 iterations x 4 rows
    562  DEFINE_ARGS dst, stride, line, left, stride3
    563  lea             stride3q, [strideq*3]
    564 .loop:
    565  movd                  m0, [leftq]              ; next 4 left pixels
    566  punpcklbw             m0, m0
    567  punpcklbw             m0, m0              ; l1 to l4 each repeated 4 times
    568  pshufd            m1, m0, 0x0             ; l1 repeated 16 times
    569  pshufd            m2, m0, 0x55            ; l2 repeated 16 times
    570  mova    [dstq          ], m1
    571  mova    [dstq+strideq  ], m2
    572  pshufd            m1, m0, 0xaa            ; l3 repeated 16 times
    573  pshufd            m2, m0, 0xff            ; l4 repeated 16 times
    574  mova    [dstq+strideq*2], m1
    575  mova    [dstq+stride3q ], m2
        ; inc sets ZF; the two lea instructions that follow do not modify
        ; flags, so jnz still tests the counter.
    576  inc                lineq
    577  lea                leftq, [leftq+4       ]     ; advance to the next 4 left pixels
    578  lea                 dstq, [dstq+strideq*4]     ; advance 4 rows
    579  jnz .loop
    580  REP_RET
    581 
    582 INIT_XMM sse2
        ; h_predictor_32x32(dst, stride, line, left)
        ; Row r of the 32x32 block at dst is filled with left[r] repeated 32
        ; times. The incoming value of the third argument slot is ignored: its
        ; register is overwritten and used as the loop counter.
        ; NOTE(review): stores use mova (x86inc aligned move), so every dst row
        ; presumably must be 16-byte aligned - confirm against the callers.
    583 cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left
        ; Only two arguments are requested from cglobal; movifnidn presumably
        ; fetches left from its argument slot when needed (x86inc helper).
    584  movifnidn              leftq, leftmp
    585  mov                    lineq, -8               ; counts -8 -> 0: 8 iterations x 4 rows
    586  DEFINE_ARGS dst, stride, line, left, stride3
    587  lea                 stride3q, [strideq*3]
    588 .loop:
    589  movd                      m0, [leftq]          ; next 4 left pixels
    590  punpcklbw                 m0, m0
    591  punpcklbw                 m0, m0              ; l1 to l4 each repeated 4 times
    592  pshufd                m1, m0, 0x0             ; l1 repeated 16 times
    593  pshufd                m2, m0, 0x55            ; l2 repeated 16 times
    594  mova     [dstq             ], m1
    595  mova     [dstq+16          ], m1
    596  mova     [dstq+strideq     ], m2
    597  mova     [dstq+strideq+16  ], m2
    598  pshufd                m1, m0, 0xaa            ; l3 repeated 16 times
    599  pshufd                m2, m0, 0xff            ; l4 repeated 16 times
    600  mova     [dstq+strideq*2   ], m1
    601  mova     [dstq+strideq*2+16], m1
    602  mova     [dstq+stride3q    ], m2
    603  mova     [dstq+stride3q+16 ], m2
        ; inc sets ZF; the two lea instructions that follow do not modify
        ; flags, so jnz still tests the counter.
    604  inc                    lineq
    605  lea                    leftq, [leftq+4       ] ; advance to the next 4 left pixels
    606  lea                     dstq, [dstq+strideq*4] ; advance 4 rows
    607  jnz .loop
    608  REP_RET
    608  REP_RET