tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MMIHelpers.h (8393B)


      1 /*
      2 ============================================================================
      3 Name        : MMIHelpers.h
      4 Author      : Heiher <r@hev.cc>
      5 Version     : 0.0.1
      6 Copyright   : Copyright (c) 2015 everyone.
      7 Description : The helpers for x86 SSE to Loongson MMI.
      8 ============================================================================
      9 */
     10 
     11 #ifndef __MMI_HELPERS_H__
     12 #define __MMI_HELPERS_H__
     13 
     /*
      * Internal helper shared by the _mm_pack* wrappers.
      *
      * Expands to one string literal holding four instructions, each terminated
      * by " \n\t". Every argument is stringified; "h"/"l" suffixes are appended
      * inside the %[...] placeholders (GCC inline-asm named-operand style) to
      * pick the two halves of a value -- presumably the high and low 64 bits of
      * a 128-bit SSE register.
      *
      *   _f  pack mnemonic to emit (e.g. packsshb)
      *   _D  destination operand base name
      *   _d  first source operand base name
      *   _s  second source operand base name
      *   _t  scratch operand, used without suffix; it is overwritten
      *
      * Emitted sequence:
      *   1. _f         t,  dh, sh   -- pack the two "h" sources into the scratch
      *   2. _f         Dl, dl, sl   -- pack the two "l" sources into Dl
      *   3. punpckhwd  Dh, Dl, t    -- written before Dl is replaced in step 4
      *   4. punpcklwd  Dl, Dl, t
      */
     #define __mm_packxxxx(_f, _D, _d, _s, _t)                    \
       #_f " %[" #_t "], %[" #_d "h], %[" #_s "h] \n\t"           \
       #_f " %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"          \
       "punpckhwd %[" #_D "h], %[" #_D "l], %[" #_t "] \n\t"      \
       "punpcklwd %[" #_D "l], %[" #_D "l], %[" #_t "] \n\t"
     21 
     /* SSE: por (presumed counterpart -- the original carried no SSE tag)
      *
      * Expands to a string with two `or` instructions: the first combines the
      * "h" operands of _d and _s into the "h" operand of _D, the second does
      * the same for the "l" operands. Arguments are stringified and suffixed
      * with h/l inside the %[...] placeholders.
      */
     #define _mm_or(_D, _d, _s)                             \
       "or %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"      \
       "or %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
     26 
     /* SSE: pxor (presumed counterpart -- the original carried no SSE tag)
      *
      * Expands to a string with two `xor` instructions, one on the "h"
      * operands and one on the "l" operands of _D, _d and _s. Arguments are
      * stringified and suffixed with h/l inside the %[...] placeholders.
      */
     #define _mm_xor(_D, _d, _s)                            \
       "xor %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
       "xor %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
     31 
     /* SSE: pand (presumed counterpart -- the original carried no SSE tag)
      *
      * Expands to a string with two `and` instructions, one on the "h"
      * operands and one on the "l" operands of _D, _d and _s. Arguments are
      * stringified and suffixed with h/l inside the %[...] placeholders.
      */
     #define _mm_and(_D, _d, _s)                            \
       "and %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
       "and %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
     36 
     /* SSE: pandn
      *
      * Expands to a string with two `pandn` instructions: first on the "h"
      * operands, then on the "l" operands. _D is the destination base name,
      * _d and _s the source base names; each is stringified and suffixed with
      * h/l inside the %[...] placeholders.
      */
     #define _mm_pandn(_D, _d, _s)                            \
       "pandn %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
       "pandn %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
     42 
     /* SSE: pshuflw
      *
      * Expands to a string with two instructions:
      *   mov.d   Dh, dh        -- the "h" operand is copied unchanged
      *   pshufh  Dl, dl, s     -- only the "l" operand is shuffled
      * _s is used as a single operand name, without an h/l suffix.
      */
     #define _mm_pshuflh(_D, _d, _s)                          \
       "mov.d %[" #_D "h], %[" #_d "h] \n\t"                  \
       "pshufh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
     48 
     /* SSE: psllw (bits)
      *
      * Expands to a string with two `psllh` instructions, one for the "h"
      * operand and one for the "l" operand of _d, writing the matching
      * operands of _D. The count operand _s has no h/l suffix: the same
      * operand is used by both instructions.
      */
     #define _mm_psllh(_D, _d, _s)                           \
       "psllh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"     \
       "psllh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
     54 
     /* SSE: pslld (bits)
      *
      * Expands to a string with two `psllw` instructions, one for the "h"
      * operand and one for the "l" operand of _d, writing the matching
      * operands of _D. The count operand _s has no h/l suffix: the same
      * operand is used by both instructions.
      */
     #define _mm_psllw(_D, _d, _s)                           \
       "psllw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"     \
       "psllw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
     60 
     /* SSE: psllq (bits)
      *
      * Expands to a string with two `dsll` instructions, one for the "h"
      * operand and one for the "l" operand of _d, writing the matching
      * operands of _D. The two halves are shifted independently; nothing is
      * carried from "l" into "h". The count operand _s has no h/l suffix.
      */
     #define _mm_pslld(_D, _d, _s)                          \
       "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"     \
       "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
     66 
     /* SSE: pslldq (bytes)
      *
      * Left shift across both halves: bits shifted out of the "l" operand are
      * OR-ed into the "h" operand.
      *
      *   _D    destination operand base name (h/l suffixed)
      *   _d    source operand base name (h/l suffixed)
      *   _s    shift-count operand (no suffix)
      *   _s64  operand the count is subtracted from; judging by the name it
      *         is expected to hold 64 -- TODO confirm against callers
      *   _tf   scratch operand (no suffix); it is overwritten
      *
      * Emitted sequence:
      *   subu  tf, s64, s     -- tf = s64 - s
      *   dsrl  tf, dl, tf     -- tf = part of dl that crosses into the "h" half
      *   dsll  Dh, dh, s
      *   dsll  Dl, dl, s
      *   or    Dh, Dh, tf     -- merge the carried bits
      *
      * The carry is computed from dl before Dl is written.
      *
      * NOTE(review): the SSE tag says "bytes", but _s is handed straight to
      * dsll/dsrl, which this header tags as bit shifts elsewhere -- confirm
      * which unit callers are expected to pass.
      */
     #define _mm_psllq(_D, _d, _s, _s64, _tf)                \
       "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t"     \
       "dsrl %[" #_tf "], %[" #_d "l], %[" #_tf "] \n\t"     \
       "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"      \
       "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"      \
       "or %[" #_D "h], %[" #_D "h], %[" #_tf "] \n\t"
     78 
     /* SSE: psrlw (bits)
      *
      * Expands to a string with two `psrlh` instructions, one for the "h"
      * operand and one for the "l" operand of _d, writing the matching
      * operands of _D. The count operand _s has no h/l suffix: the same
      * operand is used by both instructions.
      */
     #define _mm_psrlh(_D, _d, _s)                           \
       "psrlh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"     \
       "psrlh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
     84 
     /* SSE: psrld (bits)
      *
      * Expands to a string with two `psrlw` instructions, one for the "h"
      * operand and one for the "l" operand of _d, writing the matching
      * operands of _D. The count operand _s has no h/l suffix: the same
      * operand is used by both instructions.
      */
     #define _mm_psrlw(_D, _d, _s)                           \
       "psrlw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"     \
       "psrlw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
     90 
     /* SSE: psrlq (bits)
      *
      * Expands to a string with two `dsrl` instructions, one for the "h"
      * operand and one for the "l" operand of _d, writing the matching
      * operands of _D. The two halves are shifted independently; nothing is
      * carried from "h" into "l". The count operand _s has no h/l suffix.
      */
     #define _mm_psrld(_D, _d, _s)                          \
       "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"     \
       "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
     96 
     /* SSE: psrldq (bytes)
      *
      * Right shift across both halves: bits shifted out of the "h" operand
      * are OR-ed into the "l" operand.
      *
      *   _D    destination operand base name (h/l suffixed)
      *   _d    source operand base name (h/l suffixed)
      *   _s    shift-count operand (no suffix)
      *   _s64  operand the count is subtracted from; judging by the name it
      *         is expected to hold 64 -- TODO confirm against callers
      *   _tf   scratch operand (no suffix); it is overwritten
      *
      * Emitted sequence:
      *   subu  tf, s64, s     -- tf = s64 - s
      *   dsll  tf, dh, tf     -- tf = part of dh that crosses into the "l" half
      *   dsrl  Dh, dh, s
      *   dsrl  Dl, dl, s
      *   or    Dl, Dl, tf     -- merge the carried bits
      *
      * The carry is computed from dh before Dh is written.
      *
      * NOTE(review): the SSE tag says "bytes", but _s is handed straight to
      * dsrl/dsll, which this header tags as bit shifts elsewhere -- confirm
      * which unit callers are expected to pass.
      */
     #define _mm_psrlq(_D, _d, _s, _s64, _tf)                \
       "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t"     \
       "dsll %[" #_tf "], %[" #_d "h], %[" #_tf "] \n\t"     \
       "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"      \
       "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"      \
       "or %[" #_D "l], %[" #_D "l], %[" #_tf "] \n\t"
    108 
    /* SSE: psrad
     *
     * Expands to a string with two `psraw` instructions, one for the "h"
     * operand and one for the "l" operand of _d, writing the matching
     * operands of _D. The count operand _s has no h/l suffix: the same
     * operand is used by both instructions.
     */
    #define _mm_psraw(_D, _d, _s)                           \
      "psraw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"     \
      "psraw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
    114 
    /* SSE: paddb
     *
     * Expands to a string with two `paddb` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_paddb(_D, _d, _s)                            \
      "paddb %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "paddb %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    120 
    /* SSE: paddw
     *
     * Expands to a string with two `paddh` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_paddh(_D, _d, _s)                            \
      "paddh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "paddh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    126 
    /* SSE: paddd
     *
     * Expands to a string with two `paddw` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_paddw(_D, _d, _s)                            \
      "paddw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "paddw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    132 
    /* SSE: paddq
     *
     * Expands to a string with two `dadd` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. The halves are
     * added independently; no carry is passed between them. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_paddd(_D, _d, _s)                           \
      "dadd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "dadd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    138 
    /* SSE: psubw
     *
     * Expands to a string with two `psubh` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_psubh(_D, _d, _s)                            \
      "psubh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "psubh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    144 
    /* SSE: psubd
     *
     * Expands to a string with two `psubw` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_psubw(_D, _d, _s)                            \
      "psubw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "psubw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    150 
    /* SSE: pmaxub
     *
     * Expands to a string with two `pmaxub` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_pmaxub(_D, _d, _s)                            \
      "pmaxub %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "pmaxub %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    156 
    /* SSE: pmullw
     *
     * Expands to a string with two `pmullh` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_pmullh(_D, _d, _s)                            \
      "pmullh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "pmullh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    162 
    /* SSE: pmulhw
     *
     * Expands to a string with two `pmulhh` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_pmulhh(_D, _d, _s)                            \
      "pmulhh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "pmulhh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    168 
    /* SSE: pmuludq
     *
     * Expands to a string with two `pmuluw` instructions: first on the "h"
     * operands, then on the "l" operands of _D, _d and _s. Arguments are
     * stringified and suffixed with h/l inside the %[...] placeholders.
     */
    #define _mm_pmuluw(_D, _d, _s)                            \
      "pmuluw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"     \
      "pmuluw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    174 
    /* SSE: packsswb
     *
     * Thin wrapper: forwards its arguments unchanged to __mm_packxxxx
     * (defined earlier in this header) with `packsshb` as the mnemonic.
     * _t is the extra operand that helper requires.
     */
    #define _mm_packsshb(_D, _d, _s, _t) \
      __mm_packxxxx(packsshb, _D, _d, _s, _t)
    177 
    /* SSE: packssdw
     *
     * Thin wrapper: forwards its arguments unchanged to __mm_packxxxx
     * (defined earlier in this header) with `packsswh` as the mnemonic.
     * _t is the extra operand that helper requires.
     */
    #define _mm_packsswh(_D, _d, _s, _t) \
      __mm_packxxxx(packsswh, _D, _d, _s, _t)
    180 
    /* SSE: packuswb
     *
     * Thin wrapper: forwards its arguments unchanged to __mm_packxxxx
     * (defined earlier in this header) with `packushb` as the mnemonic.
     * _t is the extra operand that helper requires.
     */
    #define _mm_packushb(_D, _d, _s, _t) \
      __mm_packxxxx(packushb, _D, _d, _s, _t)
    183 
    /* SSE: punpcklbw
     *
     * Both emitted instructions read only the "l" operands of _d and _s:
     *   punpckhbh  Dh, dl, sl
     *   punpcklbh  Dl, dl, sl
     * Dh is written first and Dl last, so dl is still intact for the second
     * instruction -- presumably so that _D may name the same value as _d.
     */
    #define _mm_punpcklbh(_D, _d, _s)                            \
      "punpckhbh %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t"     \
      "punpcklbh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    189 
    /* SSE: punpcklwd
     *
     * Both emitted instructions read only the "l" operands of _d and _s:
     *   punpckhhw  Dh, dl, sl
     *   punpcklhw  Dl, dl, sl
     * Dh is written first and Dl last, so dl is still intact for the second
     * instruction -- presumably so that _D may name the same value as _d.
     */
    #define _mm_punpcklhw(_D, _d, _s)                            \
      "punpckhhw %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t"     \
      "punpcklhw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    195 
    /* SSE: punpckldq
     *
     * Both emitted instructions read only the "l" operands of _d and _s:
     *   punpckhwd  Dh, dl, sl
     *   punpcklwd  Dl, dl, sl
     * Dh is written first and Dl last, so dl is still intact for the second
     * instruction -- presumably so that _D may name the same value as _d.
     */
    #define _mm_punpcklwd(_D, _d, _s)                            \
      "punpckhwd %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t"     \
      "punpcklwd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
    201 
    /* SSE: punpcklqdq
     *
     * Two plain moves, no unpack instruction is needed:
     *   mov.d  Dh, sl     -- "l" operand of _s becomes the "h" operand of _D
     *   mov.d  Dl, dl     -- "l" operand of _d becomes the "l" operand of _D
     * The "h" operands of _d and _s are not referenced.
     */
    #define _mm_punpckldq(_D, _d, _s)             \
      "mov.d %[" #_D "h], %[" #_s "l] \n\t"       \
      "mov.d %[" #_D "l], %[" #_d "l] \n\t"
    207 
    /* SSE: punpckhbw
     *
     * Both emitted instructions read only the "h" operands of _d and _s:
     *   punpcklbh  Dl, dh, sh
     *   punpckhbh  Dh, dh, sh
     * Dl is written first and Dh last, so dh is still intact for the second
     * instruction -- presumably so that _D may name the same value as _d.
     */
    #define _mm_punpckhbh(_D, _d, _s)                            \
      "punpcklbh %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t"     \
      "punpckhbh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
    213 
    /* SSE: punpckhwd
     *
     * Both emitted instructions read only the "h" operands of _d and _s:
     *   punpcklhw  Dl, dh, sh
     *   punpckhhw  Dh, dh, sh
     * Dl is written first and Dh last, so dh is still intact for the second
     * instruction -- presumably so that _D may name the same value as _d.
     */
    #define _mm_punpckhhw(_D, _d, _s)                            \
      "punpcklhw %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t"     \
      "punpckhhw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
    219 
    /* SSE: punpckhdq
     *
     * Both emitted instructions read only the "h" operands of _d and _s:
     *   punpcklwd  Dl, dh, sh
     *   punpckhwd  Dh, dh, sh
     * Dl is written first and Dh last, so dh is still intact for the second
     * instruction -- presumably so that _D may name the same value as _d.
     */
    #define _mm_punpckhwd(_D, _d, _s)                            \
      "punpcklwd %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t"     \
      "punpckhwd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
    225 
    /* SSE: punpckhqdq
     *
     * Two plain moves, no unpack instruction is needed:
     *   mov.d  Dl, dh     -- "h" operand of _d becomes the "l" operand of _D
     *   mov.d  Dh, sh     -- "h" operand of _s becomes the "h" operand of _D
     * The "l" operands of _d and _s are not referenced.
     */
    #define _mm_punpckhdq(_D, _d, _s)             \
      "mov.d %[" #_D "l], %[" #_d "h] \n\t"       \
      "mov.d %[" #_D "h], %[" #_s "h] \n\t"
    231 
    232 #endif /* __MMI_HELPERS_H__ */