tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

ed25519-donna-64bit-sse2.h (16318B)


      1 #if defined(ED25519_GCC_64BIT_SSE_CHOOSE)
      2 
      3 #define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS
      4 
      5 DONNA_NOINLINE static void
      6 ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
      7 int64_t breg = (int64_t)b;
      8 uint64_t sign = (uint64_t)breg >> 63;
      9 uint64_t mask = ~(sign - 1);
     10 uint64_t u = (breg + mask) ^ mask;
     11 
     12 __asm__ __volatile__ (
     13 	/* ysubx+xaddy+t2d */
     14 	"movq %0, %%rax                  ;\n"
     15 	"movd %%rax, %%xmm14             ;\n"
     16 	"pshufd $0x00, %%xmm14, %%xmm14  ;\n"
     17 	"pxor %%xmm0, %%xmm0             ;\n"
     18 	"pxor %%xmm1, %%xmm1             ;\n"
     19 	"pxor %%xmm2, %%xmm2             ;\n"
     20 	"pxor %%xmm3, %%xmm3             ;\n"
     21 	"pxor %%xmm4, %%xmm4             ;\n"
     22 	"pxor %%xmm5, %%xmm5             ;\n"
     23 
     24 	/* 0 */
     25 	"movq $0, %%rax                  ;\n"
     26 	"movd %%rax, %%xmm15             ;\n"
     27 	"pshufd $0x00, %%xmm15, %%xmm15  ;\n"
     28 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
     29 	"movq $1, %%rax                  ;\n"
     30 	"movd %%rax, %%xmm6              ;\n"
     31 	"pxor %%xmm7, %%xmm7             ;\n"
     32 	"pand %%xmm15, %%xmm6            ;\n"
     33 	"pand %%xmm15, %%xmm7            ;\n"
     34 	"por %%xmm6, %%xmm0              ;\n"
     35 	"por %%xmm7, %%xmm1              ;\n"
     36 	"por %%xmm6, %%xmm2              ;\n"
     37 	"por %%xmm7, %%xmm3              ;\n"
     38 
     39 	/* 1 */
     40 	"movq $1, %%rax                  ;\n"
     41 	"movd %%rax, %%xmm15             ;\n"
     42 	"pshufd $0x00, %%xmm15, %%xmm15  ;\n"
     43 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
     44 	"movdqa 0(%1), %%xmm6            ;\n"
     45 	"movdqa 16(%1), %%xmm7           ;\n"
     46 	"movdqa 32(%1), %%xmm8           ;\n"
     47 	"movdqa 48(%1), %%xmm9           ;\n"
     48 	"movdqa 64(%1), %%xmm10          ;\n"
     49 	"movdqa 80(%1), %%xmm11          ;\n"
     50 	"pand %%xmm15, %%xmm6            ;\n"
     51 	"pand %%xmm15, %%xmm7            ;\n"
     52 	"pand %%xmm15, %%xmm8            ;\n"
     53 	"pand %%xmm15, %%xmm9            ;\n"
     54 	"pand %%xmm15, %%xmm10           ;\n"
     55 	"pand %%xmm15, %%xmm11           ;\n"
     56 	"por %%xmm6, %%xmm0              ;\n"
     57 	"por %%xmm7, %%xmm1              ;\n"
     58 	"por %%xmm8, %%xmm2              ;\n"
     59 	"por %%xmm9, %%xmm3              ;\n"
     60 	"por %%xmm10, %%xmm4             ;\n"
     61 	"por %%xmm11, %%xmm5             ;\n"
     62 
     63 	/* 2 */
     64 	"movq $2, %%rax                  ;\n"
     65 	"movd %%rax, %%xmm15             ;\n"
     66 	"pshufd $0x00, %%xmm15, %%xmm15  ;\n"
     67 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
     68 	"movdqa 96(%1), %%xmm6           ;\n"
     69 	"movdqa 112(%1), %%xmm7          ;\n"
     70 	"movdqa 128(%1), %%xmm8          ;\n"
     71 	"movdqa 144(%1), %%xmm9          ;\n"
     72 	"movdqa 160(%1), %%xmm10         ;\n"
     73 	"movdqa 176(%1), %%xmm11         ;\n"
     74 	"pand %%xmm15, %%xmm6            ;\n"
     75 	"pand %%xmm15, %%xmm7            ;\n"
     76 	"pand %%xmm15, %%xmm8            ;\n"
     77 	"pand %%xmm15, %%xmm9            ;\n"
     78 	"pand %%xmm15, %%xmm10           ;\n"
     79 	"pand %%xmm15, %%xmm11           ;\n"
     80 	"por %%xmm6, %%xmm0              ;\n"
     81 	"por %%xmm7, %%xmm1              ;\n"
     82 	"por %%xmm8, %%xmm2              ;\n"
     83 	"por %%xmm9, %%xmm3              ;\n"
     84 	"por %%xmm10, %%xmm4             ;\n"
     85 	"por %%xmm11, %%xmm5             ;\n"
     86 
     87 	/* 3 */
     88 	"movq $3, %%rax                  ;\n"
     89 	"movd %%rax, %%xmm15             ;\n"
     90 	"pshufd $0x00, %%xmm15, %%xmm15  ;\n"
     91 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
     92 	"movdqa 192(%1), %%xmm6          ;\n"
     93 	"movdqa 208(%1), %%xmm7          ;\n"
     94 	"movdqa 224(%1), %%xmm8          ;\n"
     95 	"movdqa 240(%1), %%xmm9          ;\n"
     96 	"movdqa 256(%1), %%xmm10         ;\n"
     97 	"movdqa 272(%1), %%xmm11         ;\n"
     98 	"pand %%xmm15, %%xmm6            ;\n"
     99 	"pand %%xmm15, %%xmm7            ;\n"
    100 	"pand %%xmm15, %%xmm8            ;\n"
    101 	"pand %%xmm15, %%xmm9            ;\n"
    102 	"pand %%xmm15, %%xmm10           ;\n"
    103 	"pand %%xmm15, %%xmm11           ;\n"
    104 	"por %%xmm6, %%xmm0              ;\n"
    105 	"por %%xmm7, %%xmm1              ;\n"
    106 	"por %%xmm8, %%xmm2              ;\n"
    107 	"por %%xmm9, %%xmm3              ;\n"
    108 	"por %%xmm10, %%xmm4             ;\n"
    109 	"por %%xmm11, %%xmm5             ;\n"
    110 
    111 	/* 4 */
    112 	"movq $4, %%rax                  ;\n"
    113 	"movd %%rax, %%xmm15             ;\n"
    114 	"pshufd $0x00, %%xmm15, %%xmm15  ;\n"
    115 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
    116 	"movdqa 288(%1), %%xmm6          ;\n"
    117 	"movdqa 304(%1), %%xmm7          ;\n"
    118 	"movdqa 320(%1), %%xmm8          ;\n"
    119 	"movdqa 336(%1), %%xmm9          ;\n"
    120 	"movdqa 352(%1), %%xmm10         ;\n"
    121 	"movdqa 368(%1), %%xmm11         ;\n"
    122 	"pand %%xmm15, %%xmm6            ;\n"
    123 	"pand %%xmm15, %%xmm7            ;\n"
    124 	"pand %%xmm15, %%xmm8            ;\n"
    125 	"pand %%xmm15, %%xmm9            ;\n"
    126 	"pand %%xmm15, %%xmm10           ;\n"
    127 	"pand %%xmm15, %%xmm11           ;\n"
    128 	"por %%xmm6, %%xmm0              ;\n"
    129 	"por %%xmm7, %%xmm1              ;\n"
    130 	"por %%xmm8, %%xmm2              ;\n"
    131 	"por %%xmm9, %%xmm3              ;\n"
    132 	"por %%xmm10, %%xmm4             ;\n"
    133 	"por %%xmm11, %%xmm5             ;\n"
    134 
    135 	/* 5 */
    136 	"movq $5, %%rax                  ;\n"
    137 	"movd %%rax, %%xmm15             ;\n"
    138 	"pshufd $0x00, %%xmm15, %%xmm15  ;\n"
    139 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
    140 	"movdqa 384(%1), %%xmm6          ;\n"
    141 	"movdqa 400(%1), %%xmm7          ;\n"
    142 	"movdqa 416(%1), %%xmm8          ;\n"
    143 	"movdqa 432(%1), %%xmm9          ;\n"
    144 	"movdqa 448(%1), %%xmm10         ;\n"
    145 	"movdqa 464(%1), %%xmm11         ;\n"
    146 	"pand %%xmm15, %%xmm6            ;\n"
    147 	"pand %%xmm15, %%xmm7            ;\n"
    148 	"pand %%xmm15, %%xmm8            ;\n"
    149 	"pand %%xmm15, %%xmm9            ;\n"
    150 	"pand %%xmm15, %%xmm10           ;\n"
    151 	"pand %%xmm15, %%xmm11           ;\n"
    152 	"por %%xmm6, %%xmm0              ;\n"
    153 	"por %%xmm7, %%xmm1              ;\n"
    154 	"por %%xmm8, %%xmm2              ;\n"
    155 	"por %%xmm9, %%xmm3              ;\n"
    156 	"por %%xmm10, %%xmm4             ;\n"
    157 	"por %%xmm11, %%xmm5             ;\n"
    158 
    159 	/* 6 */
    160 	"movq $6, %%rax                  ;\n"
    161 	"movd %%rax, %%xmm15             ;\n"
    162 	"pshufd $0x00, %%xmm15, %%xmm15  ;\n"
    163 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
    164 	"movdqa 480(%1), %%xmm6          ;\n"
    165 	"movdqa 496(%1), %%xmm7          ;\n"
    166 	"movdqa 512(%1), %%xmm8          ;\n"
    167 	"movdqa 528(%1), %%xmm9          ;\n"
    168 	"movdqa 544(%1), %%xmm10         ;\n"
    169 	"movdqa 560(%1), %%xmm11         ;\n"
    170 	"pand %%xmm15, %%xmm6            ;\n"
    171 	"pand %%xmm15, %%xmm7            ;\n"
    172 	"pand %%xmm15, %%xmm8            ;\n"
    173 	"pand %%xmm15, %%xmm9            ;\n"
    174 	"pand %%xmm15, %%xmm10           ;\n"
    175 	"pand %%xmm15, %%xmm11           ;\n"
    176 	"por %%xmm6, %%xmm0              ;\n"
    177 	"por %%xmm7, %%xmm1              ;\n"
    178 	"por %%xmm8, %%xmm2              ;\n"
    179 	"por %%xmm9, %%xmm3              ;\n"
    180 	"por %%xmm10, %%xmm4             ;\n"
    181 	"por %%xmm11, %%xmm5             ;\n"
    182 
    183 	/* 7 */
    184 	"movq $7, %%rax                  ;\n"
    185 	"movd %%rax, %%xmm15             ;\n"
    186 	"pshufd $0x00, %%xmm15, %%xmm15  ;\n"
    187 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
    188 	"movdqa 576(%1), %%xmm6          ;\n"
    189 	"movdqa 592(%1), %%xmm7          ;\n"
    190 	"movdqa 608(%1), %%xmm8          ;\n"
    191 	"movdqa 624(%1), %%xmm9          ;\n"
    192 	"movdqa 640(%1), %%xmm10         ;\n"
    193 	"movdqa 656(%1), %%xmm11         ;\n"
    194 	"pand %%xmm15, %%xmm6            ;\n"
    195 	"pand %%xmm15, %%xmm7            ;\n"
    196 	"pand %%xmm15, %%xmm8            ;\n"
    197 	"pand %%xmm15, %%xmm9            ;\n"
    198 	"pand %%xmm15, %%xmm10           ;\n"
    199 	"pand %%xmm15, %%xmm11           ;\n"
    200 	"por %%xmm6, %%xmm0              ;\n"
    201 	"por %%xmm7, %%xmm1              ;\n"
    202 	"por %%xmm8, %%xmm2              ;\n"
    203 	"por %%xmm9, %%xmm3              ;\n"
    204 	"por %%xmm10, %%xmm4             ;\n"
    205 	"por %%xmm11, %%xmm5             ;\n"
    206 
    207 	/* 8 */
    208 	"movq $8, %%rax                  ;\n"
    209 	"movd %%rax, %%xmm15             ;\n"
    210 	"pshufd $0x00, %%xmm15, %%xmm15  ;\n"
    211 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
    212 	"movdqa 672(%1), %%xmm6          ;\n"
    213 	"movdqa 688(%1), %%xmm7          ;\n"
    214 	"movdqa 704(%1), %%xmm8          ;\n"
    215 	"movdqa 720(%1), %%xmm9          ;\n"
    216 	"movdqa 736(%1), %%xmm10         ;\n"
    217 	"movdqa 752(%1), %%xmm11         ;\n"
    218 	"pand %%xmm15, %%xmm6            ;\n"
    219 	"pand %%xmm15, %%xmm7            ;\n"
    220 	"pand %%xmm15, %%xmm8            ;\n"
    221 	"pand %%xmm15, %%xmm9            ;\n"
    222 	"pand %%xmm15, %%xmm10           ;\n"
    223 	"pand %%xmm15, %%xmm11           ;\n"
    224 	"por %%xmm6, %%xmm0              ;\n"
    225 	"por %%xmm7, %%xmm1              ;\n"
    226 	"por %%xmm8, %%xmm2              ;\n"
    227 	"por %%xmm9, %%xmm3              ;\n"
    228 	"por %%xmm10, %%xmm4             ;\n"
    229 	"por %%xmm11, %%xmm5             ;\n"
    230 
    231 	/* conditionally swap ysubx and xaddy */
    232 	"movq %3, %%rax                  ;\n"
    233 	"xorq $1, %%rax                  ;\n"
    234 	"movd %%rax, %%xmm14             ;\n"
    235 	"pxor %%xmm15, %%xmm15           ;\n"
    236 	"pshufd $0x00, %%xmm14, %%xmm14  ;\n"
    237 	"pxor %%xmm0, %%xmm2             ;\n"
    238 	"pxor %%xmm1, %%xmm3             ;\n"
    239 	"pcmpeqd %%xmm14, %%xmm15        ;\n"
    240 	"movdqa %%xmm2, %%xmm6           ;\n"
    241 	"movdqa %%xmm3, %%xmm7           ;\n"
    242 	"pand %%xmm15, %%xmm6            ;\n"
    243 	"pand %%xmm15, %%xmm7            ;\n"
    244 	"pxor %%xmm6, %%xmm0             ;\n"
    245 	"pxor %%xmm7, %%xmm1             ;\n"
    246 	"pxor %%xmm0, %%xmm2             ;\n"
    247 	"pxor %%xmm1, %%xmm3             ;\n"
    248 
    249 	/* store ysubx */
    250 	"xorq %%rax, %%rax               ;\n"
    251 	"movd %%xmm0, %%rcx              ;\n"
    252 	"movd %%xmm0, %%r8               ;\n"
    253 	"movd %%xmm1, %%rsi              ;\n"
    254 	"pshufd $0xee, %%xmm0, %%xmm0    ;\n"
    255 	"pshufd $0xee, %%xmm1, %%xmm1    ;\n"
    256 	"movd %%xmm0, %%rdx              ;\n"
    257 	"movd %%xmm1, %%rdi              ;\n"
    258 	"shrdq $51, %%rdx, %%r8          ;\n"
    259 	"shrdq $38, %%rsi, %%rdx         ;\n"
    260 	"shrdq $25, %%rdi, %%rsi         ;\n"
    261 	"shrq $12, %%rdi                 ;\n"
    262 	"movq %%rcx, %%r9                ;\n"
    263 	"movq %%r8, %%r10                ;\n"
    264 	"movq %%rdx, %%r11               ;\n"
    265 	"movq %%rsi, %%r12               ;\n"
    266 	"movq %%rdi, %%r13               ;\n"
    267 	"shrq $26, %%r9                  ;\n"
    268 	"shrq $26, %%r10                 ;\n"
    269 	"shrq $26, %%r11                 ;\n"
    270 	"shrq $26, %%r12                 ;\n"
    271 	"shrq $26, %%r13                 ;\n"
    272 	"andl $0x3ffffff, %%ecx          ;\n"
    273 	"andl $0x1ffffff, %%r9d          ;\n"
    274 	"andl $0x3ffffff, %%r8d          ;\n"
    275 	"andl $0x1ffffff, %%r10d         ;\n"
    276 	"andl $0x3ffffff, %%edx          ;\n"
    277 	"andl $0x1ffffff, %%r11d         ;\n"
    278 	"andl $0x3ffffff, %%esi          ;\n"
    279 	"andl $0x1ffffff, %%r12d         ;\n"
    280 	"andl $0x3ffffff, %%edi          ;\n"
    281 	"andl $0x1ffffff, %%r13d         ;\n"
    282 	"movl %%ecx, 0(%2)               ;\n"
    283 	"movl %%r9d, 4(%2)               ;\n"
    284 	"movl %%r8d, 8(%2)               ;\n"
    285 	"movl %%r10d, 12(%2)             ;\n"
    286 	"movl %%edx, 16(%2)              ;\n"
    287 	"movl %%r11d, 20(%2)             ;\n"
    288 	"movl %%esi, 24(%2)              ;\n"
    289 	"movl %%r12d, 28(%2)             ;\n"
    290 	"movl %%edi, 32(%2)              ;\n"
    291 	"movl %%r13d, 36(%2)             ;\n"
    292 	"movq %%rax, 40(%2)              ;\n"
    293 
    294 	/* store xaddy */
    295 	"movd %%xmm2, %%rcx              ;\n"
    296 	"movd %%xmm2, %%r8               ;\n"
    297 	"movd %%xmm3, %%rsi              ;\n"
    298 	"pshufd $0xee, %%xmm2, %%xmm2    ;\n"
    299 	"pshufd $0xee, %%xmm3, %%xmm3    ;\n"
    300 	"movd %%xmm2, %%rdx              ;\n"
    301 	"movd %%xmm3, %%rdi              ;\n"
    302 	"shrdq $51, %%rdx, %%r8          ;\n"
    303 	"shrdq $38, %%rsi, %%rdx         ;\n"
    304 	"shrdq $25, %%rdi, %%rsi         ;\n"
    305 	"shrq $12, %%rdi                 ;\n"
    306 	"movq %%rcx, %%r9                ;\n"
    307 	"movq %%r8, %%r10                ;\n"
    308 	"movq %%rdx, %%r11               ;\n"
    309 	"movq %%rsi, %%r12               ;\n"
    310 	"movq %%rdi, %%r13               ;\n"
    311 	"shrq $26, %%r9                  ;\n"
    312 	"shrq $26, %%r10                 ;\n"
    313 	"shrq $26, %%r11                 ;\n"
    314 	"shrq $26, %%r12                 ;\n"
    315 	"shrq $26, %%r13                 ;\n"
    316 	"andl $0x3ffffff, %%ecx          ;\n"
    317 	"andl $0x1ffffff, %%r9d          ;\n"
    318 	"andl $0x3ffffff, %%r8d          ;\n"
    319 	"andl $0x1ffffff, %%r10d         ;\n"
    320 	"andl $0x3ffffff, %%edx          ;\n"
    321 	"andl $0x1ffffff, %%r11d         ;\n"
    322 	"andl $0x3ffffff, %%esi          ;\n"
    323 	"andl $0x1ffffff, %%r12d         ;\n"
    324 	"andl $0x3ffffff, %%edi          ;\n"
    325 	"andl $0x1ffffff, %%r13d         ;\n"
    326 	"movl %%ecx, 48(%2)              ;\n"
    327 	"movl %%r9d, 52(%2)              ;\n"
    328 	"movl %%r8d, 56(%2)              ;\n"
    329 	"movl %%r10d, 60(%2)             ;\n"
    330 	"movl %%edx, 64(%2)              ;\n"
    331 	"movl %%r11d, 68(%2)             ;\n"
    332 	"movl %%esi, 72(%2)              ;\n"
    333 	"movl %%r12d, 76(%2)             ;\n"
    334 	"movl %%edi, 80(%2)              ;\n"
    335 	"movl %%r13d, 84(%2)             ;\n"
    336 	"movq %%rax, 88(%2)              ;\n"
    337 
    338 	/* extract t2d */
    339 	"xorq %%rax, %%rax               ;\n"
    340 	"movd %%xmm4, %%rcx              ;\n"
    341 	"movd %%xmm4, %%r8               ;\n"
    342 	"movd %%xmm5, %%rsi              ;\n"
    343 	"pshufd $0xee, %%xmm4, %%xmm4    ;\n"
    344 	"pshufd $0xee, %%xmm5, %%xmm5    ;\n"
    345 	"movd %%xmm4, %%rdx              ;\n"
    346 	"movd %%xmm5, %%rdi              ;\n"
    347 	"shrdq $51, %%rdx, %%r8          ;\n"
    348 	"shrdq $38, %%rsi, %%rdx         ;\n"
    349 	"shrdq $25, %%rdi, %%rsi         ;\n"
    350 	"shrq $12, %%rdi                 ;\n"
    351 	"movq %%rcx, %%r9                ;\n"
    352 	"movq %%r8, %%r10                ;\n"
    353 	"movq %%rdx, %%r11               ;\n"
    354 	"movq %%rsi, %%r12               ;\n"
    355 	"movq %%rdi, %%r13               ;\n"
    356 	"shrq $26, %%r9                  ;\n"
    357 	"shrq $26, %%r10                 ;\n"
    358 	"shrq $26, %%r11                 ;\n"
    359 	"shrq $26, %%r12                 ;\n"
    360 	"shrq $26, %%r13                 ;\n"
    361 	"andl $0x3ffffff, %%ecx          ;\n"
    362 	"andl $0x1ffffff, %%r9d          ;\n"
    363 	"andl $0x3ffffff, %%r8d          ;\n"
    364 	"andl $0x1ffffff, %%r10d         ;\n"
    365 	"andl $0x3ffffff, %%edx          ;\n"
    366 	"andl $0x1ffffff, %%r11d         ;\n"
    367 	"andl $0x3ffffff, %%esi          ;\n"
    368 	"andl $0x1ffffff, %%r12d         ;\n"
    369 	"andl $0x3ffffff, %%edi          ;\n"
    370 	"andl $0x1ffffff, %%r13d         ;\n"
    371 	"movd %%ecx, %%xmm0              ;\n"
    372 	"movd %%r9d, %%xmm4              ;\n"
    373 	"movd %%r8d, %%xmm8              ;\n"
    374 	"movd %%r10d, %%xmm3             ;\n"
    375 	"movd %%edx, %%xmm1              ;\n"
    376 	"movd %%r11d, %%xmm5             ;\n"
    377 	"movd %%esi, %%xmm6              ;\n"
    378 	"movd %%r12d, %%xmm7             ;\n"
    379 	"movd %%edi, %%xmm2              ;\n"
    380 	"movd %%r13d, %%xmm9             ;\n"
    381 	"punpckldq %%xmm4, %%xmm0        ;\n"
    382 	"punpckldq %%xmm3, %%xmm8        ;\n"
    383 	"punpckldq %%xmm5, %%xmm1        ;\n"
    384 	"punpckldq %%xmm7, %%xmm6        ;\n"
    385 	"punpckldq %%xmm9, %%xmm2        ;\n"
    386 	"punpcklqdq %%xmm8, %%xmm0       ;\n"
    387 	"punpcklqdq %%xmm6, %%xmm1       ;\n"
    388 
    389 	/* set up 2p in to 3/4 */
    390 	"movl $0x7ffffda, %%ecx          ;\n"
    391 	"movl $0x3fffffe, %%edx          ;\n"
    392 	"movl $0x7fffffe, %%eax          ;\n"
    393 	"movd %%ecx, %%xmm3              ;\n"
    394 	"movd %%edx, %%xmm5              ;\n"
    395 	"movd %%eax, %%xmm4              ;\n"
    396 	"punpckldq %%xmm5, %%xmm3        ;\n"
    397 	"punpckldq %%xmm5, %%xmm4        ;\n"
    398 	"punpcklqdq %%xmm4, %%xmm3       ;\n"
    399 	"movdqa %%xmm4, %%xmm5           ;\n"
    400 	"punpcklqdq %%xmm4, %%xmm4       ;\n"
    401 
    402 	/* subtract and conditionally move */
    403 	"movl %3, %%ecx                  ;\n"
    404 	"sub $1, %%ecx                   ;\n"
    405 	"movd %%ecx, %%xmm6              ;\n"
    406 	"pshufd $0x00, %%xmm6, %%xmm6    ;\n"
    407 	"movdqa %%xmm6, %%xmm7           ;\n"
    408 	"psubd %%xmm0, %%xmm3            ;\n"
    409 	"psubd %%xmm1, %%xmm4            ;\n"
    410 	"psubd %%xmm2, %%xmm5            ;\n"
    411 	"pand %%xmm6, %%xmm0             ;\n"
    412 	"pand %%xmm6, %%xmm1             ;\n"
    413 	"pand %%xmm6, %%xmm2             ;\n"
    414 	"pandn %%xmm3, %%xmm6            ;\n"
    415 	"movdqa %%xmm7, %%xmm3           ;\n"
    416 	"pandn %%xmm4, %%xmm7            ;\n"
    417 	"pandn %%xmm5, %%xmm3            ;\n"
    418 	"por %%xmm6, %%xmm0              ;\n"
    419 	"por %%xmm7, %%xmm1              ;\n"
    420 	"por %%xmm3, %%xmm2              ;\n"
    421 
    422 	/* store t2d */
    423 	"movdqa %%xmm0, 96(%2)           ;\n"
    424 	"movdqa %%xmm1, 112(%2)          ;\n"
    425 	"movdqa %%xmm2, 128(%2)          ;\n"
    426 	:
    427 	: "m"(u), "r"(&table[pos * 8]), "r"(t), "m"(sign) /* %0 = u, %1 = table, %2 = t, %3 = sign */
    428 	:
    429 		"%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", 
    430 		"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm14", "%xmm14",
    431 		"cc", "memory"
    432 );
    433 }
    434 
    435 #endif /* defined(ED25519_GCC_64BIT_SSE_CHOOSE) */