tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

ed25519-donna-32bit-sse2.h (18640B)


      1 #if defined(ED25519_GCC_32BIT_SSE_CHOOSE)
      2 
      3 #define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS
      4 
      /*
       * Branch-free lookup of one precomputed table entry for a signed digit b.
       *
       * Inputs, as used by the code below:
       *   t     - destination; written as three 48-byte fields at byte offsets
       *           0, 48 and 96 (labelled ysubx, xaddy and t2d in the comments).
       *   table - rows of 96 packed bytes; the eight rows starting at
       *           table[pos * 8] are the candidates for |b| = 1..8.  Within a
       *           row, bytes 0-31 feed the first field, bytes 32-63 the second
       *           and bytes 64-95 the third.
       *   pos   - selects the group of eight rows.
       *   b     - signed digit; its magnitude picks the row, its sign picks the
       *           final adjustment.
       *
       * Result:
       *   |b| == 0    -> first and second fields hold the value 1, third is 0.
       *   |b| == 1..8 -> fields unpacked from row table[pos * 8 + |b| - 1].
       *   b < 0       -> additionally the first two fields are swapped and the
       *                  third is replaced limb-wise by (2p - third).
       *   For |b| > 8 no case matches and the selections stay all-zero before
       *   the sign adjustment (NOTE(review): callers presumably guarantee
       *   |b| <= 8 - confirm).
       *
       * Each packed 32-byte value is expanded into ten 32-bit limbs of
       * alternating 26 and 25 bits, followed by two zero words.
       *
       * All eight rows are always loaded and combined through compare masks; the
       * asm contains no jumps and no memory address depends on b.  Presumably
       * this keeps the lookup independent of the digit value - confirm against
       * the project's constant-time requirements.
       *
       * Alignment: movdqa is used on the table rows and on t + 96, so both must
       * be 16-byte aligned.
       */
      5 DONNA_NOINLINE static void
      6 ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
      7 int32_t breg = (int32_t)b;
        /* sign = 1 when b is negative, 0 otherwise */
      8 uint32_t sign = (uint32_t)breg >> 31;
        /* mask = 0xffffffff when sign is 1, 0 when sign is 0 */
      9 uint32_t mask = ~(sign - 1);
        /* u = |b|: (breg - 1) ^ ~0 negates breg when it is negative; unchanged otherwise */
     10 uint32_t u = (breg + mask) ^ mask;
     11 
     12 __asm__ __volatile__ (
     13 	/* ysubx+xaddy: xmm6 = u broadcast to all four lanes;
        	 * xmm0:xmm1 accumulate the packed ysubx, xmm2:xmm3 the packed xaddy */
     14 	"movl %0, %%eax                  ;\n"
     15 	"movd %%eax, %%xmm6              ;\n"
     16 	"pshufd $0x00, %%xmm6, %%xmm6    ;\n"
     17 	"pxor %%xmm0, %%xmm0             ;\n"
     18 	"pxor %%xmm1, %%xmm1             ;\n"
     19 	"pxor %%xmm2, %%xmm2             ;\n"
     20 	"pxor %%xmm3, %%xmm3             ;\n"
     21 
     22 	/* u == 0: xmm7 = all-ones if u matches, else zero; selects the packed
        	 * constant 1 (low dword 1, all other bits 0) for both ysubx and xaddy */
     23 	"movl $0, %%eax                  ;\n"
     24 	"movd %%eax, %%xmm7              ;\n"
     25 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
     26 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
     27 	"movl $1, %%ecx                  ;\n"
     28 	"movd %%ecx, %%xmm4              ;\n"
     29 	"pxor %%xmm5, %%xmm5             ;\n"
     30 	"pand %%xmm7, %%xmm4             ;\n"
     31 	"pand %%xmm7, %%xmm5             ;\n"
     32 	"por %%xmm4, %%xmm0              ;\n"
     33 	"por %%xmm5, %%xmm1              ;\n"
     34 	"por %%xmm4, %%xmm2              ;\n"
     35 	"por %%xmm5, %%xmm3              ;\n"
     36 
     37 	/* u == 1: row at byte offset 0; bytes 0-31 -> ysubx, bytes 32-63 -> xaddy.
        	 * The same load / mask / OR pattern repeats for every row below. */
     38 	"movl $1, %%eax                  ;\n"
     39 	"movd %%eax, %%xmm7              ;\n"
     40 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
     41 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
     42 	"movdqa 0(%1), %%xmm4            ;\n"
     43 	"movdqa 16(%1), %%xmm5           ;\n"
     44 	"pand %%xmm7, %%xmm4             ;\n"
     45 	"pand %%xmm7, %%xmm5             ;\n"
     46 	"por %%xmm4, %%xmm0              ;\n"
     47 	"por %%xmm5, %%xmm1              ;\n"
     48 	"movdqa 32(%1), %%xmm4           ;\n"
     49 	"movdqa 48(%1), %%xmm5           ;\n"
     50 	"pand %%xmm7, %%xmm4             ;\n"
     51 	"pand %%xmm7, %%xmm5             ;\n"
     52 	"por %%xmm4, %%xmm2              ;\n"
     53 	"por %%xmm5, %%xmm3              ;\n"
     54 
     55 	/* u == 2: row at byte offset 96 */
     56 	"movl $2, %%eax                  ;\n"
     57 	"movd %%eax, %%xmm7              ;\n"
     58 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
     59 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
     60 	"movdqa 96(%1), %%xmm4           ;\n"
     61 	"movdqa 112(%1), %%xmm5          ;\n"
     62 	"pand %%xmm7, %%xmm4             ;\n"
     63 	"pand %%xmm7, %%xmm5             ;\n"
     64 	"por %%xmm4, %%xmm0              ;\n"
     65 	"por %%xmm5, %%xmm1              ;\n"
     66 	"movdqa 128(%1), %%xmm4          ;\n"
     67 	"movdqa 144(%1), %%xmm5          ;\n"
     68 	"pand %%xmm7, %%xmm4             ;\n"
     69 	"pand %%xmm7, %%xmm5             ;\n"
     70 	"por %%xmm4, %%xmm2              ;\n"
     71 	"por %%xmm5, %%xmm3              ;\n"
     72 
     73 	/* u == 3: row at byte offset 192 */
     74 	"movl $3, %%eax                  ;\n"
     75 	"movd %%eax, %%xmm7              ;\n"
     76 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
     77 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
     78 	"movdqa 192(%1), %%xmm4          ;\n"
     79 	"movdqa 208(%1), %%xmm5          ;\n"
     80 	"pand %%xmm7, %%xmm4             ;\n"
     81 	"pand %%xmm7, %%xmm5             ;\n"
     82 	"por %%xmm4, %%xmm0              ;\n"
     83 	"por %%xmm5, %%xmm1              ;\n"
     84 	"movdqa 224(%1), %%xmm4          ;\n"
     85 	"movdqa 240(%1), %%xmm5          ;\n"
     86 	"pand %%xmm7, %%xmm4             ;\n"
     87 	"pand %%xmm7, %%xmm5             ;\n"
     88 	"por %%xmm4, %%xmm2              ;\n"
     89 	"por %%xmm5, %%xmm3              ;\n"
     90 
     91 	/* u == 4: row at byte offset 288 */
     92 	"movl $4, %%eax                  ;\n"
     93 	"movd %%eax, %%xmm7              ;\n"
     94 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
     95 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
     96 	"movdqa 288(%1), %%xmm4          ;\n"
     97 	"movdqa 304(%1), %%xmm5          ;\n"
     98 	"pand %%xmm7, %%xmm4             ;\n"
     99 	"pand %%xmm7, %%xmm5             ;\n"
    100 	"por %%xmm4, %%xmm0              ;\n"
    101 	"por %%xmm5, %%xmm1              ;\n"
    102 	"movdqa 320(%1), %%xmm4          ;\n"
    103 	"movdqa 336(%1), %%xmm5          ;\n"
    104 	"pand %%xmm7, %%xmm4             ;\n"
    105 	"pand %%xmm7, %%xmm5             ;\n"
    106 	"por %%xmm4, %%xmm2              ;\n"
    107 	"por %%xmm5, %%xmm3              ;\n"
    108 
    109 	/* u == 5: row at byte offset 384 */
    110 	"movl $5, %%eax                  ;\n"
    111 	"movd %%eax, %%xmm7              ;\n"
    112 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    113 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    114 	"movdqa 384(%1), %%xmm4          ;\n"
    115 	"movdqa 400(%1), %%xmm5          ;\n"
    116 	"pand %%xmm7, %%xmm4             ;\n"
    117 	"pand %%xmm7, %%xmm5             ;\n"
    118 	"por %%xmm4, %%xmm0              ;\n"
    119 	"por %%xmm5, %%xmm1              ;\n"
    120 	"movdqa 416(%1), %%xmm4          ;\n"
    121 	"movdqa 432(%1), %%xmm5          ;\n"
    122 	"pand %%xmm7, %%xmm4             ;\n"
    123 	"pand %%xmm7, %%xmm5             ;\n"
    124 	"por %%xmm4, %%xmm2              ;\n"
    125 	"por %%xmm5, %%xmm3              ;\n"
    126 
    127 	/* u == 6: row at byte offset 480 */
    128 	"movl $6, %%eax                  ;\n"
    129 	"movd %%eax, %%xmm7              ;\n"
    130 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    131 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    132 	"movdqa 480(%1), %%xmm4          ;\n"
    133 	"movdqa 496(%1), %%xmm5          ;\n"
    134 	"pand %%xmm7, %%xmm4             ;\n"
    135 	"pand %%xmm7, %%xmm5             ;\n"
    136 	"por %%xmm4, %%xmm0              ;\n"
    137 	"por %%xmm5, %%xmm1              ;\n"
    138 	"movdqa 512(%1), %%xmm4          ;\n"
    139 	"movdqa 528(%1), %%xmm5          ;\n"
    140 	"pand %%xmm7, %%xmm4             ;\n"
    141 	"pand %%xmm7, %%xmm5             ;\n"
    142 	"por %%xmm4, %%xmm2              ;\n"
    143 	"por %%xmm5, %%xmm3              ;\n"
    144 
    145 	/* u == 7: row at byte offset 576 */
    146 	"movl $7, %%eax                  ;\n"
    147 	"movd %%eax, %%xmm7              ;\n"
    148 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    149 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    150 	"movdqa 576(%1), %%xmm4          ;\n"
    151 	"movdqa 592(%1), %%xmm5          ;\n"
    152 	"pand %%xmm7, %%xmm4             ;\n"
    153 	"pand %%xmm7, %%xmm5             ;\n"
    154 	"por %%xmm4, %%xmm0              ;\n"
    155 	"por %%xmm5, %%xmm1              ;\n"
    156 	"movdqa 608(%1), %%xmm4          ;\n"
    157 	"movdqa 624(%1), %%xmm5          ;\n"
    158 	"pand %%xmm7, %%xmm4             ;\n"
    159 	"pand %%xmm7, %%xmm5             ;\n"
    160 	"por %%xmm4, %%xmm2              ;\n"
    161 	"por %%xmm5, %%xmm3              ;\n"
    162 
    163 	/* u == 8: row at byte offset 672 */
    164 	"movl $8, %%eax                  ;\n"
    165 	"movd %%eax, %%xmm7              ;\n"
    166 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    167 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    168 	"movdqa 672(%1), %%xmm4          ;\n"
    169 	"movdqa 688(%1), %%xmm5          ;\n"
    170 	"pand %%xmm7, %%xmm4             ;\n"
    171 	"pand %%xmm7, %%xmm5             ;\n"
    172 	"por %%xmm4, %%xmm0              ;\n"
    173 	"por %%xmm5, %%xmm1              ;\n"
    174 	"movdqa 704(%1), %%xmm4          ;\n"
    175 	"movdqa 720(%1), %%xmm5          ;\n"
    176 	"pand %%xmm7, %%xmm4             ;\n"
    177 	"pand %%xmm7, %%xmm5             ;\n"
    178 	"por %%xmm4, %%xmm2              ;\n"
    179 	"por %%xmm5, %%xmm3              ;\n"
    180 
    181 	/* conditional swap based on sign: ecx = sign ^ 1, eax = t.
        	 * xmm7 = all-ones exactly when sign == 1 (sign ^ 1 compared with zero).
        	 * The xor / and / xor sequence exchanges xmm0:xmm1 (ysubx) with
        	 * xmm2:xmm3 (xaddy) under that mask and leaves them untouched otherwise. */
    182 	"movl %3, %%ecx                  ;\n"
    183 	"movl %2, %%eax                  ;\n"
    184 	"xorl $1, %%ecx                  ;\n"
    185 	"movd %%ecx, %%xmm6              ;\n"
    186 	"pxor %%xmm7, %%xmm7             ;\n"
    187 	"pshufd $0x00, %%xmm6, %%xmm6    ;\n"
    188 	"pxor %%xmm0, %%xmm2             ;\n"
    189 	"pxor %%xmm1, %%xmm3             ;\n"
    190 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    191 	"movdqa %%xmm2, %%xmm4           ;\n"
    192 	"movdqa %%xmm3, %%xmm5           ;\n"
    193 	"pand %%xmm7, %%xmm4             ;\n"
    194 	"pand %%xmm7, %%xmm5             ;\n"
    195 	"pxor %%xmm4, %%xmm0             ;\n"
    196 	"pxor %%xmm5, %%xmm1             ;\n"
    197 	"pxor %%xmm0, %%xmm2             ;\n"
    198 	"pxor %%xmm1, %%xmm3             ;\n"
    199 
    200 	/* store ysubx: unpack the 256 packed bits in xmm0:xmm1 into ten limbs at
        	 * 0..36(%eax), alternating 26-bit (mask 0x3ffffff) and 25-bit (mask
        	 * 0x1ffffff) widths.  pshufd $0x39 rotates the next dword into lane 0,
        	 * shrdl pulls in bits that straddle two dwords.  Words at 40 and 44 are
        	 * written as zero. */
    201 	"movd %%xmm0, %%ecx              ;\n"
    202 	"movl %%ecx, %%edx               ;\n"
    203 	"pshufd $0x39, %%xmm0, %%xmm0    ;\n"
    204 	"andl $0x3ffffff, %%ecx          ;\n"
    205 	"movl %%ecx, 0(%%eax)            ;\n"
    206 	"movd %%xmm0, %%ecx              ;\n"
    207 	"pshufd $0x39, %%xmm0, %%xmm0    ;\n"
    208 	"shrdl $26, %%ecx, %%edx         ;\n"
    209 	"andl $0x1ffffff, %%edx          ;\n"
    210 	"movl %%edx, 4(%%eax)            ;\n"
    211 	"movd %%xmm0, %%edx              ;\n"
    212 	"pshufd $0x39, %%xmm0, %%xmm0    ;\n"
    213 	"shrdl $19, %%edx, %%ecx         ;\n"
    214 	"andl $0x3ffffff, %%ecx          ;\n"
    215 	"movl %%ecx, 8(%%eax)            ;\n"
    216 	"movd %%xmm0, %%ecx              ;\n"
    217 	"shrdl $13, %%ecx, %%edx         ;\n"
    218 	"andl $0x1ffffff, %%edx          ;\n"
    219 	"movl %%edx, 12(%%eax)           ;\n"
    220 	"movd %%xmm1, %%edx              ;\n"
    221 	"pshufd $0x39, %%xmm1, %%xmm1    ;\n"
    222 	"shrl $6, %%ecx                  ;\n"
    223 	"andl $0x3ffffff, %%ecx          ;\n"
    224 	"movl %%ecx, 16(%%eax)           ;\n"
    225 	"movl %%edx, %%ecx               ;\n"
    226 	"andl $0x1ffffff, %%edx          ;\n"
    227 	"movl %%edx, 20(%%eax)           ;\n"
    228 	"movd %%xmm1, %%edx              ;\n"
    229 	"pshufd $0x39, %%xmm1, %%xmm1    ;\n"
    230 	"shrdl $25, %%edx, %%ecx         ;\n"
    231 	"andl $0x3ffffff, %%ecx          ;\n"
    232 	"movl %%ecx, 24(%%eax)           ;\n"
    233 	"movd %%xmm1, %%ecx              ;\n"
    234 	"pshufd $0x39, %%xmm1, %%xmm1    ;\n"
    235 	"shrdl $19, %%ecx, %%edx         ;\n"
    236 	"andl $0x1ffffff, %%edx          ;\n"
    237 	"movl %%edx, 28(%%eax)           ;\n"
    238 	"movd %%xmm1, %%edx              ;\n"
    239 	"shrdl $12, %%edx, %%ecx         ;\n"
    240 	"andl $0x3ffffff, %%ecx          ;\n"
    241 	"movl %%ecx, 32(%%eax)           ;\n"
    242 	"shrl $6, %%edx                  ;\n"
    243 	"andl $0x1ffffff, %%edx          ;\n"
    244 	"xorl %%ecx, %%ecx               ;\n"
    245 	"movl %%edx, 36(%%eax)           ;\n"
    246 	"movl %%ecx, 40(%%eax)           ;\n"
    247 	"movl %%ecx, 44(%%eax)           ;\n"
    248 
    249 	/* store xaddy: eax = t + 48; copy xmm2:xmm3 into xmm0:xmm1 and unpack
        	 * with the same limb sequence as above */
    250 	"addl $48, %%eax                 ;\n"
    251 	"movdqa %%xmm2, %%xmm0           ;\n"
    252 	"movdqa %%xmm3, %%xmm1           ;\n"
    253 	"movd %%xmm0, %%ecx              ;\n"
    254 	"movl %%ecx, %%edx               ;\n"
    255 	"pshufd $0x39, %%xmm0, %%xmm0    ;\n"
    256 	"andl $0x3ffffff, %%ecx          ;\n"
    257 	"movl %%ecx, 0(%%eax)            ;\n"
    258 	"movd %%xmm0, %%ecx              ;\n"
    259 	"pshufd $0x39, %%xmm0, %%xmm0    ;\n"
    260 	"shrdl $26, %%ecx, %%edx         ;\n"
    261 	"andl $0x1ffffff, %%edx          ;\n"
    262 	"movl %%edx, 4(%%eax)            ;\n"
    263 	"movd %%xmm0, %%edx              ;\n"
    264 	"pshufd $0x39, %%xmm0, %%xmm0    ;\n"
    265 	"shrdl $19, %%edx, %%ecx         ;\n"
    266 	"andl $0x3ffffff, %%ecx          ;\n"
    267 	"movl %%ecx, 8(%%eax)            ;\n"
    268 	"movd %%xmm0, %%ecx              ;\n"
    269 	"shrdl $13, %%ecx, %%edx         ;\n"
    270 	"andl $0x1ffffff, %%edx          ;\n"
    271 	"movl %%edx, 12(%%eax)           ;\n"
    272 	"movd %%xmm1, %%edx              ;\n"
    273 	"pshufd $0x39, %%xmm1, %%xmm1    ;\n"
    274 	"shrl $6, %%ecx                  ;\n"
    275 	"andl $0x3ffffff, %%ecx          ;\n"
    276 	"movl %%ecx, 16(%%eax)           ;\n"
    277 	"movl %%edx, %%ecx               ;\n"
    278 	"andl $0x1ffffff, %%edx          ;\n"
    279 	"movl %%edx, 20(%%eax)           ;\n"
    280 	"movd %%xmm1, %%edx              ;\n"
    281 	"pshufd $0x39, %%xmm1, %%xmm1    ;\n"
    282 	"shrdl $25, %%edx, %%ecx         ;\n"
    283 	"andl $0x3ffffff, %%ecx          ;\n"
    284 	"movl %%ecx, 24(%%eax)           ;\n"
    285 	"movd %%xmm1, %%ecx              ;\n"
    286 	"pshufd $0x39, %%xmm1, %%xmm1    ;\n"
    287 	"shrdl $19, %%ecx, %%edx         ;\n"
    288 	"andl $0x1ffffff, %%edx          ;\n"
    289 	"movl %%edx, 28(%%eax)           ;\n"
    290 	"movd %%xmm1, %%edx              ;\n"
    291 	"shrdl $12, %%edx, %%ecx         ;\n"
    292 	"andl $0x3ffffff, %%ecx          ;\n"
    293 	"movl %%ecx, 32(%%eax)           ;\n"
    294 	"shrl $6, %%edx                  ;\n"
    295 	"andl $0x1ffffff, %%edx          ;\n"
    296 	"xorl %%ecx, %%ecx               ;\n"
    297 	"movl %%edx, 36(%%eax)           ;\n"
    298 	"movl %%ecx, 40(%%eax)           ;\n"
    299 	"movl %%ecx, 44(%%eax)           ;\n"
    300 
    301 	/* t2d: reload u into every lane of xmm6; xmm0:xmm1 accumulate the packed
        	 * t2d taken from bytes 64-95 of each row */
    302 	"movl %0, %%eax                  ;\n"
    303 	"movd %%eax, %%xmm6              ;\n"
    304 	"pshufd $0x00, %%xmm6, %%xmm6    ;\n"
    305 	"pxor %%xmm0, %%xmm0             ;\n"
    306 	"pxor %%xmm1, %%xmm1             ;\n"
    307 
    308 	/* u == 0: t2d is zero.  The compare mask in xmm7 is computed but not
        	 * used here, and the accumulators are simply cleared a second time. */
    309 	"movl $0, %%eax                  ;\n"
    310 	"movd %%eax, %%xmm7              ;\n"
    311 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    312 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    313 	"pxor %%xmm0, %%xmm0             ;\n"
    314 	"pxor %%xmm1, %%xmm1             ;\n"
    315 
    316 	/* u == 1: t2d bytes at offset 64 */
    317 	"movl $1, %%eax                  ;\n"
    318 	"movd %%eax, %%xmm7              ;\n"
    319 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    320 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    321 	"movdqa 64(%1), %%xmm3           ;\n"
    322 	"movdqa 80(%1), %%xmm4           ;\n"
    323 	"pand %%xmm7, %%xmm3             ;\n"
    324 	"pand %%xmm7, %%xmm4             ;\n"
    325 	"por %%xmm3, %%xmm0              ;\n"
    326 	"por %%xmm4, %%xmm1              ;\n"
    327 
    328 	/* u == 2: t2d bytes at offset 160 */
    329 	"movl $2, %%eax                  ;\n"
    330 	"movd %%eax, %%xmm7              ;\n"
    331 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    332 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    333 	"movdqa 160(%1), %%xmm3          ;\n"
    334 	"movdqa 176(%1), %%xmm4          ;\n"
    335 	"pand %%xmm7, %%xmm3             ;\n"
    336 	"pand %%xmm7, %%xmm4             ;\n"
    337 	"por %%xmm3, %%xmm0              ;\n"
    338 	"por %%xmm4, %%xmm1              ;\n"
    339 
    340 	/* u == 3: t2d bytes at offset 256 */
    341 	"movl $3, %%eax                  ;\n"
    342 	"movd %%eax, %%xmm7              ;\n"
    343 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    344 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    345 	"movdqa 256(%1), %%xmm3          ;\n"
    346 	"movdqa 272(%1), %%xmm4          ;\n"
    347 	"pand %%xmm7, %%xmm3             ;\n"
    348 	"pand %%xmm7, %%xmm4             ;\n"
    349 	"por %%xmm3, %%xmm0              ;\n"
    350 	"por %%xmm4, %%xmm1              ;\n"
    351 
    352 	/* u == 4: t2d bytes at offset 352 */
    353 	"movl $4, %%eax                  ;\n"
    354 	"movd %%eax, %%xmm7              ;\n"
    355 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    356 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    357 	"movdqa 352(%1), %%xmm3          ;\n"
    358 	"movdqa 368(%1), %%xmm4          ;\n"
    359 	"pand %%xmm7, %%xmm3             ;\n"
    360 	"pand %%xmm7, %%xmm4             ;\n"
    361 	"por %%xmm3, %%xmm0              ;\n"
    362 	"por %%xmm4, %%xmm1              ;\n"
    363 
    364 	/* u == 5: t2d bytes at offset 448 */
    365 	"movl $5, %%eax                  ;\n"
    366 	"movd %%eax, %%xmm7              ;\n"
    367 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    368 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    369 	"movdqa 448(%1), %%xmm3          ;\n"
    370 	"movdqa 464(%1), %%xmm4          ;\n"
    371 	"pand %%xmm7, %%xmm3             ;\n"
    372 	"pand %%xmm7, %%xmm4             ;\n"
    373 	"por %%xmm3, %%xmm0              ;\n"
    374 	"por %%xmm4, %%xmm1              ;\n"
    375 
    376 	/* u == 6: t2d bytes at offset 544 */
    377 	"movl $6, %%eax                  ;\n"
    378 	"movd %%eax, %%xmm7              ;\n"
    379 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    380 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    381 	"movdqa 544(%1), %%xmm3          ;\n"
    382 	"movdqa 560(%1), %%xmm4          ;\n"
    383 	"pand %%xmm7, %%xmm3             ;\n"
    384 	"pand %%xmm7, %%xmm4             ;\n"
    385 	"por %%xmm3, %%xmm0              ;\n"
    386 	"por %%xmm4, %%xmm1              ;\n"
    387 
    388 	/* u == 7: t2d bytes at offset 640 */
    389 	"movl $7, %%eax                  ;\n"
    390 	"movd %%eax, %%xmm7              ;\n"
    391 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    392 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    393 	"movdqa 640(%1), %%xmm3          ;\n"
    394 	"movdqa 656(%1), %%xmm4          ;\n"
    395 	"pand %%xmm7, %%xmm3             ;\n"
    396 	"pand %%xmm7, %%xmm4             ;\n"
    397 	"por %%xmm3, %%xmm0              ;\n"
    398 	"por %%xmm4, %%xmm1              ;\n"
    399 
    400 	/* u == 8: t2d bytes at offset 736 */
    401 	"movl $8, %%eax                  ;\n"
    402 	"movd %%eax, %%xmm7              ;\n"
    403 	"pshufd $0x00, %%xmm7, %%xmm7    ;\n"
    404 	"pcmpeqd %%xmm6, %%xmm7          ;\n"
    405 	"movdqa 736(%1), %%xmm3          ;\n"
    406 	"movdqa 752(%1), %%xmm4          ;\n"
    407 	"pand %%xmm7, %%xmm3             ;\n"
    408 	"pand %%xmm7, %%xmm4             ;\n"
    409 	"por %%xmm3, %%xmm0              ;\n"
    410 	"por %%xmm4, %%xmm1              ;\n"
    411 
    412 	/* store t2d: eax = t + 96; same limb unpacking as for ysubx and xaddy */
    413 	"movl %2, %%eax                  ;\n"
    414 	"addl $96, %%eax                 ;\n"
    415 	"movd %%xmm0, %%ecx              ;\n"
    416 	"movl %%ecx, %%edx               ;\n"
    417 	"pshufd $0x39, %%xmm0, %%xmm0    ;\n"
    418 	"andl $0x3ffffff, %%ecx          ;\n"
    419 	"movl %%ecx, 0(%%eax)            ;\n"
    420 	"movd %%xmm0, %%ecx              ;\n"
    421 	"pshufd $0x39, %%xmm0, %%xmm0    ;\n"
    422 	"shrdl $26, %%ecx, %%edx         ;\n"
    423 	"andl $0x1ffffff, %%edx          ;\n"
    424 	"movl %%edx, 4(%%eax)            ;\n"
    425 	"movd %%xmm0, %%edx              ;\n"
    426 	"pshufd $0x39, %%xmm0, %%xmm0    ;\n"
    427 	"shrdl $19, %%edx, %%ecx         ;\n"
    428 	"andl $0x3ffffff, %%ecx          ;\n"
    429 	"movl %%ecx, 8(%%eax)            ;\n"
    430 	"movd %%xmm0, %%ecx              ;\n"
    431 	"shrdl $13, %%ecx, %%edx         ;\n"
    432 	"andl $0x1ffffff, %%edx          ;\n"
    433 	"movl %%edx, 12(%%eax)           ;\n"
    434 	"movd %%xmm1, %%edx              ;\n"
    435 	"pshufd $0x39, %%xmm1, %%xmm1    ;\n"
    436 	"shrl $6, %%ecx                  ;\n"
    437 	"andl $0x3ffffff, %%ecx          ;\n"
    438 	"movl %%ecx, 16(%%eax)           ;\n"
    439 	"movl %%edx, %%ecx               ;\n"
    440 	"andl $0x1ffffff, %%edx          ;\n"
    441 	"movl %%edx, 20(%%eax)           ;\n"
    442 	"movd %%xmm1, %%edx              ;\n"
    443 	"pshufd $0x39, %%xmm1, %%xmm1    ;\n"
    444 	"shrdl $25, %%edx, %%ecx         ;\n"
    445 	"andl $0x3ffffff, %%ecx          ;\n"
    446 	"movl %%ecx, 24(%%eax)           ;\n"
    447 	"movd %%xmm1, %%ecx              ;\n"
    448 	"pshufd $0x39, %%xmm1, %%xmm1    ;\n"
    449 	"shrdl $19, %%ecx, %%edx         ;\n"
    450 	"andl $0x1ffffff, %%edx          ;\n"
    451 	"movl %%edx, 28(%%eax)           ;\n"
    452 	"movd %%xmm1, %%edx              ;\n"
        	/* NOTE(review): the next movd repeats the previous line; it reloads the
        	 * same value into edx and has no further effect */
    453 	"movd %%xmm1, %%edx              ;\n"
    454 	"shrdl $12, %%edx, %%ecx         ;\n"
    455 	"andl $0x3ffffff, %%ecx          ;\n"
    456 	"movl %%ecx, 32(%%eax)           ;\n"
    457 	"shrl $6, %%edx                  ;\n"
    458 	"andl $0x1ffffff, %%edx          ;\n"
    459 	"xorl %%ecx, %%ecx               ;\n"
    460 	"movl %%edx, 36(%%eax)           ;\n"
    461 	"movl %%ecx, 40(%%eax)           ;\n"
    462 	"movl %%ecx, 44(%%eax)           ;\n"
        	/* reload the 12 words just written (10 limbs + 2 zero words) as three
        	 * 16-byte vectors; movdqa requires t + 96 to be 16-byte aligned */
    463 	"movdqa 0(%%eax), %%xmm0         ;\n"
    464 	"movdqa 16(%%eax), %%xmm1        ;\n"
    465 	"movdqa 32(%%eax), %%xmm2        ;\n"
    466 
    467 	/* conditionally negate t2d */
    468 
    469 	/* set up 2p, limb by limb, in xmm3/xmm4/xmm5:
        	 *   xmm3 = { 0x7ffffda, 0x3fffffe, 0x7fffffe, 0x3fffffe }  limbs 0-3
        	 *   xmm4 = { 0x7fffffe, 0x3fffffe, 0x7fffffe, 0x3fffffe }  limbs 4-7
        	 *   xmm5 = { 0x7fffffe, 0x3fffffe, 0, 0 }                  limbs 8-9 + padding */
    470 	"movl $0x7ffffda, %%ecx          ;\n"
    471 	"movl $0x3fffffe, %%edx          ;\n"
    472 	"movd %%ecx, %%xmm3              ;\n"
    473 	"movd %%edx, %%xmm5              ;\n"
    474 	"movl $0x7fffffe, %%ecx          ;\n"
    475 	"movd %%ecx, %%xmm4              ;\n"
    476 	"punpckldq %%xmm5, %%xmm3        ;\n"
    477 	"punpckldq %%xmm5, %%xmm4        ;\n"
    478 	"punpcklqdq %%xmm4, %%xmm3       ;\n"
    479 	"movdqa %%xmm4, %%xmm5           ;\n"
    480 	"punpcklqdq %%xmm4, %%xmm4       ;\n"
    481 
    482 	/* subtract and conditionally move: ecx = sign - 1, i.e. all-ones when
        	 * sign == 0 and zero when sign == 1.  xmm3..xmm5 become 2p - t2d; the
        	 * and / andn / or sequence keeps t2d when sign == 0 and takes
        	 * 2p - t2d when sign == 1. */
    483 	"movl %3, %%ecx                  ;\n"
    484 	"sub $1, %%ecx                   ;\n"
    485 	"movd %%ecx, %%xmm6              ;\n"
    486 	"pshufd $0x00, %%xmm6, %%xmm6    ;\n"
    487 	"movdqa %%xmm6, %%xmm7           ;\n"
    488 	"psubd %%xmm0, %%xmm3            ;\n"
    489 	"psubd %%xmm1, %%xmm4            ;\n"
    490 	"psubd %%xmm2, %%xmm5            ;\n"
    491 	"pand %%xmm6, %%xmm0             ;\n"
    492 	"pand %%xmm6, %%xmm1             ;\n"
    493 	"pand %%xmm6, %%xmm2             ;\n"
    494 	"pandn %%xmm3, %%xmm6            ;\n"
    495 	"movdqa %%xmm7, %%xmm3           ;\n"
    496 	"pandn %%xmm4, %%xmm7            ;\n"
    497 	"pandn %%xmm5, %%xmm3            ;\n"
    498 	"por %%xmm6, %%xmm0              ;\n"
    499 	"por %%xmm7, %%xmm1              ;\n"
    500 	"por %%xmm3, %%xmm2              ;\n"
    501 
    502 	/* store the (possibly negated) t2d back to t + 96 */
    503 	"movdqa %%xmm0, 0(%%eax)         ;\n"
    504 	"movdqa %%xmm1, 16(%%eax)        ;\n"
    505 	"movdqa %%xmm2, 32(%%eax)        ;\n"
    506 	:
    507 	: "m"(u), "r"(&table[pos * 8]), "m"(t), "m"(sign) /* %0 = u, %1 = table, %2 = t, %3 = sign */
    508 	: "%eax", "%ecx", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
    509 );
    510 }
    511 
    512 #endif /* defined(ED25519_GCC_32BIT_SSE_CHOOSE) */