ed25519-donna-64bit-x86.h (13127B)
#if defined(ED25519_GCC_64BIT_X86_CHOOSE)

#define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Woverlength-strings"
#endif

/*
 * Select one precomputed point from a table window using masks instead of
 * data-dependent branches or indices, and write it to *t.
 *
 *   t     - output; 15 64-bit limbs are written at byte offsets 0..112
 *           (ysubx at 0, xaddy at 40, t2d at 80 -- presumably the field
 *           order of ge25519_niels; confirm against its declaration).
 *   table - packed entries of 96 bytes each (3 x 32 bytes: ysubx, xaddy,
 *           t2d). Entries are read with movdqa, so the table must be
 *           16-byte aligned.
 *   pos   - window index; the eight candidates are table[pos*8 .. pos*8+7].
 *           No bounds check is performed here.
 *   b     - signed digit. Only |b| in 0..8 matches a candidate; any other
 *           magnitude leaves all three fields zero.
 *
 * Behaviour:
 *   - |b| == 0 yields ysubx = 1, xaddy = 1, t2d = 0.
 *   - |b| == k (1..8) yields the entry at table[pos*8 + k - 1].
 *   - If b is negative, ysubx and xaddy are swapped and t2d is replaced by
 *     the limb-wise difference (2p - t2d), where 2p is twice 2^255 - 19 in
 *     51-bit limbs (0xfffffffffffda, then 0xffffffffffffe four times).
 *
 * All eight candidates are always loaded and combined with pand/por, and the
 * sign handling uses masked xor and cmove, so neither the memory access
 * pattern nor the control flow depends on b.
 */
DONNA_NOINLINE static void
ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
	int64_t breg = (int64_t)b;
	uint64_t sign = (uint64_t)breg >> 63;   /* 1 if b < 0, else 0 */
	uint64_t mask = ~(sign - 1);            /* all ones if b < 0, else 0 */
	uint64_t u = (breg + mask) ^ mask;      /* |b| via branch-free two's-complement negate */

	__asm__ __volatile__ (
		/* ysubx+xaddy+t2d */
		/* xmm14 = |b| broadcast to all four dwords; xmm0..xmm5 = accumulator (zeroed) */
		"movq %0, %%rax ;\n"
		"movd %%rax, %%xmm14 ;\n"
		"pshufd $0x00, %%xmm14, %%xmm14 ;\n"
		"pxor %%xmm0, %%xmm0 ;\n"
		"pxor %%xmm1, %%xmm1 ;\n"
		"pxor %%xmm2, %%xmm2 ;\n"
		"pxor %%xmm3, %%xmm3 ;\n"
		"pxor %%xmm4, %%xmm4 ;\n"
		"pxor %%xmm5, %%xmm5 ;\n"

		/* 0 */
		/* xmm15 = all ones iff |b| == 0; selects ysubx = xaddy = 1, t2d stays 0 */
		"movq $0, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movq $1, %%rax ;\n"
		"movd %%rax, %%xmm6 ;\n"
		"pxor %%xmm7, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm6, %%xmm2 ;\n"
		"por %%xmm7, %%xmm3 ;\n"

		/* 1 */
		/* each step below: mask = (|b| == k), load entry k-1, AND with mask, OR into accumulator */
		"movq $1, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 0(%1), %%xmm6 ;\n"
		"movdqa 16(%1), %%xmm7 ;\n"
		"movdqa 32(%1), %%xmm8 ;\n"
		"movdqa 48(%1), %%xmm9 ;\n"
		"movdqa 64(%1), %%xmm10 ;\n"
		"movdqa 80(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 2 */
		"movq $2, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 96(%1), %%xmm6 ;\n"
		"movdqa 112(%1), %%xmm7 ;\n"
		"movdqa 128(%1), %%xmm8 ;\n"
		"movdqa 144(%1), %%xmm9 ;\n"
		"movdqa 160(%1), %%xmm10 ;\n"
		"movdqa 176(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 3 */
		"movq $3, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 192(%1), %%xmm6 ;\n"
		"movdqa 208(%1), %%xmm7 ;\n"
		"movdqa 224(%1), %%xmm8 ;\n"
		"movdqa 240(%1), %%xmm9 ;\n"
		"movdqa 256(%1), %%xmm10 ;\n"
		"movdqa 272(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 4 */
		"movq $4, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 288(%1), %%xmm6 ;\n"
		"movdqa 304(%1), %%xmm7 ;\n"
		"movdqa 320(%1), %%xmm8 ;\n"
		"movdqa 336(%1), %%xmm9 ;\n"
		"movdqa 352(%1), %%xmm10 ;\n"
		"movdqa 368(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 5 */
		"movq $5, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 384(%1), %%xmm6 ;\n"
		"movdqa 400(%1), %%xmm7 ;\n"
		"movdqa 416(%1), %%xmm8 ;\n"
		"movdqa 432(%1), %%xmm9 ;\n"
		"movdqa 448(%1), %%xmm10 ;\n"
		"movdqa 464(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 6 */
		"movq $6, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 480(%1), %%xmm6 ;\n"
		"movdqa 496(%1), %%xmm7 ;\n"
		"movdqa 512(%1), %%xmm8 ;\n"
		"movdqa 528(%1), %%xmm9 ;\n"
		"movdqa 544(%1), %%xmm10 ;\n"
		"movdqa 560(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 7 */
		"movq $7, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 576(%1), %%xmm6 ;\n"
		"movdqa 592(%1), %%xmm7 ;\n"
		"movdqa 608(%1), %%xmm8 ;\n"
		"movdqa 624(%1), %%xmm9 ;\n"
		"movdqa 640(%1), %%xmm10 ;\n"
		"movdqa 656(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* 8 */
		"movq $8, %%rax ;\n"
		"movd %%rax, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm15, %%xmm15 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa 672(%1), %%xmm6 ;\n"
		"movdqa 688(%1), %%xmm7 ;\n"
		"movdqa 704(%1), %%xmm8 ;\n"
		"movdqa 720(%1), %%xmm9 ;\n"
		"movdqa 736(%1), %%xmm10 ;\n"
		"movdqa 752(%1), %%xmm11 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm8 ;\n"
		"pand %%xmm15, %%xmm9 ;\n"
		"pand %%xmm15, %%xmm10 ;\n"
		"pand %%xmm15, %%xmm11 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm8, %%xmm2 ;\n"
		"por %%xmm9, %%xmm3 ;\n"
		"por %%xmm10, %%xmm4 ;\n"
		"por %%xmm11, %%xmm5 ;\n"

		/* conditionally swap ysubx and xaddy */
		/* xmm15 = all ones iff (sign ^ 1) == 0, i.e. b < 0; masked xor-swap of xmm0:1 with xmm2:3 */
		"movq %3, %%rax ;\n"
		"xorq $1, %%rax ;\n"
		"movd %%rax, %%xmm14 ;\n"
		"pxor %%xmm15, %%xmm15 ;\n"
		"pshufd $0x00, %%xmm14, %%xmm14 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"
		"pcmpeqd %%xmm14, %%xmm15 ;\n"
		"movdqa %%xmm2, %%xmm6 ;\n"
		"movdqa %%xmm3, %%xmm7 ;\n"
		"pand %%xmm15, %%xmm6 ;\n"
		"pand %%xmm15, %%xmm7 ;\n"
		"pxor %%xmm6, %%xmm0 ;\n"
		"pxor %%xmm7, %%xmm1 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"

		/* store ysubx */
		/* unpack 256 packed bits into five limbs of 51 bits (mask 0x7ffffffffffff) */
		"movq $0x7ffffffffffff, %%rax ;\n"
		"movd %%xmm0, %%rcx ;\n"
		"movd %%xmm0, %%r8 ;\n"
		"movd %%xmm1, %%rsi ;\n"
		"pshufd $0xee, %%xmm0, %%xmm0 ;\n"
		"pshufd $0xee, %%xmm1, %%xmm1 ;\n"
		"movd %%xmm0, %%rdx ;\n"
		"movd %%xmm1, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"andq %%rax, %%rcx ;\n"
		"andq %%rax, %%r8 ;\n"
		"andq %%rax, %%rdx ;\n"
		"andq %%rax, %%rsi ;\n"
		"andq %%rax, %%rdi ;\n"
		"movq %%rcx, 0(%2) ;\n"
		"movq %%r8, 8(%2) ;\n"
		"movq %%rdx, 16(%2) ;\n"
		"movq %%rsi, 24(%2) ;\n"
		"movq %%rdi, 32(%2) ;\n"

		/* store xaddy */
		"movq $0x7ffffffffffff, %%rax ;\n"
		"movd %%xmm2, %%rcx ;\n"
		"movd %%xmm2, %%r8 ;\n"
		"movd %%xmm3, %%rsi ;\n"
		"pshufd $0xee, %%xmm2, %%xmm2 ;\n"
		"pshufd $0xee, %%xmm3, %%xmm3 ;\n"
		"movd %%xmm2, %%rdx ;\n"
		"movd %%xmm3, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"andq %%rax, %%rcx ;\n"
		"andq %%rax, %%r8 ;\n"
		"andq %%rax, %%rdx ;\n"
		"andq %%rax, %%rsi ;\n"
		"andq %%rax, %%rdi ;\n"
		"movq %%rcx, 40(%2) ;\n"
		"movq %%r8, 48(%2) ;\n"
		"movq %%rdx, 56(%2) ;\n"
		"movq %%rsi, 64(%2) ;\n"
		"movq %%rdi, 72(%2) ;\n"

		/* extract t2d */
		"movq $0x7ffffffffffff, %%rax ;\n"
		"movd %%xmm4, %%rcx ;\n"
		"movd %%xmm4, %%r8 ;\n"
		"movd %%xmm5, %%rsi ;\n"
		"pshufd $0xee, %%xmm4, %%xmm4 ;\n"
		"pshufd $0xee, %%xmm5, %%xmm5 ;\n"
		"movd %%xmm4, %%rdx ;\n"
		"movd %%xmm5, %%rdi ;\n"
		"shrdq $51, %%rdx, %%r8 ;\n"
		"shrdq $38, %%rsi, %%rdx ;\n"
		"shrdq $25, %%rdi, %%rsi ;\n"
		"shrq $12, %%rdi ;\n"
		"andq %%rax, %%rcx ;\n"
		"andq %%rax, %%r8 ;\n"
		"andq %%rax, %%rdx ;\n"
		"andq %%rax, %%rsi ;\n"
		"andq %%rax, %%rdi ;\n"

		/* conditionally negate t2d */
		/* r9..r13 = 2p - t2d limb-wise; cmove picks them only when sign == 1 */
		"movq %3, %%rax ;\n"
		"movq $0xfffffffffffda, %%r9 ;\n"
		"movq $0xffffffffffffe, %%r10 ;\n"
		"movq %%r10, %%r11 ;\n"
		"movq %%r10, %%r12 ;\n"
		"movq %%r10, %%r13 ;\n"
		"subq %%rcx, %%r9 ;\n"
		"subq %%r8, %%r10 ;\n"
		"subq %%rdx, %%r11 ;\n"
		"subq %%rsi, %%r12 ;\n"
		"subq %%rdi, %%r13 ;\n"
		"cmpq $1, %%rax ;\n"
		"cmove %%r9, %%rcx ;\n"
		"cmove %%r10, %%r8 ;\n"
		"cmove %%r11, %%rdx ;\n"
		"cmove %%r12, %%rsi ;\n"
		"cmove %%r13, %%rdi ;\n"

		/* store t2d */
		"movq %%rcx, 80(%2) ;\n"
		"movq %%r8, 88(%2) ;\n"
		"movq %%rdx, 96(%2) ;\n"
		"movq %%rsi, 104(%2) ;\n"
		"movq %%rdi, 112(%2) ;\n"
		:
		: "m"(u), "r"(&table[pos * 8]), "r"(t), "m"(sign)  /* %0 = u, %1 = table, %2 = t, %3 = sign */
		:
			"%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13",
			/* xmm15 holds the per-step comparison mask and must be listed: an
			   unlisted clobber lets the compiler assume it survives the asm,
			   which matters wherever xmm15 is callee-saved. */
			"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm14", "%xmm15",
			"cc", "memory"
	);
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif

#endif /* defined(ED25519_GCC_64BIT_X86_CHOOSE) */