curve25519-donna-64bit.h (14873B)
1 /* 2 Public domain by Adam Langley <agl@imperialviolet.org> & 3 Andrew M. <liquidsun@gmail.com> 4 See: https://github.com/floodyberry/curve25519-donna 5 6 64bit integer curve25519 implementation 7 */ 8 9 typedef uint64_t bignum25519[5]; 10 11 //static const uint64_t reduce_mask_40 = ((uint64_t)1 << 40) - 1; 12 static const uint64_t reduce_mask_51 = ((uint64_t)1 << 51) - 1; 13 //static const uint64_t reduce_mask_56 = ((uint64_t)1 << 56) - 1; 14 15 /* out = in */ 16 DONNA_INLINE static void 17 curve25519_copy(bignum25519 out, const bignum25519 in) { 18 out[0] = in[0]; 19 out[1] = in[1]; 20 out[2] = in[2]; 21 out[3] = in[3]; 22 out[4] = in[4]; 23 } 24 25 /* out = a + b */ 26 DONNA_INLINE static void 27 curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) { 28 out[0] = a[0] + b[0]; 29 out[1] = a[1] + b[1]; 30 out[2] = a[2] + b[2]; 31 out[3] = a[3] + b[3]; 32 out[4] = a[4] + b[4]; 33 } 34 35 /* out = a + b, where a and/or b are the result of a basic op (add,sub) */ 36 DONNA_INLINE static void 37 curve25519_add_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) { 38 out[0] = a[0] + b[0]; 39 out[1] = a[1] + b[1]; 40 out[2] = a[2] + b[2]; 41 out[3] = a[3] + b[3]; 42 out[4] = a[4] + b[4]; 43 } 44 45 DONNA_INLINE static void 46 curve25519_add_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) { 47 uint64_t c; 48 out[0] = a[0] + b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51; 49 out[1] = a[1] + b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51; 50 out[2] = a[2] + b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51; 51 out[3] = a[3] + b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51; 52 out[4] = a[4] + b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51; 53 out[0] += c * 19; 54 } 55 56 /* multiples of p */ 57 static const uint64_t twoP0 = 0x0fffffffffffda; 58 static const uint64_t twoP1234 = 0x0ffffffffffffe; 59 static const uint64_t fourP0 = 0x1fffffffffffb4; 60 static const uint64_t fourP1234 = 0x1ffffffffffffc; 61 62 /* out = a - b */ 63 DONNA_INLINE static void 64 curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) { 65 out[0] = a[0] + twoP0 - b[0]; 66 out[1] = a[1] + twoP1234 - b[1]; 67 out[2] = a[2] + twoP1234 - b[2]; 68 out[3] = a[3] + twoP1234 - b[3]; 69 out[4] = a[4] + twoP1234 - b[4]; 70 } 71 72 /* out = a - b, where a and/or b are the result of a basic op (add,sub) */ 73 DONNA_INLINE static void 74 curve25519_sub_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) { 75 out[0] = a[0] + fourP0 - b[0]; 76 out[1] = a[1] + fourP1234 - b[1]; 77 out[2] = a[2] + fourP1234 - b[2]; 78 out[3] = a[3] + fourP1234 - b[3]; 79 out[4] = a[4] + fourP1234 - b[4]; 80 } 81 82 DONNA_INLINE static void 83 curve25519_sub_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) { 84 uint64_t c; 85 out[0] = a[0] + fourP0 - b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51; 86 out[1] = a[1] + fourP1234 - b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51; 87 out[2] = a[2] + fourP1234 - b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51; 88 out[3] = a[3] + fourP1234 - b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51; 89 out[4] = a[4] + fourP1234 - b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51; 90 out[0] += c * 19; 91 } 92 93 /* out = -a */ 94 DONNA_INLINE static void 95 curve25519_neg(bignum25519 out, const bignum25519 a) { 96 uint64_t c; 97 out[0] = twoP0 - a[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51; 98 out[1] = twoP1234 - a[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51; 99 out[2] = twoP1234 - a[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51; 100 out[3] = twoP1234 - a[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51; 101 out[4] = twoP1234 - a[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51; 102 out[0] += c * 19; 103 } 104 105 /* out = a * b */ 106 DONNA_INLINE static void 107 curve25519_mul(bignum25519 out, const bignum25519 in2, const bignum25519 in) { 108 #if !defined(HAVE_NATIVE_UINT128) 109 uint128_t mul; 110 #endif 111 uint128_t t[5]; 112 uint64_t r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c; 113 114 r0 = in[0]; 115 r1 = in[1]; 116 r2 = in[2]; 117 r3 = in[3]; 118 r4 = in[4]; 119 120 s0 = in2[0]; 121 s1 = in2[1]; 122 s2 = in2[2]; 123 s3 = in2[3]; 124 s4 = in2[4]; 125 126 #if defined(HAVE_NATIVE_UINT128) 127 t[0] = ((uint128_t) r0) * s0; 128 t[1] = ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0; 129 t[2] = ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1; 130 t[3] = ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 + ((uint128_t) r2) * s1; 131 t[4] = ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2; 132 #else 133 mul64x64_128(t[0], r0, s0) 134 mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul) 135 mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul) 136 mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul) 137 mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul) 138 #endif 139 140 r1 *= 19; 141 r2 *= 19; 142 r3 *= 19; 143 r4 *= 19; 144 145 #if defined(HAVE_NATIVE_UINT128) 146 t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2; 147 t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3; 148 t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4; 149 t[3] += ((uint128_t) r4) * s4; 150 #else 151 mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], mul) 152 mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul) 153 mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul) 154 mul64x64_128(mul, r4, s4) add128(t[3], mul) 155 #endif 156 157 158 r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51); 159 add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51); 160 add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51); 161 add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51); 162 add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51); 163 r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51; 164 r1 += c; 165 166 out[0] = r0; 167 out[1] = r1; 168 out[2] = r2; 169 out[3] = r3; 170 out[4] = r4; 171 } 172 173 DONNA_NOINLINE static void 174 curve25519_mul_noinline(bignum25519 out, const bignum25519 in2, const bignum25519 in) { 175 curve25519_mul(out, in2, in); 176 } 177 178 /* out = in^(2 * count) */ 179 DONNA_NOINLINE static void 180 curve25519_square_times(bignum25519 out, const bignum25519 in, uint64_t count) { 181 #if !defined(HAVE_NATIVE_UINT128) 182 uint128_t mul; 183 #endif 184 uint128_t t[5]; 185 uint64_t r0,r1,r2,r3,r4,c; 186 uint64_t d0,d1,d2,d4,d419; 187 188 r0 = in[0]; 189 r1 = in[1]; 190 r2 = in[2]; 191 r3 = in[3]; 192 r4 = in[4]; 193 194 do { 195 d0 = r0 * 2; 196 d1 = r1 * 2; 197 d2 = r2 * 2 * 19; 198 d419 = r4 * 19; 199 d4 = d419 * 2; 200 201 #if defined(HAVE_NATIVE_UINT128) 202 t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3 )); 203 t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19)); 204 t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3 )); 205 t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419 )); 206 t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2 )); 207 #else 208 mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul) 209 mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul) 210 mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul) 211 mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul) 212 mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul) 213 #endif 214 215 r0 = lo128(t[0]) & reduce_mask_51; 216 r1 = lo128(t[1]) & reduce_mask_51; shl128(c, t[0], 13); r1 += c; 217 r2 = lo128(t[2]) & reduce_mask_51; shl128(c, t[1], 13); r2 += c; 218 r3 = lo128(t[3]) & reduce_mask_51; shl128(c, t[2], 13); r3 += c; 219 r4 = lo128(t[4]) & reduce_mask_51; shl128(c, t[3], 13); r4 += c; 220 shl128(c, t[4], 13); r0 += c * 19; 221 c = r0 >> 51; r0 &= reduce_mask_51; 222 r1 += c ; c = r1 >> 51; r1 &= reduce_mask_51; 223 r2 += c ; c = r2 >> 51; r2 &= reduce_mask_51; 224 r3 += c ; c = r3 >> 51; r3 &= reduce_mask_51; 225 r4 += c ; c = r4 >> 51; r4 &= reduce_mask_51; 226 r0 += c * 19; 227 } while(--count); 228 229 out[0] = r0; 230 out[1] = r1; 231 out[2] = r2; 232 out[3] = r3; 233 out[4] = r4; 234 } 235 236 DONNA_INLINE static void 237 curve25519_square(bignum25519 out, const bignum25519 in) { 238 #if !defined(HAVE_NATIVE_UINT128) 239 uint128_t mul; 240 #endif 241 uint128_t t[5]; 242 uint64_t r0,r1,r2,r3,r4,c; 243 uint64_t d0,d1,d2,d4,d419; 244 245 r0 = in[0]; 246 r1 = in[1]; 247 r2 = in[2]; 248 r3 = in[3]; 249 r4 = in[4]; 250 251 d0 = r0 * 2; 252 d1 = r1 * 2; 253 d2 = r2 * 2 * 19; 254 d419 = r4 * 19; 255 d4 = d419 * 2; 256 257 #if defined(HAVE_NATIVE_UINT128) 258 t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3 )); 259 t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19)); 260 t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3 )); 261 t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419 )); 262 t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2 )); 263 #else 264 mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul) 265 mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul) 266 mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul) 267 mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul) 268 mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul) 269 #endif 270 271 r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51); 272 add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51); 273 add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51); 274 add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51); 275 add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51); 276 r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51; 277 r1 += c; 278 279 out[0] = r0; 280 out[1] = r1; 281 out[2] = r2; 282 out[3] = r3; 283 out[4] = r4; 284 } 285 286 /* Take a little-endian, 32-byte number and expand it into polynomial form */ 287 DONNA_INLINE static void 288 curve25519_expand(bignum25519 out, const unsigned char *in) { 289 static const union { uint8_t b[2]; uint16_t s; } endian_check = {{1,0}}; 290 uint64_t x0,x1,x2,x3; 291 292 if (endian_check.s == 1) { 293 x0 = *(uint64_t *)(in + 0); 294 x1 = *(uint64_t *)(in + 8); 295 x2 = *(uint64_t *)(in + 16); 296 x3 = *(uint64_t *)(in + 24); 297 } else { 298 #define F(s) \ 299 ((((uint64_t)in[s + 0]) ) | \ 300 (((uint64_t)in[s + 1]) << 8) | \ 301 (((uint64_t)in[s + 2]) << 16) | \ 302 (((uint64_t)in[s + 3]) << 24) | \ 303 (((uint64_t)in[s + 4]) << 32) | \ 304 (((uint64_t)in[s + 5]) << 40) | \ 305 (((uint64_t)in[s + 6]) << 48) | \ 306 (((uint64_t)in[s + 7]) << 56)) 307 308 x0 = F(0); 309 x1 = F(8); 310 x2 = F(16); 311 x3 = F(24); 312 } 313 314 out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13); 315 out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26); 316 out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39); 317 out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12); 318 out[4] = x3 & reduce_mask_51; 319 } 320 321 /* Take a fully reduced polynomial form number and contract it into a 322 * little-endian, 32-byte array 323 */ 324 DONNA_INLINE static void 325 curve25519_contract(unsigned char *out, const bignum25519 input) { 326 uint64_t t[5]; 327 uint64_t f, i; 328 329 t[0] = input[0]; 330 t[1] = input[1]; 331 t[2] = input[2]; 332 t[3] = input[3]; 333 t[4] = input[4]; 334 335 #define curve25519_contract_carry() \ 336 t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \ 337 t[2] += t[1] >> 51; t[1] &= reduce_mask_51; \ 338 t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \ 339 t[4] += t[3] >> 51; t[3] &= reduce_mask_51; 340 341 #define curve25519_contract_carry_full() curve25519_contract_carry() \ 342 t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51; 343 344 #define curve25519_contract_carry_final() curve25519_contract_carry() \ 345 t[4] &= reduce_mask_51; 346 347 curve25519_contract_carry_full() 348 curve25519_contract_carry_full() 349 350 /* now t is between 0 and 2^255-1, properly carried. */ 351 /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ 352 t[0] += 19; 353 curve25519_contract_carry_full() 354 355 /* now between 19 and 2^255-1 in both cases, and offset by 19. */ 356 t[0] += (reduce_mask_51 + 1) - 19; 357 t[1] += (reduce_mask_51 + 1) - 1; 358 t[2] += (reduce_mask_51 + 1) - 1; 359 t[3] += (reduce_mask_51 + 1) - 1; 360 t[4] += (reduce_mask_51 + 1) - 1; 361 362 /* now between 2^255 and 2^256-20, and offset by 2^255. */ 363 curve25519_contract_carry_final() 364 365 #define write51full(n,shift) \ 366 f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \ 367 for (i = 0; i < 8; i++, f >>= 8) *out++ = (unsigned char)f; 368 #define write51(n) write51full(n,13*n) 369 write51(0) 370 write51(1) 371 write51(2) 372 write51(3) 373 } 374 375 #if !defined(ED25519_GCC_64BIT_CHOOSE) 376 377 /* out = (flag) ? in : out */ 378 DONNA_INLINE static void 379 curve25519_move_conditional_bytes(uint8_t out[96], const uint8_t in[96], uint64_t flag) { 380 const uint64_t nb = flag - 1, b = ~nb; 381 const uint64_t *inq = (const uint64_t *)in; 382 uint64_t *outq = (uint64_t *)out; 383 outq[0] = (outq[0] & nb) | (inq[0] & b); 384 outq[1] = (outq[1] & nb) | (inq[1] & b); 385 outq[2] = (outq[2] & nb) | (inq[2] & b); 386 outq[3] = (outq[3] & nb) | (inq[3] & b); 387 outq[4] = (outq[4] & nb) | (inq[4] & b); 388 outq[5] = (outq[5] & nb) | (inq[5] & b); 389 outq[6] = (outq[6] & nb) | (inq[6] & b); 390 outq[7] = (outq[7] & nb) | (inq[7] & b); 391 outq[8] = (outq[8] & nb) | (inq[8] & b); 392 outq[9] = (outq[9] & nb) | (inq[9] & b); 393 outq[10] = (outq[10] & nb) | (inq[10] & b); 394 outq[11] = (outq[11] & nb) | (inq[11] & b); 395 } 396 397 /* if (iswap) swap(a, b) */ 398 DONNA_INLINE static void 399 curve25519_swap_conditional(bignum25519 a, bignum25519 b, uint64_t iswap) { 400 const uint64_t swap = (uint64_t)(-(int64_t)iswap); 401 uint64_t x0,x1,x2,x3,x4; 402 403 x0 = swap & (a[0] ^ b[0]); a[0] ^= x0; b[0] ^= x0; 404 x1 = swap & (a[1] ^ b[1]); a[1] ^= x1; b[1] ^= x1; 405 x2 = swap & (a[2] ^ b[2]); a[2] ^= x2; b[2] ^= x2; 406 x3 = swap & (a[3] ^ b[3]); a[3] ^= x3; b[3] ^= x3; 407 x4 = swap & (a[4] ^ b[4]); a[4] ^= x4; b[4] ^= x4; 408 } 409 410 #endif /* ED25519_GCC_64BIT_CHOOSE */ 411 412 #define ED25519_64BIT_TABLES