tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

curve25519-donna-64bit.h (14873B)


      1 /*
      2 Public domain by Adam Langley <agl@imperialviolet.org> &
      3                  Andrew M. <liquidsun@gmail.com>
      4 See: https://github.com/floodyberry/curve25519-donna
      5 
      6 64bit integer curve25519 implementation
      7 */
      8 
      9 typedef uint64_t bignum25519[5];
     10 
     11 //static const uint64_t reduce_mask_40 = ((uint64_t)1 << 40) - 1;
     12 static const uint64_t reduce_mask_51 = ((uint64_t)1 << 51) - 1;
     13 //static const uint64_t reduce_mask_56 = ((uint64_t)1 << 56) - 1;
     14 
     15 /* out = in */
     16 DONNA_INLINE static void
     17 curve25519_copy(bignum25519 out, const bignum25519 in) {
     18 out[0] = in[0];
     19 out[1] = in[1];
     20 out[2] = in[2];
     21 out[3] = in[3];
     22 out[4] = in[4];
     23 }
     24 
     25 /* out = a + b */
     26 DONNA_INLINE static void
     27 curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {
     28 out[0] = a[0] + b[0];
     29 out[1] = a[1] + b[1];
     30 out[2] = a[2] + b[2];
     31 out[3] = a[3] + b[3];
     32 out[4] = a[4] + b[4];
     33 }
     34 
     35 /* out = a + b, where a and/or b are the result of a basic op (add,sub) */
     36 DONNA_INLINE static void
     37 curve25519_add_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
     38 out[0] = a[0] + b[0];
     39 out[1] = a[1] + b[1];
     40 out[2] = a[2] + b[2];
     41 out[3] = a[3] + b[3];
     42 out[4] = a[4] + b[4];
     43 }
     44 
     45 DONNA_INLINE static void
     46 curve25519_add_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
     47 uint64_t c;
     48 out[0] = a[0] + b[0]    ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
     49 out[1] = a[1] + b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
     50 out[2] = a[2] + b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
     51 out[3] = a[3] + b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
     52 out[4] = a[4] + b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
     53 out[0] += c * 19;
     54 }
     55 
     56 /* multiples of p */
     57 static const uint64_t twoP0      = 0x0fffffffffffda;
     58 static const uint64_t twoP1234   = 0x0ffffffffffffe;
     59 static const uint64_t fourP0     = 0x1fffffffffffb4;
     60 static const uint64_t fourP1234  = 0x1ffffffffffffc;
     61 
     62 /* out = a - b */
     63 DONNA_INLINE static void
     64 curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {
     65 out[0] = a[0] + twoP0    - b[0];
     66 out[1] = a[1] + twoP1234 - b[1];
     67 out[2] = a[2] + twoP1234 - b[2];
     68 out[3] = a[3] + twoP1234 - b[3];
     69 out[4] = a[4] + twoP1234 - b[4];
     70 }
     71 
     72 /* out = a - b, where a and/or b are the result of a basic op (add,sub) */
     73 DONNA_INLINE static void
     74 curve25519_sub_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {
     75 out[0] = a[0] + fourP0    - b[0];
     76 out[1] = a[1] + fourP1234 - b[1];
     77 out[2] = a[2] + fourP1234 - b[2];
     78 out[3] = a[3] + fourP1234 - b[3];
     79 out[4] = a[4] + fourP1234 - b[4];
     80 }
     81 
     82 DONNA_INLINE static void
     83 curve25519_sub_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {
     84 uint64_t c;
     85 out[0] = a[0] + fourP0    - b[0]    ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
     86 out[1] = a[1] + fourP1234 - b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
     87 out[2] = a[2] + fourP1234 - b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
     88 out[3] = a[3] + fourP1234 - b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
     89 out[4] = a[4] + fourP1234 - b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
     90 out[0] += c * 19;
     91 }
     92 
     93 /* out = -a */
     94 DONNA_INLINE static void
     95 curve25519_neg(bignum25519 out, const bignum25519 a) {
     96 uint64_t c;
     97 out[0] = twoP0    - a[0]    ; c = (out[0] >> 51); out[0] &= reduce_mask_51;
     98 out[1] = twoP1234 - a[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;
     99 out[2] = twoP1234 - a[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;
    100 out[3] = twoP1234 - a[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;
    101 out[4] = twoP1234 - a[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;
    102 out[0] += c * 19;
    103 }
    104 
    105 /* out = a * b */
    106 DONNA_INLINE static void
    107 curve25519_mul(bignum25519 out, const bignum25519 in2, const bignum25519 in) {
    108 #if !defined(HAVE_NATIVE_UINT128)
    109 uint128_t mul;
    110 #endif
    111 uint128_t t[5];
    112 uint64_t r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
    113 
    114 r0 = in[0];
    115 r1 = in[1];
    116 r2 = in[2];
    117 r3 = in[3];
    118 r4 = in[4];
    119 
    120 s0 = in2[0];
    121 s1 = in2[1];
    122 s2 = in2[2];
    123 s3 = in2[3];
    124 s4 = in2[4];
    125 
    126 #if defined(HAVE_NATIVE_UINT128)
    127 t[0]  =  ((uint128_t) r0) * s0;
    128 t[1]  =  ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0;
    129 t[2]  =  ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1;
    130 t[3]  =  ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 + ((uint128_t) r2) * s1;
    131 t[4]  =  ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2;
    132 #else
    133 mul64x64_128(t[0], r0, s0)
    134 mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul)
    135 mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul)
    136 mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul)
    137 mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul)
    138 #endif
    139 
    140 r1 *= 19;
    141 r2 *= 19;
    142 r3 *= 19;
    143 r4 *= 19;
    144 
    145 #if defined(HAVE_NATIVE_UINT128)
    146 t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2;
    147 t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3;
    148 t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4;
    149 t[3] += ((uint128_t) r4) * s4;
    150 #else
    151 mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], mul)
    152 mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul)
    153 mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul)
    154 mul64x64_128(mul, r4, s4) add128(t[3], mul)
    155 #endif
    156 
    157 
    158                      r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
    159 add128_64(t[1], c)   r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
    160 add128_64(t[2], c)   r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
    161 add128_64(t[3], c)   r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
    162 add128_64(t[4], c)   r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
    163 r0 +=   c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
    164 r1 +=   c;
    165 
    166 out[0] = r0;
    167 out[1] = r1;
    168 out[2] = r2;
    169 out[3] = r3;
    170 out[4] = r4;
    171 }
    172 
    173 DONNA_NOINLINE static void
    174 curve25519_mul_noinline(bignum25519 out, const bignum25519 in2, const bignum25519 in) {
    175 curve25519_mul(out, in2, in);
    176 }
    177 
    178 /* out = in^(2 * count) */
    179 DONNA_NOINLINE static void
    180 curve25519_square_times(bignum25519 out, const bignum25519 in, uint64_t count) {
    181 #if !defined(HAVE_NATIVE_UINT128)
    182 uint128_t mul;
    183 #endif
    184 uint128_t t[5];
    185 uint64_t r0,r1,r2,r3,r4,c;
    186 uint64_t d0,d1,d2,d4,d419;
    187 
    188 r0 = in[0];
    189 r1 = in[1];
    190 r2 = in[2];
    191 r3 = in[3];
    192 r4 = in[4];
    193 
    194 do {
    195 	d0 = r0 * 2;
    196 	d1 = r1 * 2;
    197 	d2 = r2 * 2 * 19;
    198 	d419 = r4 * 19;
    199 	d4 = d419 * 2;
    200 
    201 #if defined(HAVE_NATIVE_UINT128)
    202 	t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3     ));
    203 	t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19));
    204 	t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3     ));
    205 	t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419   ));
    206 	t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2     ));
    207 #else
    208 	mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2,      r3) add128(t[0], mul)
    209 	mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
    210 	mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4,      r3) add128(t[2], mul)
    211 	mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4,    d419) add128(t[3], mul)
    212 	mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2,      r2) add128(t[4], mul)
    213 #endif
    214 
    215 	r0 = lo128(t[0]) & reduce_mask_51;
    216 	r1 = lo128(t[1]) & reduce_mask_51; shl128(c, t[0], 13); r1 += c;
    217 	r2 = lo128(t[2]) & reduce_mask_51; shl128(c, t[1], 13); r2 += c;
    218 	r3 = lo128(t[3]) & reduce_mask_51; shl128(c, t[2], 13); r3 += c;
    219 	r4 = lo128(t[4]) & reduce_mask_51; shl128(c, t[3], 13); r4 += c; 
    220 	                                   shl128(c, t[4], 13); r0 += c * 19;
    221 	               c = r0 >> 51; r0 &= reduce_mask_51;
    222 	r1 += c     ;  c = r1 >> 51; r1 &= reduce_mask_51;
    223 	r2 += c     ;  c = r2 >> 51; r2 &= reduce_mask_51;
    224 	r3 += c     ;  c = r3 >> 51; r3 &= reduce_mask_51;
    225 	r4 += c     ;  c = r4 >> 51; r4 &= reduce_mask_51;
    226 	r0 += c * 19;
    227 } while(--count);
    228 
    229 out[0] = r0;
    230 out[1] = r1;
    231 out[2] = r2;
    232 out[3] = r3;
    233 out[4] = r4;
    234 }
    235 
    236 DONNA_INLINE static void
    237 curve25519_square(bignum25519 out, const bignum25519 in) {
    238 #if !defined(HAVE_NATIVE_UINT128)
    239 uint128_t mul;
    240 #endif
    241 uint128_t t[5];
    242 uint64_t r0,r1,r2,r3,r4,c;
    243 uint64_t d0,d1,d2,d4,d419;
    244 
    245 r0 = in[0];
    246 r1 = in[1];
    247 r2 = in[2];
    248 r3 = in[3];
    249 r4 = in[4];
    250 
    251 d0 = r0 * 2;
    252 d1 = r1 * 2;
    253 d2 = r2 * 2 * 19;
    254 d419 = r4 * 19;
    255 d4 = d419 * 2;
    256 
    257 #if defined(HAVE_NATIVE_UINT128)
    258 t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3     ));
    259 t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19));
    260 t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3     ));
    261 t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419   ));
    262 t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2     ));
    263 #else
    264 mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2,      r3) add128(t[0], mul)
    265 mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)
    266 mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4,      r3) add128(t[2], mul)
    267 mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4,    d419) add128(t[3], mul)
    268 mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2,      r2) add128(t[4], mul)
    269 #endif
    270 
    271                      r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);
    272 add128_64(t[1], c)   r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);
    273 add128_64(t[2], c)   r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);
    274 add128_64(t[3], c)   r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);
    275 add128_64(t[4], c)   r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);
    276 r0 +=   c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;
    277 r1 +=   c;
    278 
    279 out[0] = r0;
    280 out[1] = r1;
    281 out[2] = r2;
    282 out[3] = r3;
    283 out[4] = r4;
    284 }
    285 
    286 /* Take a little-endian, 32-byte number and expand it into polynomial form */
    287 DONNA_INLINE static void
    288 curve25519_expand(bignum25519 out, const unsigned char *in) {
    289 static const union { uint8_t b[2]; uint16_t s; } endian_check = {{1,0}};
    290 uint64_t x0,x1,x2,x3;
    291 
    292 if (endian_check.s == 1) {
    293 	x0 = *(uint64_t *)(in + 0);
    294 	x1 = *(uint64_t *)(in + 8);
    295 	x2 = *(uint64_t *)(in + 16);
    296 	x3 = *(uint64_t *)(in + 24);
    297 } else {
    298 	#define F(s)                         \
    299 		((((uint64_t)in[s + 0])      ) | \
    300 		 (((uint64_t)in[s + 1]) <<  8) | \
    301 		 (((uint64_t)in[s + 2]) << 16) | \
    302 		 (((uint64_t)in[s + 3]) << 24) | \
    303 		 (((uint64_t)in[s + 4]) << 32) | \
    304 		 (((uint64_t)in[s + 5]) << 40) | \
    305 		 (((uint64_t)in[s + 6]) << 48) | \
    306 		 (((uint64_t)in[s + 7]) << 56))
    307 
    308 	x0 = F(0);
    309 	x1 = F(8);
    310 	x2 = F(16);
    311 	x3 = F(24);
    312 }
    313 
    314 out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13);
    315 out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26);
    316 out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39);
    317 out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12);
    318 out[4] = x3 & reduce_mask_51;
    319 }
    320 
    321 /* Take a fully reduced polynomial form number and contract it into a
    322 * little-endian, 32-byte array
    323 */
    324 DONNA_INLINE static void
    325 curve25519_contract(unsigned char *out, const bignum25519 input) {
    326 uint64_t t[5];
    327 uint64_t f, i;
    328 
    329 t[0] = input[0];
    330 t[1] = input[1];
    331 t[2] = input[2];
    332 t[3] = input[3];
    333 t[4] = input[4];
    334 
    335 #define curve25519_contract_carry() \
    336 	t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \
    337 	t[2] += t[1] >> 51; t[1] &= reduce_mask_51; \
    338 	t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \
    339 	t[4] += t[3] >> 51; t[3] &= reduce_mask_51;
    340 
    341 #define curve25519_contract_carry_full() curve25519_contract_carry() \
    342 	t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51;
    343 
    344 #define curve25519_contract_carry_final() curve25519_contract_carry() \
    345 	t[4] &= reduce_mask_51;
    346 
    347 curve25519_contract_carry_full()
    348 curve25519_contract_carry_full()
    349 
    350 /* now t is between 0 and 2^255-1, properly carried. */
    351 /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
    352 t[0] += 19;
    353 curve25519_contract_carry_full()
    354 
    355 /* now between 19 and 2^255-1 in both cases, and offset by 19. */
    356 t[0] += (reduce_mask_51 + 1) - 19;
    357 t[1] += (reduce_mask_51 + 1) - 1;
    358 t[2] += (reduce_mask_51 + 1) - 1;
    359 t[3] += (reduce_mask_51 + 1) - 1;
    360 t[4] += (reduce_mask_51 + 1) - 1;
    361 
    362 /* now between 2^255 and 2^256-20, and offset by 2^255. */
    363 curve25519_contract_carry_final()
    364 
    365 #define write51full(n,shift) \
    366 	f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \
    367 	for (i = 0; i < 8; i++, f >>= 8) *out++ = (unsigned char)f;
    368 #define write51(n) write51full(n,13*n)
    369 write51(0)
    370 write51(1)
    371 write51(2)
    372 write51(3)
    373 }
    374 
    375 #if !defined(ED25519_GCC_64BIT_CHOOSE)
    376 
    377 /* out = (flag) ? in : out */
    378 DONNA_INLINE static void
    379 curve25519_move_conditional_bytes(uint8_t out[96], const uint8_t in[96], uint64_t flag) {
    380 const uint64_t nb = flag - 1, b = ~nb;
    381 const uint64_t *inq = (const uint64_t *)in;
    382 uint64_t *outq = (uint64_t *)out;
    383 outq[0] = (outq[0] & nb) | (inq[0] & b);
    384 outq[1] = (outq[1] & nb) | (inq[1] & b);
    385 outq[2] = (outq[2] & nb) | (inq[2] & b);
    386 outq[3] = (outq[3] & nb) | (inq[3] & b);
    387 outq[4] = (outq[4] & nb) | (inq[4] & b);
    388 outq[5] = (outq[5] & nb) | (inq[5] & b);
    389 outq[6] = (outq[6] & nb) | (inq[6] & b);
    390 outq[7] = (outq[7] & nb) | (inq[7] & b);
    391 outq[8] = (outq[8] & nb) | (inq[8] & b);
    392 outq[9] = (outq[9] & nb) | (inq[9] & b);
    393 outq[10] = (outq[10] & nb) | (inq[10] & b);
    394 outq[11] = (outq[11] & nb) | (inq[11] & b);
    395 }
    396 
    397 /* if (iswap) swap(a, b) */
    398 DONNA_INLINE static void
    399 curve25519_swap_conditional(bignum25519 a, bignum25519 b, uint64_t iswap) {
    400 const uint64_t swap = (uint64_t)(-(int64_t)iswap);
    401 uint64_t x0,x1,x2,x3,x4;
    402 
    403 x0 = swap & (a[0] ^ b[0]); a[0] ^= x0; b[0] ^= x0;
    404 x1 = swap & (a[1] ^ b[1]); a[1] ^= x1; b[1] ^= x1;
    405 x2 = swap & (a[2] ^ b[2]); a[2] ^= x2; b[2] ^= x2;
    406 x3 = swap & (a[3] ^ b[3]); a[3] ^= x3; b[3] ^= x3;
    407 x4 = swap & (a[4] ^ b[4]); a[4] ^= x4; b[4] ^= x4;
    408 }
    409 
    410 #endif /* ED25519_GCC_64BIT_CHOOSE */
    411 
    412 #define ED25519_64BIT_TABLES