[ tor ] .git.dasho

modm-donna-64bit.h (13726B)
      1 /*
      2 Public domain by Andrew M. <liquidsun@gmail.com>
      3 */
      4 
      5 
      6 /*
      7 Arithmetic modulo the group order n = 2^252 +  27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989
      8 
      9 k = 32
     10 b = 1 << 8 = 256
     11 m = 2^252 + 27742317777372353535851937790883648493 = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed
     12 mu = floor( b^(k*2) / m ) = 0xfffffffffffffffffffffffffffffffeb2106215d086329a7ed9ce5a30a2c131b
     13 */
     14 
     15 #define bignum256modm_bits_per_limb 56
     16 #define bignum256modm_limb_size 5
     17 
     18 typedef uint64_t bignum256modm_element_t;
     19 typedef bignum256modm_element_t bignum256modm[5];
     20 
     21 static const bignum256modm modm_m = {
     22 0x12631a5cf5d3ed, 
     23 0xf9dea2f79cd658, 
     24 0x000000000014de, 
     25 0x00000000000000, 
     26 0x00000010000000
     27 };
     28 
     29 static const bignum256modm modm_mu = {
     30 0x9ce5a30a2c131b,
     31 0x215d086329a7ed,
     32 0xffffffffeb2106,
     33 0xffffffffffffff,
     34 0x00000fffffffff
     35 };
     36 
     37 static bignum256modm_element_t
     38 lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {
     39 return (a - b) >> 63;
     40 }
     41 
     42 static void
     43 reduce256_modm(bignum256modm r) {
     44 bignum256modm t;
     45 bignum256modm_element_t b = 0, pb, mask;
     46 
     47 /* t = r - m */
     48 pb = 0;
     49 pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 56)); pb = b;
     50 pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 56)); pb = b;
     51 pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 56)); pb = b;
     52 pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 56)); pb = b;
     53 pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 32)); 
     54 
     55 /* keep r if r was smaller than m */
     56 mask = b - 1;
     57 
     58 r[0] ^= mask & (r[0] ^ t[0]);
     59 r[1] ^= mask & (r[1] ^ t[1]);
     60 r[2] ^= mask & (r[2] ^ t[2]);
     61 r[3] ^= mask & (r[3] ^ t[3]);
     62 r[4] ^= mask & (r[4] ^ t[4]);
     63 }
     64 
     65 static void
     66 barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {
     67 bignum256modm q3, r2;
     68 uint128_t c, mul;
     69 bignum256modm_element_t f, b, pb;
     70 
     71 /* q1 = x >> 248 = 264 bits = 5 56 bit elements
     72    q2 = mu * q1
     73    q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264 */
     74 mul64x64_128(c, modm_mu[0], q1[3])                 mul64x64_128(mul, modm_mu[3], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[2]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[1]) add128(c, mul) shr128(f, c, 56);
     75 mul64x64_128(c, modm_mu[0], q1[4]) add128_64(c, f) mul64x64_128(mul, modm_mu[4], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[1]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[2]) add128(c, mul)
     76 f = lo128(c); q3[0] = (f >> 40) & 0xffff; shr128(f, c, 56);
     77 mul64x64_128(c, modm_mu[4], q1[1]) add128_64(c, f) mul64x64_128(mul, modm_mu[1], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[2]) add128(c, mul)
     78 f = lo128(c); q3[0] |= (f << 16) & 0xffffffffffffff; q3[1] = (f >> 40) & 0xffff; shr128(f, c, 56);
     79 mul64x64_128(c, modm_mu[4], q1[2]) add128_64(c, f) mul64x64_128(mul, modm_mu[2], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[3]) add128(c, mul)
     80 f = lo128(c); q3[1] |= (f << 16) & 0xffffffffffffff; q3[2] = (f >> 40) & 0xffff; shr128(f, c, 56);
     81 mul64x64_128(c, modm_mu[4], q1[3]) add128_64(c, f) mul64x64_128(mul, modm_mu[3], q1[4]) add128(c, mul)
     82 f = lo128(c); q3[2] |= (f << 16) & 0xffffffffffffff; q3[3] = (f >> 40) & 0xffff; shr128(f, c, 56);
     83 mul64x64_128(c, modm_mu[4], q1[4]) add128_64(c, f)
     84 f = lo128(c); q3[3] |= (f << 16) & 0xffffffffffffff; q3[4] = (f >> 40) & 0xffff; shr128(f, c, 56);
     85 q3[4] |= (f << 16);
     86 
     87 mul64x64_128(c, modm_m[0], q3[0]) 
     88 r2[0] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
     89 mul64x64_128(c, modm_m[0], q3[1]) add128_64(c, f) mul64x64_128(mul, modm_m[1], q3[0]) add128(c, mul)
     90 r2[1] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
     91 mul64x64_128(c, modm_m[0], q3[2]) add128_64(c, f) mul64x64_128(mul, modm_m[2], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[1]) add128(c, mul)
     92 r2[2] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
     93 mul64x64_128(c, modm_m[0], q3[3]) add128_64(c, f) mul64x64_128(mul, modm_m[3], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[2]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[1]) add128(c, mul)
     94 r2[3] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
     95 mul64x64_128(c, modm_m[0], q3[4]) add128_64(c, f) mul64x64_128(mul, modm_m[4], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[3], q3[1]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[3]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[2]) add128(c, mul)
     96 r2[4] = lo128(c) & 0x0000ffffffffff;
     97 
     98 pb = 0;
     99 pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 56)); pb = b;
    100 pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 56)); pb = b;
    101 pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 56)); pb = b;
    102 pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 56)); pb = b;
    103 pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 40)); 
    104 
    105 reduce256_modm(r);
    106 reduce256_modm(r);
    107 }
    108 
    109 
    110 static void
    111 add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
    112 bignum256modm_element_t c;
    113 
    114 c  = x[0] + y[0]; r[0] = c & 0xffffffffffffff; c >>= 56;
    115 c += x[1] + y[1]; r[1] = c & 0xffffffffffffff; c >>= 56;
    116 c += x[2] + y[2]; r[2] = c & 0xffffffffffffff; c >>= 56;
    117 c += x[3] + y[3]; r[3] = c & 0xffffffffffffff; c >>= 56;
    118 c += x[4] + y[4]; r[4] = c;
    119 
    120 reduce256_modm(r);
    121 }
    122 
    123 static void
    124 mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
    125 bignum256modm q1, r1;
    126 uint128_t c, mul;
    127 bignum256modm_element_t f;
    128 
    129 mul64x64_128(c, x[0], y[0])
    130 f = lo128(c); r1[0] = f & 0xffffffffffffff; shr128(f, c, 56);
    131 mul64x64_128(c, x[0], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[0]) add128(c, mul) 
    132 f = lo128(c); r1[1] = f & 0xffffffffffffff; shr128(f, c, 56);
    133 mul64x64_128(c, x[0], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[1]) add128(c, mul) 
    134 f = lo128(c); r1[2] = f & 0xffffffffffffff; shr128(f, c, 56);
    135 mul64x64_128(c, x[0], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[2]) add128(c, mul) mul64x64_128(mul, x[2], y[1]) add128(c, mul) 
    136 f = lo128(c); r1[3] = f & 0xffffffffffffff; shr128(f, c, 56);
    137 mul64x64_128(c, x[0], y[4]) add128_64(c, f) mul64x64_128(mul, x[4], y[0]) add128(c, mul) mul64x64_128(mul, x[3], y[1]) add128(c, mul) mul64x64_128(mul, x[1], y[3]) add128(c, mul) mul64x64_128(mul, x[2], y[2]) add128(c, mul) 
    138 f = lo128(c); r1[4] = f & 0x0000ffffffffff; q1[0] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
    139 mul64x64_128(c, x[4], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[4]) add128(c, mul) mul64x64_128(mul, x[2], y[3]) add128(c, mul) mul64x64_128(mul, x[3], y[2]) add128(c, mul) 
    140 f = lo128(c); q1[0] |= (f << 32) & 0xffffffffffffff; q1[1] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
    141 mul64x64_128(c, x[4], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[4]) add128(c, mul) mul64x64_128(mul, x[3], y[3]) add128(c, mul) 
    142 f = lo128(c); q1[1] |= (f << 32) & 0xffffffffffffff; q1[2] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
    143 mul64x64_128(c, x[4], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[4]) add128(c, mul) 
    144 f = lo128(c); q1[2] |= (f << 32) & 0xffffffffffffff; q1[3] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
    145 mul64x64_128(c, x[4], y[4]) add128_64(c, f)
    146 f = lo128(c); q1[3] |= (f << 32) & 0xffffffffffffff; q1[4] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
    147 q1[4] |= (f << 32);
    148 
    149 barrett_reduce256_modm(r, q1, r1);
    150 }
    151 
    152 static void
    153 expand256_modm(bignum256modm out, const unsigned char *in, size_t len) {
    154 unsigned char work[64] = {0};
    155 bignum256modm_element_t x[16];
    156 bignum256modm q1;
    157 
    158 memcpy(work, in, len);
    159 x[0] = U8TO64_LE(work +  0);
    160 x[1] = U8TO64_LE(work +  8);
    161 x[2] = U8TO64_LE(work + 16);
    162 x[3] = U8TO64_LE(work + 24);
    163 x[4] = U8TO64_LE(work + 32);
    164 x[5] = U8TO64_LE(work + 40);
    165 x[6] = U8TO64_LE(work + 48);
    166 x[7] = U8TO64_LE(work + 56);
    167 
    168 /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */
    169 out[0] = (                         x[0]) & 0xffffffffffffff;
    170 out[1] = ((x[ 0] >> 56) | (x[ 1] <<  8)) & 0xffffffffffffff;
    171 out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
    172 out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
    173 out[4] = ((x[ 3] >> 32) | (x[ 4] << 32)) & 0x0000ffffffffff;
    174 
    175 /* under 252 bits, no need to reduce */
    176 if (len < 32)
    177 	return;
    178 
    179 /* q1 = x >> 248 = 264 bits */
    180 q1[0] = ((x[ 3] >> 56) | (x[ 4] <<  8)) & 0xffffffffffffff;
    181 q1[1] = ((x[ 4] >> 48) | (x[ 5] << 16)) & 0xffffffffffffff;
    182 q1[2] = ((x[ 5] >> 40) | (x[ 6] << 24)) & 0xffffffffffffff;
    183 q1[3] = ((x[ 6] >> 32) | (x[ 7] << 32)) & 0xffffffffffffff;
    184 q1[4] = ((x[ 7] >> 24)                );
    185 
    186 barrett_reduce256_modm(out, q1, out);
    187 }
    188 
    189 static void
    190 expand_raw256_modm(bignum256modm out, const unsigned char in[32]) {
    191 bignum256modm_element_t x[4];
    192 
    193 x[0] = U8TO64_LE(in +  0);
    194 x[1] = U8TO64_LE(in +  8);
    195 x[2] = U8TO64_LE(in + 16);
    196 x[3] = U8TO64_LE(in + 24);
    197 
    198 out[0] = (                         x[0]) & 0xffffffffffffff;
    199 out[1] = ((x[ 0] >> 56) | (x[ 1] <<  8)) & 0xffffffffffffff;
    200 out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
    201 out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
    202 out[4] = ((x[ 3] >> 32)                ) & 0x000000ffffffff;
    203 }
    204 
    205 static void
    206 contract256_modm(unsigned char out[32], const bignum256modm in) {
    207 U64TO8_LE(out +  0, (in[0]      ) | (in[1] << 56));
    208 U64TO8_LE(out +  8, (in[1] >>  8) | (in[2] << 48));
    209 U64TO8_LE(out + 16, (in[2] >> 16) | (in[3] << 40));
    210 U64TO8_LE(out + 24, (in[3] >> 24) | (in[4] << 32));
    211 }
    212 
    213 static void
    214 contract256_window4_modm(signed char r[64], const bignum256modm in) {
    215 char carry;
    216 signed char *quads = r;
    217 bignum256modm_element_t i, j, v, m;
    218 
    219 for (i = 0; i < 5; i++) {
    220 	v = in[i];
    221 	m = (i == 4) ? 8 : 14;
    222 	for (j = 0; j < m; j++) {
    223 		*quads++ = (v & 15);
    224 		v >>= 4;
    225 	}
    226 }
    227 
    228 /* making it signed */
    229 carry = 0;
    230 for(i = 0; i < 63; i++) {
    231 	r[i] += carry;
    232 	r[i+1] += (r[i] >> 4);
    233 	r[i] &= 15;
    234 	carry = (r[i] >> 3);
    235 	r[i] -= (carry << 4);
    236 }
    237 r[63] += carry;
    238 }
    239 
    240 static void
    241 contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
    242 int i,j,k,b;
    243 int m = (1 << (windowsize - 1)) - 1;
    244        const int soplen = 256;
    245 signed char *bits = r;
    246 bignum256modm_element_t v;
    247 
    248 /* first put the binary expansion into r  */
    249 for (i = 0; i < 4; i++) {
    250 	v = s[i];
    251 	for (j = 0; j < 56; j++, v >>= 1)
    252 		*bits++ = (v & 1);
    253 }
    254 v = s[4];
    255 for (j = 0; j < 32; j++, v >>= 1)
    256 	*bits++ = (v & 1);
    257 
    258 /* Making it sliding window */
    259 for (j = 0; j < soplen; j++) {
    260 	if (!r[j])
    261 		continue;
    262 
    263 	for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
    264 		/* XXX Tor: coverity scan says that r[j+b] can
    265 		 * overflow, but that's not possible: b < (soplen-j)
    266 		 * guarantees that b + j < soplen, so b+j < 256,
    267 		 * so the index doesn't overflow. */
    268 		if ((r[j] + (r[j + b] << b)) <= m) {
    269 			r[j] += r[j + b] << b;
    270 			r[j + b] = 0;
    271 		} else if ((r[j] - (r[j + b] << b)) >= -m) {
    272 			r[j] -= r[j + b] << b;
    273 			for (k = j + b; k < soplen; k++) {
    274 				if (!r[k]) {
    275 					r[k] = 1;
    276 					break;
    277 				}
    278 				r[k] = 0;
    279 			}
    280 		} else if (r[j + b]) {
    281 			break;
    282 		}
    283 	}
    284 }
    285 }
    286 
    287 /*
    288 helpers for batch verifcation, are allowed to be vartime
    289 */
    290 
    291 /* out = a - b, a must be larger than b */
    292 static void
    293 sub256_modm_batch(bignum256modm out, const bignum256modm a, const bignum256modm b, size_t limbsize) {
    294 size_t i = 0;
    295 bignum256modm_element_t carry = 0;
    296 switch (limbsize) {
    297 	case 4: out[i] = (a[i] - b[i])        ; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++; FALLTHROUGH;
    298 	case 3: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++; FALLTHROUGH;
    299 	case 2:	out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++; FALLTHROUGH;
    300 	case 1:	out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++; FALLTHROUGH;
    301 	case 0: 
    302 	default: out[i] = (a[i] - b[i]) - carry;
    303 }
    304 }
    305 
    306 
    307 /* is a < b */
    308 static int
    309 lt256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
    310 size_t i = 0;
    311 bignum256modm_element_t t, carry = 0;
    312 switch (limbsize) {
    313 	case 4: t = (a[i] - b[i])        ; carry = (t >> 63); i++; FALLTHROUGH;
    314 	case 3: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++; FALLTHROUGH;
    315 	case 2: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++; FALLTHROUGH;
    316 	case 1: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++; FALLTHROUGH;
    317 	case 0: t = (a[i] - b[i]) - carry; carry = (t >> 63);
    318 }
    319 return (int)carry;
    320 }
    321 
    322 /* is a <= b */
    323 static int
    324 lte256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
    325 size_t i = 0;
    326 bignum256modm_element_t t, carry = 0;
    327 switch (limbsize) {
    328 	case 4: t = (b[i] - a[i])        ; carry = (t >> 63); i++; FALLTHROUGH;
    329 	case 3: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++; FALLTHROUGH;
    330 	case 2: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++; FALLTHROUGH;
    331 	case 1: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++; FALLTHROUGH;
    332 	case 0: t = (b[i] - a[i]) - carry; carry = (t >> 63);
    333 }
    334 return (int)!carry;
    335 }
    336 
    337 /* is a == 0 */
    338 static int
    339 iszero256_modm_batch(const bignum256modm a) {
    340 size_t i;
    341 for (i = 0; i < 5; i++)
    342 	if (a[i])
    343 		return 0;
    344 return 1;
    345 }
    346 
    347 /* is a == 1 */
    348 static int
    349 isone256_modm_batch(const bignum256modm a) {
    350 size_t i;
    351 for (i = 0; i < 5; i++)
    352 	if (a[i] != ((i) ? 0 : 1))
    353 		return 0;
    354 return 1;
    355 }
    356 
    357 /* can a fit in to (at most) 128 bits */
    358 static int
    359 isatmost128bits256_modm_batch(const bignum256modm a) {
    360 uint64_t mask =
    361 	((a[4]                   )  | /*  32 */
    362 	 (a[3]                   )  | /*  88 */
    363 	 (a[2] & 0xffffffffff0000));
    364 
    365 return (mask == 0);
    366 }
	tor The Tor anonymity network
	git clone https://git.dasho.dev/tor.git
	Log \| Files \| Refs \| README \| LICENSE