curve25519-ref10.c (38169B)
#include <stdint.h>

typedef int32_t crypto_int32;
typedef int64_t crypto_int64;
typedef uint64_t crypto_uint64;

/*
fe means field element of GF(2^255-19).
An element t is represented as t[0]+2^26*t[1]+2^51*t[2]+2^77*t[3]+...
+2^230*t[9], i.e. ten limbs alternating between 26 and 25 bits.

NOTE(review): the carry chains below use arithmetic right shift of
possibly-negative signed values (implementation-defined, but arithmetic on
all two's-complement targets).  The former `carry << k` on a possibly
negative carry was undefined behavior (C11 6.5.7p4); it has been replaced
by the equivalent multiplication, which compilers lower to the same shift.
*/

typedef crypto_int32 fe[10];

/*
h = 0
*/

void fe_0(fe h)
{
  int i;
  for (i = 0;i < 10;++i) h[i] = 0;
}

/*
h = 1
*/

void fe_1(fe h)
{
  int i;
  h[0] = 1;
  for (i = 1;i < 10;++i) h[i] = 0;
}

/*
h = f + g
Can overlap h with f or g.

Preconditions:
|f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.

Postconditions:
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
*/

void fe_add(fe h,fe f,fe g)
{
  int i;
  /* h[i] depends only on f[i] and g[i], so per-limb assignment is safe
     even when h aliases f or g. */
  for (i = 0;i < 10;++i) h[i] = f[i] + g[i];
}

/*
h = f
*/

void fe_copy(fe h,fe f)
{
  int i;
  for (i = 0;i < 10;++i) h[i] = f[i];
}

/*
Replace (f,g) with (g,f) if b == 1;
replace (f,g) with (f,g) if b == 0.

Constant time: no data-dependent branches or memory accesses.

Preconditions: b in {0,1}.
*/

void fe_cswap(fe f,fe g,unsigned int b)
{
  int i;
  b = -b; /* b in {0,1} -> mask of 0 or all ones (unsigned wraparound) */
  for (i = 0;i < 10;++i) {
    crypto_int32 x = f[i] ^ g[i];
    x &= b; /* x is 0 when b == 0, so the XORs below are no-ops */
    f[i] ^= x;
    g[i] ^= x;
  }
}

/* Little-endian load of 3 bytes. */
static crypto_uint64 load_3(const unsigned char *in)
{
  crypto_uint64 result;
  result = (crypto_uint64) in[0];
  result |= ((crypto_uint64) in[1]) << 8;
  result |= ((crypto_uint64) in[2]) << 16;
  return result;
}

/* Little-endian load of 4 bytes. */
static crypto_uint64 load_4(const unsigned char *in)
{
  crypto_uint64 result;
  result = (crypto_uint64) in[0];
  result |= ((crypto_uint64) in[1]) << 8;
  result |= ((crypto_uint64) in[2]) << 16;
  result |= ((crypto_uint64) in[3]) << 24;
  return result;
}

/*
h = little-endian 255-bit integer s, split into the 10-limb representation.
The shifts position each 25/26-bit window; one round of carries brings every
limb into its canonical range (the top limb folds back into h0 times 19,
since 2^255 = 19 mod p).
*/

void fe_frombytes(fe h,const unsigned char *s)
{
  crypto_int64 h0 = load_4(s);
  crypto_int64 h1 = load_3(s + 4) << 6;
  crypto_int64 h2 = load_3(s + 7) << 5;
  crypto_int64 h3 = load_3(s + 10) << 3;
  crypto_int64 h4 = load_3(s + 13) << 2;
  crypto_int64 h5 = load_4(s + 16);
  crypto_int64 h6 = load_3(s + 20) << 7;
  crypto_int64 h7 = load_3(s + 23) << 5;
  crypto_int64 h8 = load_3(s + 26) << 4;
  crypto_int64 h9 = load_3(s + 29) << 2;
  crypto_int64 carry0;
  crypto_int64 carry1;
  crypto_int64 carry2;
  crypto_int64 carry3;
  crypto_int64 carry4;
  crypto_int64 carry5;
  crypto_int64 carry6;
  crypto_int64 carry7;
  crypto_int64 carry8;
  crypto_int64 carry9;

  carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((crypto_int64) 1 << 25);
  carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 * ((crypto_int64) 1 << 25);
  carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 * ((crypto_int64) 1 << 25);
  carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 * ((crypto_int64) 1 << 25);
  carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 * ((crypto_int64) 1 << 25);

  carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 * ((crypto_int64) 1 << 26);
  carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 * ((crypto_int64) 1 << 26);
  carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 * ((crypto_int64) 1 << 26);
  carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 * ((crypto_int64) 1 << 26);
  carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 * ((crypto_int64) 1 << 26);

  h[0] = h0;
  h[1] = h1;
  h[2] = h2;
  h[3] = h3;
  h[4] = h4;
  h[5] = h5;
  h[6] = h6;
  h[7] = h7;
  h[8] = h8;
  h[9] = h9;
}
299 */ 300 301 void fe_mul(fe h,fe f,fe g) 302 { 303 crypto_int32 f0 = f[0]; 304 crypto_int32 f1 = f[1]; 305 crypto_int32 f2 = f[2]; 306 crypto_int32 f3 = f[3]; 307 crypto_int32 f4 = f[4]; 308 crypto_int32 f5 = f[5]; 309 crypto_int32 f6 = f[6]; 310 crypto_int32 f7 = f[7]; 311 crypto_int32 f8 = f[8]; 312 crypto_int32 f9 = f[9]; 313 crypto_int32 g0 = g[0]; 314 crypto_int32 g1 = g[1]; 315 crypto_int32 g2 = g[2]; 316 crypto_int32 g3 = g[3]; 317 crypto_int32 g4 = g[4]; 318 crypto_int32 g5 = g[5]; 319 crypto_int32 g6 = g[6]; 320 crypto_int32 g7 = g[7]; 321 crypto_int32 g8 = g[8]; 322 crypto_int32 g9 = g[9]; 323 crypto_int32 g1_19 = 19 * g1; /* 1.4*2^29 */ 324 crypto_int32 g2_19 = 19 * g2; /* 1.4*2^30; still ok */ 325 crypto_int32 g3_19 = 19 * g3; 326 crypto_int32 g4_19 = 19 * g4; 327 crypto_int32 g5_19 = 19 * g5; 328 crypto_int32 g6_19 = 19 * g6; 329 crypto_int32 g7_19 = 19 * g7; 330 crypto_int32 g8_19 = 19 * g8; 331 crypto_int32 g9_19 = 19 * g9; 332 crypto_int32 f1_2 = 2 * f1; 333 crypto_int32 f3_2 = 2 * f3; 334 crypto_int32 f5_2 = 2 * f5; 335 crypto_int32 f7_2 = 2 * f7; 336 crypto_int32 f9_2 = 2 * f9; 337 crypto_int64 f0g0 = f0 * (crypto_int64) g0; 338 crypto_int64 f0g1 = f0 * (crypto_int64) g1; 339 crypto_int64 f0g2 = f0 * (crypto_int64) g2; 340 crypto_int64 f0g3 = f0 * (crypto_int64) g3; 341 crypto_int64 f0g4 = f0 * (crypto_int64) g4; 342 crypto_int64 f0g5 = f0 * (crypto_int64) g5; 343 crypto_int64 f0g6 = f0 * (crypto_int64) g6; 344 crypto_int64 f0g7 = f0 * (crypto_int64) g7; 345 crypto_int64 f0g8 = f0 * (crypto_int64) g8; 346 crypto_int64 f0g9 = f0 * (crypto_int64) g9; 347 crypto_int64 f1g0 = f1 * (crypto_int64) g0; 348 crypto_int64 f1g1_2 = f1_2 * (crypto_int64) g1; 349 crypto_int64 f1g2 = f1 * (crypto_int64) g2; 350 crypto_int64 f1g3_2 = f1_2 * (crypto_int64) g3; 351 crypto_int64 f1g4 = f1 * (crypto_int64) g4; 352 crypto_int64 f1g5_2 = f1_2 * (crypto_int64) g5; 353 crypto_int64 f1g6 = f1 * (crypto_int64) g6; 354 crypto_int64 f1g7_2 = f1_2 * (crypto_int64) g7; 355 
crypto_int64 f1g8 = f1 * (crypto_int64) g8; 356 crypto_int64 f1g9_38 = f1_2 * (crypto_int64) g9_19; 357 crypto_int64 f2g0 = f2 * (crypto_int64) g0; 358 crypto_int64 f2g1 = f2 * (crypto_int64) g1; 359 crypto_int64 f2g2 = f2 * (crypto_int64) g2; 360 crypto_int64 f2g3 = f2 * (crypto_int64) g3; 361 crypto_int64 f2g4 = f2 * (crypto_int64) g4; 362 crypto_int64 f2g5 = f2 * (crypto_int64) g5; 363 crypto_int64 f2g6 = f2 * (crypto_int64) g6; 364 crypto_int64 f2g7 = f2 * (crypto_int64) g7; 365 crypto_int64 f2g8_19 = f2 * (crypto_int64) g8_19; 366 crypto_int64 f2g9_19 = f2 * (crypto_int64) g9_19; 367 crypto_int64 f3g0 = f3 * (crypto_int64) g0; 368 crypto_int64 f3g1_2 = f3_2 * (crypto_int64) g1; 369 crypto_int64 f3g2 = f3 * (crypto_int64) g2; 370 crypto_int64 f3g3_2 = f3_2 * (crypto_int64) g3; 371 crypto_int64 f3g4 = f3 * (crypto_int64) g4; 372 crypto_int64 f3g5_2 = f3_2 * (crypto_int64) g5; 373 crypto_int64 f3g6 = f3 * (crypto_int64) g6; 374 crypto_int64 f3g7_38 = f3_2 * (crypto_int64) g7_19; 375 crypto_int64 f3g8_19 = f3 * (crypto_int64) g8_19; 376 crypto_int64 f3g9_38 = f3_2 * (crypto_int64) g9_19; 377 crypto_int64 f4g0 = f4 * (crypto_int64) g0; 378 crypto_int64 f4g1 = f4 * (crypto_int64) g1; 379 crypto_int64 f4g2 = f4 * (crypto_int64) g2; 380 crypto_int64 f4g3 = f4 * (crypto_int64) g3; 381 crypto_int64 f4g4 = f4 * (crypto_int64) g4; 382 crypto_int64 f4g5 = f4 * (crypto_int64) g5; 383 crypto_int64 f4g6_19 = f4 * (crypto_int64) g6_19; 384 crypto_int64 f4g7_19 = f4 * (crypto_int64) g7_19; 385 crypto_int64 f4g8_19 = f4 * (crypto_int64) g8_19; 386 crypto_int64 f4g9_19 = f4 * (crypto_int64) g9_19; 387 crypto_int64 f5g0 = f5 * (crypto_int64) g0; 388 crypto_int64 f5g1_2 = f5_2 * (crypto_int64) g1; 389 crypto_int64 f5g2 = f5 * (crypto_int64) g2; 390 crypto_int64 f5g3_2 = f5_2 * (crypto_int64) g3; 391 crypto_int64 f5g4 = f5 * (crypto_int64) g4; 392 crypto_int64 f5g5_38 = f5_2 * (crypto_int64) g5_19; 393 crypto_int64 f5g6_19 = f5 * (crypto_int64) g6_19; 394 crypto_int64 f5g7_38 = f5_2 
* (crypto_int64) g7_19; 395 crypto_int64 f5g8_19 = f5 * (crypto_int64) g8_19; 396 crypto_int64 f5g9_38 = f5_2 * (crypto_int64) g9_19; 397 crypto_int64 f6g0 = f6 * (crypto_int64) g0; 398 crypto_int64 f6g1 = f6 * (crypto_int64) g1; 399 crypto_int64 f6g2 = f6 * (crypto_int64) g2; 400 crypto_int64 f6g3 = f6 * (crypto_int64) g3; 401 crypto_int64 f6g4_19 = f6 * (crypto_int64) g4_19; 402 crypto_int64 f6g5_19 = f6 * (crypto_int64) g5_19; 403 crypto_int64 f6g6_19 = f6 * (crypto_int64) g6_19; 404 crypto_int64 f6g7_19 = f6 * (crypto_int64) g7_19; 405 crypto_int64 f6g8_19 = f6 * (crypto_int64) g8_19; 406 crypto_int64 f6g9_19 = f6 * (crypto_int64) g9_19; 407 crypto_int64 f7g0 = f7 * (crypto_int64) g0; 408 crypto_int64 f7g1_2 = f7_2 * (crypto_int64) g1; 409 crypto_int64 f7g2 = f7 * (crypto_int64) g2; 410 crypto_int64 f7g3_38 = f7_2 * (crypto_int64) g3_19; 411 crypto_int64 f7g4_19 = f7 * (crypto_int64) g4_19; 412 crypto_int64 f7g5_38 = f7_2 * (crypto_int64) g5_19; 413 crypto_int64 f7g6_19 = f7 * (crypto_int64) g6_19; 414 crypto_int64 f7g7_38 = f7_2 * (crypto_int64) g7_19; 415 crypto_int64 f7g8_19 = f7 * (crypto_int64) g8_19; 416 crypto_int64 f7g9_38 = f7_2 * (crypto_int64) g9_19; 417 crypto_int64 f8g0 = f8 * (crypto_int64) g0; 418 crypto_int64 f8g1 = f8 * (crypto_int64) g1; 419 crypto_int64 f8g2_19 = f8 * (crypto_int64) g2_19; 420 crypto_int64 f8g3_19 = f8 * (crypto_int64) g3_19; 421 crypto_int64 f8g4_19 = f8 * (crypto_int64) g4_19; 422 crypto_int64 f8g5_19 = f8 * (crypto_int64) g5_19; 423 crypto_int64 f8g6_19 = f8 * (crypto_int64) g6_19; 424 crypto_int64 f8g7_19 = f8 * (crypto_int64) g7_19; 425 crypto_int64 f8g8_19 = f8 * (crypto_int64) g8_19; 426 crypto_int64 f8g9_19 = f8 * (crypto_int64) g9_19; 427 crypto_int64 f9g0 = f9 * (crypto_int64) g0; 428 crypto_int64 f9g1_38 = f9_2 * (crypto_int64) g1_19; 429 crypto_int64 f9g2_19 = f9 * (crypto_int64) g2_19; 430 crypto_int64 f9g3_38 = f9_2 * (crypto_int64) g3_19; 431 crypto_int64 f9g4_19 = f9 * (crypto_int64) g4_19; 432 crypto_int64 
f9g5_38 = f9_2 * (crypto_int64) g5_19; 433 crypto_int64 f9g6_19 = f9 * (crypto_int64) g6_19; 434 crypto_int64 f9g7_38 = f9_2 * (crypto_int64) g7_19; 435 crypto_int64 f9g8_19 = f9 * (crypto_int64) g8_19; 436 crypto_int64 f9g9_38 = f9_2 * (crypto_int64) g9_19; 437 crypto_int64 h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; 438 crypto_int64 h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; 439 crypto_int64 h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; 440 crypto_int64 h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; 441 crypto_int64 h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; 442 crypto_int64 h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; 443 crypto_int64 h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; 444 crypto_int64 h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; 445 crypto_int64 h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; 446 crypto_int64 h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; 447 crypto_int64 carry0; 448 crypto_int64 carry1; 449 crypto_int64 carry2; 450 crypto_int64 carry3; 451 crypto_int64 carry4; 452 crypto_int64 carry5; 453 crypto_int64 carry6; 454 crypto_int64 carry7; 455 crypto_int64 carry8; 456 crypto_int64 carry9; 457 458 /* 459 |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) 460 i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 461 |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) 462 i.e. 
|h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9 463 */ 464 465 carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 466 carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 467 /* |h0| <= 2^25 */ 468 /* |h4| <= 2^25 */ 469 /* |h1| <= 1.51*2^58 */ 470 /* |h5| <= 1.51*2^58 */ 471 472 carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 473 carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 474 /* |h1| <= 2^24; from now on fits into int32 */ 475 /* |h5| <= 2^24; from now on fits into int32 */ 476 /* |h2| <= 1.21*2^59 */ 477 /* |h6| <= 1.21*2^59 */ 478 479 carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 480 carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 481 /* |h2| <= 2^25; from now on fits into int32 unchanged */ 482 /* |h6| <= 2^25; from now on fits into int32 unchanged */ 483 /* |h3| <= 1.51*2^58 */ 484 /* |h7| <= 1.51*2^58 */ 485 486 carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 487 carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 488 /* |h3| <= 2^24; from now on fits into int32 unchanged */ 489 /* |h7| <= 2^24; from now on fits into int32 unchanged */ 490 /* |h4| <= 1.52*2^33 */ 491 /* |h8| <= 1.52*2^33 */ 492 493 carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 494 carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 495 /* |h4| <= 2^25; from now on fits into int32 unchanged */ 496 /* |h8| <= 2^25; from now on fits into int32 unchanged */ 497 /* |h5| <= 1.01*2^24 */ 498 /* |h9| <= 1.51*2^58 */ 499 500 carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 501 /* |h9| <= 2^24; from now on fits into int32 unchanged */ 502 /* |h0| <= 1.8*2^37 */ 503 504 carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 505 /* 
|h0| <= 2^25; from now on fits into int32 unchanged */ 506 /* |h1| <= 1.01*2^24 */ 507 508 h[0] = h0; 509 h[1] = h1; 510 h[2] = h2; 511 h[3] = h3; 512 h[4] = h4; 513 h[5] = h5; 514 h[6] = h6; 515 h[7] = h7; 516 h[8] = h8; 517 h[9] = h9; 518 } 519 520 /* 521 h = f * 121666 522 Can overlap h with f. 523 524 Preconditions: 525 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 526 527 Postconditions: 528 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 529 */ 530 531 void fe_mul121666(fe h,fe f) 532 { 533 crypto_int32 f0 = f[0]; 534 crypto_int32 f1 = f[1]; 535 crypto_int32 f2 = f[2]; 536 crypto_int32 f3 = f[3]; 537 crypto_int32 f4 = f[4]; 538 crypto_int32 f5 = f[5]; 539 crypto_int32 f6 = f[6]; 540 crypto_int32 f7 = f[7]; 541 crypto_int32 f8 = f[8]; 542 crypto_int32 f9 = f[9]; 543 crypto_int64 h0 = f0 * (crypto_int64) 121666; 544 crypto_int64 h1 = f1 * (crypto_int64) 121666; 545 crypto_int64 h2 = f2 * (crypto_int64) 121666; 546 crypto_int64 h3 = f3 * (crypto_int64) 121666; 547 crypto_int64 h4 = f4 * (crypto_int64) 121666; 548 crypto_int64 h5 = f5 * (crypto_int64) 121666; 549 crypto_int64 h6 = f6 * (crypto_int64) 121666; 550 crypto_int64 h7 = f7 * (crypto_int64) 121666; 551 crypto_int64 h8 = f8 * (crypto_int64) 121666; 552 crypto_int64 h9 = f9 * (crypto_int64) 121666; 553 crypto_int64 carry0; 554 crypto_int64 carry1; 555 crypto_int64 carry2; 556 crypto_int64 carry3; 557 crypto_int64 carry4; 558 crypto_int64 carry5; 559 crypto_int64 carry6; 560 crypto_int64 carry7; 561 crypto_int64 carry8; 562 crypto_int64 carry9; 563 564 carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 565 carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 566 carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 567 carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 568 carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 569 570 carry0 = (h0 + 
(crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 571 carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 572 carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 573 carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 574 carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 575 576 h[0] = h0; 577 h[1] = h1; 578 h[2] = h2; 579 h[3] = h3; 580 h[4] = h4; 581 h[5] = h5; 582 h[6] = h6; 583 h[7] = h7; 584 h[8] = h8; 585 h[9] = h9; 586 } 587 588 /* 589 h = f * f 590 Can overlap h with f. 591 592 Preconditions: 593 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 594 595 Postconditions: 596 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 597 */ 598 599 /* 600 See fe_mul.c for discussion of implementation strategy. 601 */ 602 603 void fe_sq(fe h,fe f) 604 { 605 crypto_int32 f0 = f[0]; 606 crypto_int32 f1 = f[1]; 607 crypto_int32 f2 = f[2]; 608 crypto_int32 f3 = f[3]; 609 crypto_int32 f4 = f[4]; 610 crypto_int32 f5 = f[5]; 611 crypto_int32 f6 = f[6]; 612 crypto_int32 f7 = f[7]; 613 crypto_int32 f8 = f[8]; 614 crypto_int32 f9 = f[9]; 615 crypto_int32 f0_2 = 2 * f0; 616 crypto_int32 f1_2 = 2 * f1; 617 crypto_int32 f2_2 = 2 * f2; 618 crypto_int32 f3_2 = 2 * f3; 619 crypto_int32 f4_2 = 2 * f4; 620 crypto_int32 f5_2 = 2 * f5; 621 crypto_int32 f6_2 = 2 * f6; 622 crypto_int32 f7_2 = 2 * f7; 623 crypto_int32 f5_38 = 38 * f5; /* 1.31*2^30 */ 624 crypto_int32 f6_19 = 19 * f6; /* 1.31*2^30 */ 625 crypto_int32 f7_38 = 38 * f7; /* 1.31*2^30 */ 626 crypto_int32 f8_19 = 19 * f8; /* 1.31*2^30 */ 627 crypto_int32 f9_38 = 38 * f9; /* 1.31*2^30 */ 628 crypto_int64 f0f0 = f0 * (crypto_int64) f0; 629 crypto_int64 f0f1_2 = f0_2 * (crypto_int64) f1; 630 crypto_int64 f0f2_2 = f0_2 * (crypto_int64) f2; 631 crypto_int64 f0f3_2 = f0_2 * (crypto_int64) f3; 632 crypto_int64 f0f4_2 = f0_2 * (crypto_int64) f4; 633 crypto_int64 f0f5_2 = f0_2 * (crypto_int64) f5; 634 
crypto_int64 f0f6_2 = f0_2 * (crypto_int64) f6; 635 crypto_int64 f0f7_2 = f0_2 * (crypto_int64) f7; 636 crypto_int64 f0f8_2 = f0_2 * (crypto_int64) f8; 637 crypto_int64 f0f9_2 = f0_2 * (crypto_int64) f9; 638 crypto_int64 f1f1_2 = f1_2 * (crypto_int64) f1; 639 crypto_int64 f1f2_2 = f1_2 * (crypto_int64) f2; 640 crypto_int64 f1f3_4 = f1_2 * (crypto_int64) f3_2; 641 crypto_int64 f1f4_2 = f1_2 * (crypto_int64) f4; 642 crypto_int64 f1f5_4 = f1_2 * (crypto_int64) f5_2; 643 crypto_int64 f1f6_2 = f1_2 * (crypto_int64) f6; 644 crypto_int64 f1f7_4 = f1_2 * (crypto_int64) f7_2; 645 crypto_int64 f1f8_2 = f1_2 * (crypto_int64) f8; 646 crypto_int64 f1f9_76 = f1_2 * (crypto_int64) f9_38; 647 crypto_int64 f2f2 = f2 * (crypto_int64) f2; 648 crypto_int64 f2f3_2 = f2_2 * (crypto_int64) f3; 649 crypto_int64 f2f4_2 = f2_2 * (crypto_int64) f4; 650 crypto_int64 f2f5_2 = f2_2 * (crypto_int64) f5; 651 crypto_int64 f2f6_2 = f2_2 * (crypto_int64) f6; 652 crypto_int64 f2f7_2 = f2_2 * (crypto_int64) f7; 653 crypto_int64 f2f8_38 = f2_2 * (crypto_int64) f8_19; 654 crypto_int64 f2f9_38 = f2 * (crypto_int64) f9_38; 655 crypto_int64 f3f3_2 = f3_2 * (crypto_int64) f3; 656 crypto_int64 f3f4_2 = f3_2 * (crypto_int64) f4; 657 crypto_int64 f3f5_4 = f3_2 * (crypto_int64) f5_2; 658 crypto_int64 f3f6_2 = f3_2 * (crypto_int64) f6; 659 crypto_int64 f3f7_76 = f3_2 * (crypto_int64) f7_38; 660 crypto_int64 f3f8_38 = f3_2 * (crypto_int64) f8_19; 661 crypto_int64 f3f9_76 = f3_2 * (crypto_int64) f9_38; 662 crypto_int64 f4f4 = f4 * (crypto_int64) f4; 663 crypto_int64 f4f5_2 = f4_2 * (crypto_int64) f5; 664 crypto_int64 f4f6_38 = f4_2 * (crypto_int64) f6_19; 665 crypto_int64 f4f7_38 = f4 * (crypto_int64) f7_38; 666 crypto_int64 f4f8_38 = f4_2 * (crypto_int64) f8_19; 667 crypto_int64 f4f9_38 = f4 * (crypto_int64) f9_38; 668 crypto_int64 f5f5_38 = f5 * (crypto_int64) f5_38; 669 crypto_int64 f5f6_38 = f5_2 * (crypto_int64) f6_19; 670 crypto_int64 f5f7_76 = f5_2 * (crypto_int64) f7_38; 671 crypto_int64 f5f8_38 = f5_2 * 
(crypto_int64) f8_19; 672 crypto_int64 f5f9_76 = f5_2 * (crypto_int64) f9_38; 673 crypto_int64 f6f6_19 = f6 * (crypto_int64) f6_19; 674 crypto_int64 f6f7_38 = f6 * (crypto_int64) f7_38; 675 crypto_int64 f6f8_38 = f6_2 * (crypto_int64) f8_19; 676 crypto_int64 f6f9_38 = f6 * (crypto_int64) f9_38; 677 crypto_int64 f7f7_38 = f7 * (crypto_int64) f7_38; 678 crypto_int64 f7f8_38 = f7_2 * (crypto_int64) f8_19; 679 crypto_int64 f7f9_76 = f7_2 * (crypto_int64) f9_38; 680 crypto_int64 f8f8_19 = f8 * (crypto_int64) f8_19; 681 crypto_int64 f8f9_38 = f8 * (crypto_int64) f9_38; 682 crypto_int64 f9f9_38 = f9 * (crypto_int64) f9_38; 683 crypto_int64 h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 684 crypto_int64 h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 685 crypto_int64 h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 686 crypto_int64 h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 687 crypto_int64 h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 688 crypto_int64 h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 689 crypto_int64 h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 690 crypto_int64 h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 691 crypto_int64 h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 692 crypto_int64 h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 693 crypto_int64 carry0; 694 crypto_int64 carry1; 695 crypto_int64 carry2; 696 crypto_int64 carry3; 697 crypto_int64 carry4; 698 crypto_int64 carry5; 699 crypto_int64 carry6; 700 crypto_int64 carry7; 701 crypto_int64 carry8; 702 crypto_int64 carry9; 703 704 carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 705 carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 706 707 carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 708 carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 709 710 carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 711 carry6 = (h6 + (crypto_int64) 
(1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 712 713 carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 714 carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 715 716 carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 717 carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 718 719 carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 720 721 carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 722 723 h[0] = h0; 724 h[1] = h1; 725 h[2] = h2; 726 h[3] = h3; 727 h[4] = h4; 728 h[5] = h5; 729 h[6] = h6; 730 h[7] = h7; 731 h[8] = h8; 732 h[9] = h9; 733 } 734 735 /* 736 h = f - g 737 Can overlap h with f or g. 738 739 Preconditions: 740 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 741 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 742 743 Postconditions: 744 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 
745 */ 746 747 void fe_sub(fe h,fe f,fe g) 748 { 749 crypto_int32 f0 = f[0]; 750 crypto_int32 f1 = f[1]; 751 crypto_int32 f2 = f[2]; 752 crypto_int32 f3 = f[3]; 753 crypto_int32 f4 = f[4]; 754 crypto_int32 f5 = f[5]; 755 crypto_int32 f6 = f[6]; 756 crypto_int32 f7 = f[7]; 757 crypto_int32 f8 = f[8]; 758 crypto_int32 f9 = f[9]; 759 crypto_int32 g0 = g[0]; 760 crypto_int32 g1 = g[1]; 761 crypto_int32 g2 = g[2]; 762 crypto_int32 g3 = g[3]; 763 crypto_int32 g4 = g[4]; 764 crypto_int32 g5 = g[5]; 765 crypto_int32 g6 = g[6]; 766 crypto_int32 g7 = g[7]; 767 crypto_int32 g8 = g[8]; 768 crypto_int32 g9 = g[9]; 769 crypto_int32 h0 = f0 - g0; 770 crypto_int32 h1 = f1 - g1; 771 crypto_int32 h2 = f2 - g2; 772 crypto_int32 h3 = f3 - g3; 773 crypto_int32 h4 = f4 - g4; 774 crypto_int32 h5 = f5 - g5; 775 crypto_int32 h6 = f6 - g6; 776 crypto_int32 h7 = f7 - g7; 777 crypto_int32 h8 = f8 - g8; 778 crypto_int32 h9 = f9 - g9; 779 h[0] = h0; 780 h[1] = h1; 781 h[2] = h2; 782 h[3] = h3; 783 h[4] = h4; 784 h[5] = h5; 785 h[6] = h6; 786 h[7] = h7; 787 h[8] = h8; 788 h[9] = h9; 789 } 790 791 /* 792 Preconditions: 793 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 794 795 Write p=2^255-19; q=floor(h/p). 796 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). 797 798 Proof: 799 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. 800 Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. 801 802 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). 803 Then 0<y<1. 804 805 Write r=h-pq. 806 Have 0<=r<=p-1=2^255-20. 807 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. 808 809 Write x=r+19(2^-255)r+y. 810 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. 811 812 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) 813 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. 
814 */ 815 816 void fe_tobytes(unsigned char *s,fe h) 817 { 818 crypto_int32 h0 = h[0]; 819 crypto_int32 h1 = h[1]; 820 crypto_int32 h2 = h[2]; 821 crypto_int32 h3 = h[3]; 822 crypto_int32 h4 = h[4]; 823 crypto_int32 h5 = h[5]; 824 crypto_int32 h6 = h[6]; 825 crypto_int32 h7 = h[7]; 826 crypto_int32 h8 = h[8]; 827 crypto_int32 h9 = h[9]; 828 crypto_int32 q; 829 crypto_int32 carry0; 830 crypto_int32 carry1; 831 crypto_int32 carry2; 832 crypto_int32 carry3; 833 crypto_int32 carry4; 834 crypto_int32 carry5; 835 crypto_int32 carry6; 836 crypto_int32 carry7; 837 crypto_int32 carry8; 838 crypto_int32 carry9; 839 840 q = (19 * h9 + (((crypto_int32) 1) << 24)) >> 25; 841 q = (h0 + q) >> 26; 842 q = (h1 + q) >> 25; 843 q = (h2 + q) >> 26; 844 q = (h3 + q) >> 25; 845 q = (h4 + q) >> 26; 846 q = (h5 + q) >> 25; 847 q = (h6 + q) >> 26; 848 q = (h7 + q) >> 25; 849 q = (h8 + q) >> 26; 850 q = (h9 + q) >> 25; 851 852 /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ 853 h0 += 19 * q; 854 /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ 855 856 carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26; 857 carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25; 858 carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26; 859 carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25; 860 carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26; 861 carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25; 862 carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26; 863 carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25; 864 carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26; 865 carry9 = h9 >> 25; h9 -= carry9 << 25; 866 /* h10 = carry9 */ 867 868 /* 869 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. 870 Have h0+...+2^230 h9 between 0 and 2^255-1; 871 evidently 2^255 h10-2^255 q = 0. 872 Goal: Output h0+...+2^230 h9. 
873 */ 874 875 s[0] = h0 >> 0; 876 s[1] = h0 >> 8; 877 s[2] = h0 >> 16; 878 s[3] = (h0 >> 24) | (h1 << 2); 879 s[4] = h1 >> 6; 880 s[5] = h1 >> 14; 881 s[6] = (h1 >> 22) | (h2 << 3); 882 s[7] = h2 >> 5; 883 s[8] = h2 >> 13; 884 s[9] = (h2 >> 21) | (h3 << 5); 885 s[10] = h3 >> 3; 886 s[11] = h3 >> 11; 887 s[12] = (h3 >> 19) | (h4 << 6); 888 s[13] = h4 >> 2; 889 s[14] = h4 >> 10; 890 s[15] = h4 >> 18; 891 s[16] = h5 >> 0; 892 s[17] = h5 >> 8; 893 s[18] = h5 >> 16; 894 s[19] = (h5 >> 24) | (h6 << 1); 895 s[20] = h6 >> 7; 896 s[21] = h6 >> 15; 897 s[22] = (h6 >> 23) | (h7 << 3); 898 s[23] = h7 >> 5; 899 s[24] = h7 >> 13; 900 s[25] = (h7 >> 21) | (h8 << 4); 901 s[26] = h8 >> 4; 902 s[27] = h8 >> 12; 903 s[28] = (h8 >> 20) | (h9 << 6); 904 s[29] = h9 >> 2; 905 s[30] = h9 >> 10; 906 s[31] = h9 >> 18; 907 } 908 909 void fe_invert(fe out,fe z) 910 { 911 fe t0; 912 fe t1; 913 fe t2; 914 fe t3; 915 int i; 916 917 918 /* qhasm: fe z1 */ 919 920 /* qhasm: fe z2 */ 921 922 /* qhasm: fe z8 */ 923 924 /* qhasm: fe z9 */ 925 926 /* qhasm: fe z11 */ 927 928 /* qhasm: fe z22 */ 929 930 /* qhasm: fe z_5_0 */ 931 932 /* qhasm: fe z_10_5 */ 933 934 /* qhasm: fe z_10_0 */ 935 936 /* qhasm: fe z_20_10 */ 937 938 /* qhasm: fe z_20_0 */ 939 940 /* qhasm: fe z_40_20 */ 941 942 /* qhasm: fe z_40_0 */ 943 944 /* qhasm: fe z_50_10 */ 945 946 /* qhasm: fe z_50_0 */ 947 948 /* qhasm: fe z_100_50 */ 949 950 /* qhasm: fe z_100_0 */ 951 952 /* qhasm: fe z_200_100 */ 953 954 /* qhasm: fe z_200_0 */ 955 956 /* qhasm: fe z_250_50 */ 957 958 /* qhasm: fe z_250_0 */ 959 960 /* qhasm: fe z_255_5 */ 961 962 /* qhasm: fe z_255_21 */ 963 964 /* qhasm: enter pow225521 */ 965 966 /* qhasm: z2 = z1^2^1 */ 967 /* asm 1: fe_sq(>z2=fe#1,<z1=fe#11); for (i = 1;i < 1;++i) fe_sq(>z2=fe#1,>z2=fe#1); */ 968 /* asm 2: fe_sq(>z2=t0,<z1=z); for (i = 1;i < 1;++i) fe_sq(>z2=t0,>z2=t0); */ 969 fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); 970 971 /* qhasm: z8 = z2^2^2 */ 972 /* asm 1: fe_sq(>z8=fe#2,<z2=fe#1); for (i = 
1;i < 2;++i) fe_sq(>z8=fe#2,>z8=fe#2); */ 973 /* asm 2: fe_sq(>z8=t1,<z2=t0); for (i = 1;i < 2;++i) fe_sq(>z8=t1,>z8=t1); */ 974 fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); 975 976 /* qhasm: z9 = z1*z8 */ 977 /* asm 1: fe_mul(>z9=fe#2,<z1=fe#11,<z8=fe#2); */ 978 /* asm 2: fe_mul(>z9=t1,<z1=z,<z8=t1); */ 979 fe_mul(t1,z,t1); 980 981 /* qhasm: z11 = z2*z9 */ 982 /* asm 1: fe_mul(>z11=fe#1,<z2=fe#1,<z9=fe#2); */ 983 /* asm 2: fe_mul(>z11=t0,<z2=t0,<z9=t1); */ 984 fe_mul(t0,t0,t1); 985 986 /* qhasm: z22 = z11^2^1 */ 987 /* asm 1: fe_sq(>z22=fe#3,<z11=fe#1); for (i = 1;i < 1;++i) fe_sq(>z22=fe#3,>z22=fe#3); */ 988 /* asm 2: fe_sq(>z22=t2,<z11=t0); for (i = 1;i < 1;++i) fe_sq(>z22=t2,>z22=t2); */ 989 fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2); 990 991 /* qhasm: z_5_0 = z9*z22 */ 992 /* asm 1: fe_mul(>z_5_0=fe#2,<z9=fe#2,<z22=fe#3); */ 993 /* asm 2: fe_mul(>z_5_0=t1,<z9=t1,<z22=t2); */ 994 fe_mul(t1,t1,t2); 995 996 /* qhasm: z_10_5 = z_5_0^2^5 */ 997 /* asm 1: fe_sq(>z_10_5=fe#3,<z_5_0=fe#2); for (i = 1;i < 5;++i) fe_sq(>z_10_5=fe#3,>z_10_5=fe#3); */ 998 /* asm 2: fe_sq(>z_10_5=t2,<z_5_0=t1); for (i = 1;i < 5;++i) fe_sq(>z_10_5=t2,>z_10_5=t2); */ 999 fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2); 1000 1001 /* qhasm: z_10_0 = z_10_5*z_5_0 */ 1002 /* asm 1: fe_mul(>z_10_0=fe#2,<z_10_5=fe#3,<z_5_0=fe#2); */ 1003 /* asm 2: fe_mul(>z_10_0=t1,<z_10_5=t2,<z_5_0=t1); */ 1004 fe_mul(t1,t2,t1); 1005 1006 /* qhasm: z_20_10 = z_10_0^2^10 */ 1007 /* asm 1: fe_sq(>z_20_10=fe#3,<z_10_0=fe#2); for (i = 1;i < 10;++i) fe_sq(>z_20_10=fe#3,>z_20_10=fe#3); */ 1008 /* asm 2: fe_sq(>z_20_10=t2,<z_10_0=t1); for (i = 1;i < 10;++i) fe_sq(>z_20_10=t2,>z_20_10=t2); */ 1009 fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2); 1010 1011 /* qhasm: z_20_0 = z_20_10*z_10_0 */ 1012 /* asm 1: fe_mul(>z_20_0=fe#3,<z_20_10=fe#3,<z_10_0=fe#2); */ 1013 /* asm 2: fe_mul(>z_20_0=t2,<z_20_10=t2,<z_10_0=t1); */ 1014 fe_mul(t2,t2,t1); 1015 1016 /* qhasm: z_40_20 = z_20_0^2^20 */ 1017 /* asm 1: 
fe_sq(>z_40_20=fe#4,<z_20_0=fe#3); for (i = 1;i < 20;++i) fe_sq(>z_40_20=fe#4,>z_40_20=fe#4); */ 1018 /* asm 2: fe_sq(>z_40_20=t3,<z_20_0=t2); for (i = 1;i < 20;++i) fe_sq(>z_40_20=t3,>z_40_20=t3); */ 1019 fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3); 1020 1021 /* qhasm: z_40_0 = z_40_20*z_20_0 */ 1022 /* asm 1: fe_mul(>z_40_0=fe#3,<z_40_20=fe#4,<z_20_0=fe#3); */ 1023 /* asm 2: fe_mul(>z_40_0=t2,<z_40_20=t3,<z_20_0=t2); */ 1024 fe_mul(t2,t3,t2); 1025 1026 /* qhasm: z_50_10 = z_40_0^2^10 */ 1027 /* asm 1: fe_sq(>z_50_10=fe#3,<z_40_0=fe#3); for (i = 1;i < 10;++i) fe_sq(>z_50_10=fe#3,>z_50_10=fe#3); */ 1028 /* asm 2: fe_sq(>z_50_10=t2,<z_40_0=t2); for (i = 1;i < 10;++i) fe_sq(>z_50_10=t2,>z_50_10=t2); */ 1029 fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2); 1030 1031 /* qhasm: z_50_0 = z_50_10*z_10_0 */ 1032 /* asm 1: fe_mul(>z_50_0=fe#2,<z_50_10=fe#3,<z_10_0=fe#2); */ 1033 /* asm 2: fe_mul(>z_50_0=t1,<z_50_10=t2,<z_10_0=t1); */ 1034 fe_mul(t1,t2,t1); 1035 1036 /* qhasm: z_100_50 = z_50_0^2^50 */ 1037 /* asm 1: fe_sq(>z_100_50=fe#3,<z_50_0=fe#2); for (i = 1;i < 50;++i) fe_sq(>z_100_50=fe#3,>z_100_50=fe#3); */ 1038 /* asm 2: fe_sq(>z_100_50=t2,<z_50_0=t1); for (i = 1;i < 50;++i) fe_sq(>z_100_50=t2,>z_100_50=t2); */ 1039 fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2); 1040 1041 /* qhasm: z_100_0 = z_100_50*z_50_0 */ 1042 /* asm 1: fe_mul(>z_100_0=fe#3,<z_100_50=fe#3,<z_50_0=fe#2); */ 1043 /* asm 2: fe_mul(>z_100_0=t2,<z_100_50=t2,<z_50_0=t1); */ 1044 fe_mul(t2,t2,t1); 1045 1046 /* qhasm: z_200_100 = z_100_0^2^100 */ 1047 /* asm 1: fe_sq(>z_200_100=fe#4,<z_100_0=fe#3); for (i = 1;i < 100;++i) fe_sq(>z_200_100=fe#4,>z_200_100=fe#4); */ 1048 /* asm 2: fe_sq(>z_200_100=t3,<z_100_0=t2); for (i = 1;i < 100;++i) fe_sq(>z_200_100=t3,>z_200_100=t3); */ 1049 fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3); 1050 1051 /* qhasm: z_200_0 = z_200_100*z_100_0 */ 1052 /* asm 1: fe_mul(>z_200_0=fe#3,<z_200_100=fe#4,<z_100_0=fe#3); */ 1053 /* asm 2: 
fe_mul(>z_200_0=t2,<z_200_100=t3,<z_100_0=t2); */ 1054 fe_mul(t2,t3,t2); 1055 1056 /* qhasm: z_250_50 = z_200_0^2^50 */ 1057 /* asm 1: fe_sq(>z_250_50=fe#3,<z_200_0=fe#3); for (i = 1;i < 50;++i) fe_sq(>z_250_50=fe#3,>z_250_50=fe#3); */ 1058 /* asm 2: fe_sq(>z_250_50=t2,<z_200_0=t2); for (i = 1;i < 50;++i) fe_sq(>z_250_50=t2,>z_250_50=t2); */ 1059 fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2); 1060 1061 /* qhasm: z_250_0 = z_250_50*z_50_0 */ 1062 /* asm 1: fe_mul(>z_250_0=fe#2,<z_250_50=fe#3,<z_50_0=fe#2); */ 1063 /* asm 2: fe_mul(>z_250_0=t1,<z_250_50=t2,<z_50_0=t1); */ 1064 fe_mul(t1,t2,t1); 1065 1066 /* qhasm: z_255_5 = z_250_0^2^5 */ 1067 /* asm 1: fe_sq(>z_255_5=fe#2,<z_250_0=fe#2); for (i = 1;i < 5;++i) fe_sq(>z_255_5=fe#2,>z_255_5=fe#2); */ 1068 /* asm 2: fe_sq(>z_255_5=t1,<z_250_0=t1); for (i = 1;i < 5;++i) fe_sq(>z_255_5=t1,>z_255_5=t1); */ 1069 fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1); 1070 1071 /* qhasm: z_255_21 = z_255_5*z11 */ 1072 /* asm 1: fe_mul(>z_255_21=fe#12,<z_255_5=fe#2,<z11=fe#1); */ 1073 /* asm 2: fe_mul(>z_255_21=out,<z_255_5=t1,<z11=t0); */ 1074 fe_mul(out,t1,t0); 1075 1076 /* qhasm: return */ 1077 1078 return; 1079 } 1080 1081 1082 int crypto_scalarmult_ref10(unsigned char *q, 1083 const unsigned char *n, 1084 const unsigned char *p) 1085 { 1086 unsigned char e[32]; 1087 unsigned int i; 1088 fe x1; 1089 fe x2; 1090 fe z2; 1091 fe x3; 1092 fe z3; 1093 fe tmp0; 1094 fe tmp1; 1095 int pos; 1096 unsigned int swap; 1097 unsigned int b; 1098 1099 for (i = 0;i < 32;++i) e[i] = n[i]; 1100 e[0] &= 248; 1101 e[31] &= 127; 1102 e[31] |= 64; 1103 fe_frombytes(x1,p); 1104 fe_1(x2); 1105 fe_0(z2); 1106 fe_copy(x3,x1); 1107 fe_1(z3); 1108 1109 swap = 0; 1110 for (pos = 254;pos >= 0;--pos) { 1111 b = e[pos / 8] >> (pos & 7); 1112 b &= 1; 1113 swap ^= b; 1114 fe_cswap(x2,x3,swap); 1115 fe_cswap(z2,z3,swap); 1116 swap = b; 1117 /* qhasm: fe X2 */ 1118 1119 /* qhasm: fe Z2 */ 1120 1121 /* qhasm: fe X3 */ 1122 1123 /* qhasm: fe Z3 */ 1124 1125 /* 
qhasm: fe X4 */ 1126 1127 /* qhasm: fe Z4 */ 1128 1129 /* qhasm: fe X5 */ 1130 1131 /* qhasm: fe Z5 */ 1132 1133 /* qhasm: fe A */ 1134 1135 /* qhasm: fe B */ 1136 1137 /* qhasm: fe C */ 1138 1139 /* qhasm: fe D */ 1140 1141 /* qhasm: fe E */ 1142 1143 /* qhasm: fe AA */ 1144 1145 /* qhasm: fe BB */ 1146 1147 /* qhasm: fe DA */ 1148 1149 /* qhasm: fe CB */ 1150 1151 /* qhasm: fe t0 */ 1152 1153 /* qhasm: fe t1 */ 1154 1155 /* qhasm: fe t2 */ 1156 1157 /* qhasm: fe t3 */ 1158 1159 /* qhasm: fe t4 */ 1160 1161 /* qhasm: enter ladder */ 1162 1163 /* qhasm: D = X3-Z3 */ 1164 /* asm 1: fe_sub(>D=fe#5,<X3=fe#3,<Z3=fe#4); */ 1165 /* asm 2: fe_sub(>D=tmp0,<X3=x3,<Z3=z3); */ 1166 fe_sub(tmp0,x3,z3); 1167 1168 /* qhasm: B = X2-Z2 */ 1169 /* asm 1: fe_sub(>B=fe#6,<X2=fe#1,<Z2=fe#2); */ 1170 /* asm 2: fe_sub(>B=tmp1,<X2=x2,<Z2=z2); */ 1171 fe_sub(tmp1,x2,z2); 1172 1173 /* qhasm: A = X2+Z2 */ 1174 /* asm 1: fe_add(>A=fe#1,<X2=fe#1,<Z2=fe#2); */ 1175 /* asm 2: fe_add(>A=x2,<X2=x2,<Z2=z2); */ 1176 fe_add(x2,x2,z2); 1177 1178 /* qhasm: C = X3+Z3 */ 1179 /* asm 1: fe_add(>C=fe#2,<X3=fe#3,<Z3=fe#4); */ 1180 /* asm 2: fe_add(>C=z2,<X3=x3,<Z3=z3); */ 1181 fe_add(z2,x3,z3); 1182 1183 /* qhasm: DA = D*A */ 1184 /* asm 1: fe_mul(>DA=fe#4,<D=fe#5,<A=fe#1); */ 1185 /* asm 2: fe_mul(>DA=z3,<D=tmp0,<A=x2); */ 1186 fe_mul(z3,tmp0,x2); 1187 1188 /* qhasm: CB = C*B */ 1189 /* asm 1: fe_mul(>CB=fe#2,<C=fe#2,<B=fe#6); */ 1190 /* asm 2: fe_mul(>CB=z2,<C=z2,<B=tmp1); */ 1191 fe_mul(z2,z2,tmp1); 1192 1193 /* qhasm: BB = B^2 */ 1194 /* asm 1: fe_sq(>BB=fe#5,<B=fe#6); */ 1195 /* asm 2: fe_sq(>BB=tmp0,<B=tmp1); */ 1196 fe_sq(tmp0,tmp1); 1197 1198 /* qhasm: AA = A^2 */ 1199 /* asm 1: fe_sq(>AA=fe#6,<A=fe#1); */ 1200 /* asm 2: fe_sq(>AA=tmp1,<A=x2); */ 1201 fe_sq(tmp1,x2); 1202 1203 /* qhasm: t0 = DA+CB */ 1204 /* asm 1: fe_add(>t0=fe#3,<DA=fe#4,<CB=fe#2); */ 1205 /* asm 2: fe_add(>t0=x3,<DA=z3,<CB=z2); */ 1206 fe_add(x3,z3,z2); 1207 1208 /* qhasm: assign x3 to t0 */ 1209 1210 /* qhasm: t1 = DA-CB */ 
1211 /* asm 1: fe_sub(>t1=fe#2,<DA=fe#4,<CB=fe#2); */ 1212 /* asm 2: fe_sub(>t1=z2,<DA=z3,<CB=z2); */ 1213 fe_sub(z2,z3,z2); 1214 1215 /* qhasm: X4 = AA*BB */ 1216 /* asm 1: fe_mul(>X4=fe#1,<AA=fe#6,<BB=fe#5); */ 1217 /* asm 2: fe_mul(>X4=x2,<AA=tmp1,<BB=tmp0); */ 1218 fe_mul(x2,tmp1,tmp0); 1219 1220 /* qhasm: E = AA-BB */ 1221 /* asm 1: fe_sub(>E=fe#6,<AA=fe#6,<BB=fe#5); */ 1222 /* asm 2: fe_sub(>E=tmp1,<AA=tmp1,<BB=tmp0); */ 1223 fe_sub(tmp1,tmp1,tmp0); 1224 1225 /* qhasm: t2 = t1^2 */ 1226 /* asm 1: fe_sq(>t2=fe#2,<t1=fe#2); */ 1227 /* asm 2: fe_sq(>t2=z2,<t1=z2); */ 1228 fe_sq(z2,z2); 1229 1230 /* qhasm: t3 = a24*E */ 1231 /* asm 1: fe_mul121666(>t3=fe#4,<E=fe#6); */ 1232 /* asm 2: fe_mul121666(>t3=z3,<E=tmp1); */ 1233 fe_mul121666(z3,tmp1); 1234 1235 /* qhasm: X5 = t0^2 */ 1236 /* asm 1: fe_sq(>X5=fe#3,<t0=fe#3); */ 1237 /* asm 2: fe_sq(>X5=x3,<t0=x3); */ 1238 fe_sq(x3,x3); 1239 1240 /* qhasm: t4 = BB+t3 */ 1241 /* asm 1: fe_add(>t4=fe#5,<BB=fe#5,<t3=fe#4); */ 1242 /* asm 2: fe_add(>t4=tmp0,<BB=tmp0,<t3=z3); */ 1243 fe_add(tmp0,tmp0,z3); 1244 1245 /* qhasm: Z5 = X1*t2 */ 1246 /* asm 1: fe_mul(>Z5=fe#4,x1,<t2=fe#2); */ 1247 /* asm 2: fe_mul(>Z5=z3,x1,<t2=z2); */ 1248 fe_mul(z3,x1,z2); 1249 1250 /* qhasm: Z4 = E*t4 */ 1251 /* asm 1: fe_mul(>Z4=fe#2,<E=fe#6,<t4=fe#5); */ 1252 /* asm 2: fe_mul(>Z4=z2,<E=tmp1,<t4=tmp0); */ 1253 fe_mul(z2,tmp1,tmp0); 1254 1255 /* qhasm: return */ 1256 } 1257 fe_cswap(x2,x3,swap); 1258 fe_cswap(z2,z3,swap); 1259 1260 fe_invert(z2,z2); 1261 fe_mul(x2,x2,z2); 1262 fe_tobytes(q,x2); 1263 return 0; 1264 } 1265 1266 static const unsigned char basepoint[32] = {9}; 1267 1268 int crypto_scalarmult_base_ref10(unsigned char *q,const unsigned char *n) 1269 { 1270 return crypto_scalarmult_ref10(q,n,basepoint); 1271 }