keccak-tiny-unrolled.c (9945B)
1 /** libkeccak-tiny 2 * 3 * A single-file implementation of SHA-3 and SHAKE. 4 * 5 * Implementor: David Leon Gil 6 * License: CC0, attribution kindly requested. Blame taken too, 7 * but not liability. 8 */ 9 #include "keccak-tiny.h" 10 11 #include <string.h> 12 #include "lib/crypt_ops/crypto_util.h" 13 #include "byteorder.h" 14 15 /******** Endianness conversion helpers ********/ 16 17 static inline uint64_t 18 loadu64le(const unsigned char *x) { 19 uint64_t r; 20 memcpy(&r, x, sizeof(r)); 21 return _le64toh(r); 22 } 23 24 static inline void 25 storeu64le(uint8_t *x, uint64_t u) { 26 uint64_t val = _le64toh(u); 27 memcpy(x, &val, sizeof(u)); 28 } 29 30 /******** The Keccak-f[1600] permutation ********/ 31 32 /*** Constants. ***/ 33 static const uint8_t rho[24] = \ 34 { 1, 3, 6, 10, 15, 21, 35 28, 36, 45, 55, 2, 14, 36 27, 41, 56, 8, 25, 43, 37 62, 18, 39, 61, 20, 44}; 38 static const uint8_t pi[24] = \ 39 {10, 7, 11, 17, 18, 3, 40 5, 16, 8, 21, 24, 4, 41 15, 23, 19, 13, 12, 2, 42 20, 14, 22, 9, 6, 1}; 43 static const uint64_t RC[24] = \ 44 {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, 45 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, 46 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL, 47 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, 48 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL, 49 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL}; 50 51 /*** Helper macros to unroll the permutation. ***/ 52 #define rol(x, s) (((x) << s) | ((x) >> (64 - s))) 53 #define REPEAT6(e) e e e e e e 54 #define REPEAT24(e) REPEAT6(e e e e) 55 #define REPEAT5(e) e e e e e 56 #define FOR5(v, s, e) \ 57 v = 0; \ 58 REPEAT5(e; v += s;) 59 60 /*** Keccak-f[1600] ***/ 61 static inline void keccakf(void* state) { 62 uint64_t* a = (uint64_t*)state; 63 uint64_t b[5] = {0}; 64 uint64_t t = 0; 65 uint8_t x, y, i = 0; 66 67 REPEAT24( 68 // Theta 69 FOR5(x, 1, 70 b[x] = 0; 71 FOR5(y, 5, 72 b[x] ^= a[x + y]; )) 73 FOR5(x, 1, 74 FOR5(y, 5, 75 a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); )) 76 // Rho and pi 77 t = a[1]; 78 x = 0; 79 REPEAT24(b[0] = a[pi[x]]; 80 a[pi[x]] = rol(t, rho[x]); 81 t = b[0]; 82 x++; ) 83 // Chi 84 FOR5(y, 85 5, 86 FOR5(x, 1, 87 b[x] = a[y + x];) 88 FOR5(x, 1, 89 a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); )) 90 // Iota 91 a[0] ^= RC[i]; 92 i++; ) 93 } 94 95 /******** The FIPS202-defined functions. ********/ 96 97 /*** Some helper macros. ***/ 98 99 // `xorin` modified to handle Big Endian systems, `buf` being unaligned on 100 // systems that care about such things. Assumes that len is a multiple of 8, 101 // which is always true for the rates we use, and the modified finalize. 102 static inline void 103 xorin8(uint8_t *dst, const uint8_t *src, size_t len) { 104 uint64_t* a = (uint64_t*)dst; // Always aligned. 105 for (size_t i = 0; i < len; i += 8) { 106 a[i/8] ^= loadu64le(src + i); 107 } 108 } 109 110 // `setout` likewise modified to handle Big Endian systems. Assumes that len 111 // is a multiple of 8, which is true for every rate we use. 112 static inline void 113 setout8(const uint8_t *src, uint8_t *dst, size_t len) { 114 const uint64_t *si = (const uint64_t*)src; // Always aligned. 115 for (size_t i = 0; i < len; i+= 8) { 116 storeu64le(dst+i, si[i/8]); 117 } 118 } 119 120 #define P keccakf 121 #define Plen KECCAK_MAX_RATE 122 123 #define KECCAK_DELIM_DIGEST 0x06 124 #define KECCAK_DELIM_XOF 0x1f 125 126 // Fold P*F over the full blocks of an input. 127 #define foldP(I, L, F) \ 128 while (L >= s->rate) { \ 129 F(s->a, I, s->rate); \ 130 P(s->a); \ 131 I += s->rate; \ 132 L -= s->rate; \ 133 } 134 135 static inline void 136 keccak_absorb_blocks(keccak_state *s, const uint8_t *buf, size_t nr_blocks) 137 { 138 size_t blen = nr_blocks * s->rate; 139 foldP(buf, blen, xorin8); 140 } 141 142 static int 143 keccak_update(keccak_state *s, const uint8_t *buf, size_t len) 144 { 145 if (s->finalized) 146 return -1; 147 if ((buf == NULL) && len != 0) 148 return -1; 149 150 size_t remaining = len; 151 while (remaining > 0) { 152 if (s->offset == 0) { 153 const size_t blocks = remaining / s->rate; 154 size_t direct_bytes = blocks * s->rate; 155 if (direct_bytes > 0) { 156 keccak_absorb_blocks(s, buf, blocks); 157 remaining -= direct_bytes; 158 buf += direct_bytes; 159 } 160 } 161 162 const size_t buf_avail = s->rate - s->offset; 163 const size_t buf_bytes = (buf_avail > remaining) ? remaining : buf_avail; 164 if (buf_bytes > 0) { 165 memcpy(&s->block[s->offset], buf, buf_bytes); 166 s->offset += buf_bytes; 167 remaining -= buf_bytes; 168 buf += buf_bytes; 169 } 170 if (s->offset == s->rate) { 171 keccak_absorb_blocks(s, s->block, 1); 172 s->offset = 0; 173 } 174 } 175 return 0; 176 } 177 178 static void 179 keccak_finalize(keccak_state *s) 180 { 181 // Xor in the DS and pad frame. 182 s->block[s->offset++] = s->delim; // DS. 183 for (size_t i = s->offset; i < s->rate; i++) { 184 s->block[i] = 0; 185 } 186 s->block[s->rate - 1] |= 0x80; // Pad frame. 187 188 // Xor in the last block. 189 xorin8(s->a, s->block, s->rate); 190 191 memwipe(s->block, 0, sizeof(s->block)); 192 s->finalized = 1; 193 s->offset = s->rate; 194 } 195 196 static inline void 197 keccak_squeeze_blocks(keccak_state *s, uint8_t *out, size_t nr_blocks) 198 { 199 for (size_t n = 0; n < nr_blocks; n++) { 200 keccakf(s->a); 201 setout8(s->a, out, s->rate); 202 out += s->rate; 203 } 204 } 205 206 static int 207 keccak_squeeze(keccak_state *s, uint8_t *out, size_t outlen) 208 { 209 if (!s->finalized) 210 return -1; 211 212 size_t remaining = outlen; 213 while (remaining > 0) { 214 if (s->offset == s->rate) { 215 const size_t blocks = remaining / s->rate; 216 const size_t direct_bytes = blocks * s->rate; 217 if (blocks > 0) { 218 keccak_squeeze_blocks(s, out, blocks); 219 out += direct_bytes; 220 remaining -= direct_bytes; 221 } 222 223 if (remaining > 0) { 224 keccak_squeeze_blocks(s, s->block, 1); 225 s->offset = 0; 226 } 227 } 228 229 const size_t buf_bytes = s->rate - s->offset; 230 const size_t indirect_bytes = (buf_bytes > remaining) ? remaining : buf_bytes; 231 if (indirect_bytes > 0) { 232 memcpy(out, &s->block[s->offset], indirect_bytes); 233 out += indirect_bytes; 234 s->offset += indirect_bytes; 235 remaining -= indirect_bytes; 236 } 237 } 238 return 0; 239 } 240 241 int 242 keccak_digest_init(keccak_state *s, size_t bits) 243 { 244 if (s == NULL) 245 return -1; 246 if (bits != 224 && bits != 256 && bits != 384 && bits != 512) 247 return -1; 248 249 keccak_cleanse(s); 250 s->rate = KECCAK_RATE(bits); 251 s->delim = KECCAK_DELIM_DIGEST; 252 return 0; 253 } 254 255 int 256 keccak_digest_update(keccak_state *s, const uint8_t *buf, size_t len) 257 { 258 if (s == NULL) 259 return -1; 260 if (s->delim != KECCAK_DELIM_DIGEST) 261 return -1; 262 263 return keccak_update(s, buf, len); 264 } 265 266 int 267 keccak_digest_sum(const keccak_state *s, uint8_t *out, size_t outlen) 268 { 269 if (s == NULL) 270 return -1; 271 if (s->delim != KECCAK_DELIM_DIGEST) 272 return -1; 273 if (out == NULL || outlen > 4 * (KECCAK_MAX_RATE - s->rate) / 8) 274 return -1; 275 276 // Work in a copy so that incremental/rolling hashes are easy. 277 keccak_state s_tmp; 278 keccak_clone(&s_tmp, s); 279 keccak_finalize(&s_tmp); 280 int ret = keccak_squeeze(&s_tmp, out, outlen); 281 keccak_cleanse(&s_tmp); 282 return ret; 283 } 284 285 int 286 keccak_xof_init(keccak_state *s, size_t bits) 287 { 288 if (s == NULL) 289 return -1; 290 if (bits != 128 && bits != 256) 291 return -1; 292 293 keccak_cleanse(s); 294 s->rate = KECCAK_RATE(bits); 295 s->delim = KECCAK_DELIM_XOF; 296 return 0; 297 } 298 299 int 300 keccak_xof_absorb(keccak_state *s, const uint8_t *buf, size_t len) 301 { 302 if (s == NULL) 303 return -1; 304 if (s->delim != KECCAK_DELIM_XOF) 305 return -1; 306 307 return keccak_update(s, buf, len); 308 } 309 310 int 311 keccak_xof_squeeze(keccak_state *s, uint8_t *out, size_t outlen) 312 { 313 if (s == NULL) 314 return -1; 315 if (s->delim != KECCAK_DELIM_XOF) 316 return -1; 317 318 if (!s->finalized) 319 keccak_finalize(s); 320 321 return keccak_squeeze(s, out, outlen); 322 } 323 324 void 325 keccak_clone(keccak_state *out, const keccak_state *in) 326 { 327 memcpy(out, in, sizeof(keccak_state)); 328 } 329 330 void 331 keccak_cleanse(keccak_state *s) 332 { 333 memwipe(s, 0, sizeof(keccak_state)); 334 } 335 336 /** The sponge-based hash construction. **/ 337 static inline int hash(uint8_t* out, size_t outlen, 338 const uint8_t* in, size_t inlen, 339 size_t bits, uint8_t delim) { 340 if ((out == NULL) || ((in == NULL) && inlen != 0)) { 341 return -1; 342 } 343 344 int ret = 0; 345 keccak_state s; 346 keccak_cleanse(&s); 347 348 switch (delim) { 349 case KECCAK_DELIM_DIGEST: 350 ret |= keccak_digest_init(&s, bits); 351 ret |= keccak_digest_update(&s, in, inlen); 352 // Use the internal API instead of sum to avoid the memcpy. 353 keccak_finalize(&s); 354 ret |= keccak_squeeze(&s, out, outlen); 355 break; 356 case KECCAK_DELIM_XOF: 357 ret |= keccak_xof_init(&s, bits); 358 ret |= keccak_xof_absorb(&s, in, inlen); 359 ret |= keccak_xof_squeeze(&s, out, outlen); 360 break; 361 default: 362 return -1; 363 } 364 keccak_cleanse(&s); 365 return ret; 366 } 367 368 /*** Helper macros to define SHA3 and SHAKE instances. ***/ 369 #define defshake(bits) \ 370 int shake##bits(uint8_t* out, size_t outlen, \ 371 const uint8_t* in, size_t inlen) { \ 372 return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_XOF); \ 373 } 374 #define defsha3(bits) \ 375 int sha3_##bits(uint8_t* out, size_t outlen, \ 376 const uint8_t* in, size_t inlen) { \ 377 if (outlen > (bits/8)) { \ 378 return -1; \ 379 } \ 380 return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_DIGEST); \ 381 } 382 383 /*** FIPS202 SHAKE VOFs ***/ 384 defshake(128) 385 defshake(256) 386 387 /*** FIPS202 SHA3 FOFs ***/ 388 defsha3(224) 389 defsha3(256) 390 defsha3(384) 391 defsha3(512)