tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

polyval.c (17236B)


      1 /* Copyright (c) 2025, The Tor Project, Inc. */
      2 /* See LICENSE for licensing information */
      3 
      4 /**
      5 * \file polyval.h
      6 * \brief Implementation for polyval universal hash function.
      7 *
      8 * Polyval, which was first defined for AES-GCM-SIV, is a
      9 * universal hash based on multiplication in GF(2^128).
     10 * Unlike the more familiar GHASH, it is defined to work on
     11 * little-endian inputs, and so is more straightforward and efficient
     12 * on little-endian architectures.
     13 *
     14 * In Tor we use it as part of the Counter Galois Onion relay
     15 * encryption format.
     16 **/
     17 
     18 /* Implementation notes:
     19 *
     20 * Our implementation is based on the GHASH code from BearSSL
     21 * by Thomas Pornin.  There are three implementations:
     22 *
     23 * pclmul.c -- An x86-only version, based on the CLMUL instructions
     24 * introduced for westlake processors in 2010.
     25 *
     26 * ctmul64.c -- A portable contant-time implementation for 64-bit
     27 * processors.
     28 *
     29 * ctmul.c -- A portable constant-time implementation for 32-bit
     30 * processors with an efficient 32x32->64 multiply instruction.
     31 *
     32 * Note that the "ctmul" implementations are only constant-time
     33 * if the corresponding CPU multiply and rotate instructions are
     34 * also constant-time.  But that's an architectural assumption we
     35 * make in Tor.
     36 */
     37 
     38 #include "ext/polyval/polyval.h"
     39 #include "lib/cc/compat_compiler.h"
     40 
     41 #include <string.h>
     42 
     43 #ifdef PV_USE_PCLMUL_DETECT
     44 #include <cpuid.h>
     45 #endif
     46 
     47 typedef pv_u128_ u128;
     48 
     49 /* ========
     50 * We declare these functions, to help implement polyval.
     51 *
     52 * They have different definitions depending on our representation
     53 * of 128-bit integers.
     54 */
     55 #if 0
     56 /**
     57 * Read a u128-bit little-endian integer from 'bytes',
     58 * which may not be aligned.
     59 */
     60 static inline u128 u128_from_bytes(const uint8_t *bytes);
     61 /**
     62 * Store a u128-bit little-endian integer to 'bytes_out',
     63 * which may not be aligned.
     64 */
     65 static inline void u128_to_bytes(u128, uint8_t *bytes_out);
     66 /**
     67 * XOR a u128 into the y field of a polyval_t struct.
     68 *
     69 * (Within the polyval struct, perform "y ^= v").
     70 */
     71 static inline void pv_xor_y(polyval_t *, u128 v);
     72 
     73 /* ========
     74 * The function which we expect our backend to implement.
     75 */
     76 /**
     77 * Within the polyval struct, perform "y *= h".
     78 *
     79 * (This is a carryless multiply in the Polyval galois field)
     80 */
     81 static void pv_mul_y_h(polyval_t *);h
     82 #endif
     83 
     84 /* =====
     85 * Endianness conversion for big-endian platforms
     86 */
     87 #ifdef WORDS_BIG_ENDIAN
     88 #ifdef __GNUC__
     89 #define bswap64(x) __builtin_bswap64(x)
     90 #define bswap32(x) __builtin_bswap32(x)
     91 #else
     92 /* The (compiler should optimize these into a decent bswap instruction) */
     93 static inline uint64_t
     94 bswap64(uint64_t v)
     95 {
     96  return
     97    ((value & 0xFF00000000000000) >> 56) |
     98    ((value & 0x00FF000000000000) >> 48) |
     99    ((value & 0x0000FF0000000000) >> 40) |
    100    ((value & 0x000000FF00000000) >> 32) |
    101    ((value & 0x00000000FF000000) >> 24) |
    102    ((value & 0x0000000000FF0000) >> 16) |
    103    ((value & 0x000000000000FF00) >> 8) |
    104    ((value & 0x00000000000000FF));
    105 }
    106 static inline uint64_t
    107 bswap32(uint64_t v)
    108 {
    109  return
    110    ((value & 0xFF000000) >> 24) |
    111    ((value & 0x00FF0000) >> 16) |
    112    ((value & 0x0000FF00) >> 8) |
    113    ((value & 0x000000FF));
    114 }
    115 #endif
    116 #endif
    117 
    118 #ifdef WORDS_BIG_ENDIAN
    119 #define convert_byte_order64(x) bswap64(x)
    120 #define convert_byte_order32(x) bswap32(x)
    121 #else
    122 #define convert_byte_order64(x) (x)
    123 #define convert_byte_order32(x) (x)
    124 #endif
    125 
    126 #if defined PV_USE_PCLMUL_UNCONDITIONAL
    127 #define PCLMUL_MEMBER(v) (v)
    128 #define PV_USE_PCLMUL
    129 
    130 #elif defined PV_USE_PCLMUL_DETECT
    131 #define PCLMUL_MEMBER(v) (v).u128x1
    132 #define CTMUL64_MEMBER(v) (v).u64x2
    133 #define PV_USE_PCLMUL
    134 #define PV_USE_CTMUL64
    135 
    136 #elif defined PV_USE_CTMUL64
    137 #define CTMUL64_MEMBER(v) (v)
    138 #endif
    139 
    140 #ifdef PV_USE_PCLMUL
    141 #include "ext/polyval/pclmul.c"
    142 
    143 DISABLE_GCC_WARNING("-Waggregate-return")
    144 static inline u128
    145 u128_from_bytes_pclmul(const uint8_t *bytes)
    146 {
    147  u128 r;
    148  PCLMUL_MEMBER(r) = _mm_loadu_si128((const __m128i*)bytes);
    149  return r;
    150 }
    151 ENABLE_GCC_WARNING("-Waggregate-return")
    152 
    153 static inline void
    154 u128_to_bytes_pclmul(u128 val, uint8_t *bytes_out)
    155 {
    156  _mm_storeu_si128((__m128i*)bytes_out, PCLMUL_MEMBER(val));
    157 }
    158 static inline void
    159 pv_xor_y_pclmul(polyval_t *pv, u128 v)
    160 {
    161  PCLMUL_MEMBER(pv->y) = _mm_xor_si128(PCLMUL_MEMBER(pv->y),
    162                                       PCLMUL_MEMBER(v));
    163 }
    164 #endif
    165 
    166 #if defined(PV_USE_CTMUL64)
    167 #include "ext/polyval/ctmul64.c"
    168 
    169 DISABLE_GCC_WARNING("-Waggregate-return")
    170 static inline u128
    171 u128_from_bytes_ctmul64(const uint8_t *bytes)
    172 {
    173  u128 r;
    174  memcpy(&CTMUL64_MEMBER(r).lo, bytes, 8);
    175  memcpy(&CTMUL64_MEMBER(r).hi, bytes + 8, 8);
    176  CTMUL64_MEMBER(r).lo = convert_byte_order64(CTMUL64_MEMBER(r).lo);
    177  CTMUL64_MEMBER(r).hi = convert_byte_order64(CTMUL64_MEMBER(r).hi);
    178  return r;
    179 }
    180 ENABLE_GCC_WARNING("-Waggregate-return")
    181 
    182 static inline void
    183 u128_to_bytes_ctmul64(u128 val, uint8_t *bytes_out)
    184 {
    185  uint64_t lo = convert_byte_order64(CTMUL64_MEMBER(val).lo);
    186  uint64_t hi = convert_byte_order64(CTMUL64_MEMBER(val).hi);
    187  memcpy(bytes_out, &lo, 8);
    188  memcpy(bytes_out + 8, &hi, 8);
    189 }
    190 static inline void
    191 pv_xor_y_ctmul64(polyval_t *pv, u128 val)
    192 {
    193  CTMUL64_MEMBER(pv->y).lo ^= CTMUL64_MEMBER(val).lo;
    194  CTMUL64_MEMBER(pv->y).hi ^= CTMUL64_MEMBER(val).hi;
    195 }
    196 #endif
    197 
    198 #if defined(PV_USE_CTMUL)
    199 #include "ext/polyval/ctmul.c"
    200 
    201 DISABLE_GCC_WARNING("-Waggregate-return")
    202 static inline u128
    203 u128_from_bytes_ctmul(const uint8_t *bytes)
    204 {
    205  u128 r;
    206  memcpy(&r.v, bytes, 16);
    207  for (int i = 0; i < 4; ++i) {
    208    r.v[i] = convert_byte_order32(r.v[i]);
    209  }
    210  return r;
    211 }
    212 ENABLE_GCC_WARNING("-Waggregate-return")
    213 
    214 static inline void
    215 u128_to_bytes_ctmul(u128 val, uint8_t *bytes_out)
    216 {
    217  uint32_t v[4];
    218  for (int i = 0; i < 4; ++i) {
    219    v[i] = convert_byte_order32(val.v[i]);
    220  }
    221  memcpy(bytes_out, v, 16);
    222 }
    223 static inline void
    224 pv_xor_y_ctmul(polyval_t *pv, u128 val)
    225 {
    226  for (int i = 0; i < 4; ++i) {
    227    pv->y.v[i] ^= val.v[i];
    228  }
    229 }
    230 #endif
    231 
    232 struct expanded_key_none {};
    233 static inline void add_multiple_none(polyval_t *pv,
    234                                     const uint8_t *input,
    235                                     const struct expanded_key_none *expanded)
    236 {
    237  (void) pv;
    238  (void) input;
    239  (void) expanded;
    240 }
    241 static inline void expand_key_none(const polyval_t *inp,
    242                                   struct expanded_key_none *out)
    243 {
    244  (void) inp;
    245  (void) out;
    246 }
    247 
    248 /* Kludge: a special value to use for block_stride when we don't support
    249 * processing multiple blocks at once.  Previously we used 0, but that
    250 * caused warnings with some comparisons. */
    251 #define BLOCK_STRIDE_NONE  0xffff
    252 
    253 DISABLE_GCC_WARNING("-Waggregate-return")
    254 #define PV_DECLARE(prefix,                                              \
    255                   st,                                                  \
    256                   u128_from_bytes,                                     \
    257                   u128_to_bytes,                                       \
    258                   pv_xor_y,                                            \
    259                   pv_mul_y_h,                                          \
    260                   block_stride,                                        \
    261                   expanded_key_tp, expand_fn, add_multiple_fn)         \
    262  st void                                                               \
    263  prefix ## polyval_key_init(polyval_key_t *pvk, const uint8_t *key)    \
    264  {                                                                     \
    265    pvk->h = u128_from_bytes(key);                                      \
    266  }                                                                     \
    267  st void                                                               \
    268  prefix ## polyval_init(polyval_t *pv, const uint8_t *key)             \
    269  {                                                                     \
    270    polyval_key_init(&pv->key, key);                                    \
    271    memset(&pv->y, 0, sizeof(u128));                                    \
    272  }                                                                     \
    273  st void                                                               \
    274  prefix ## polyval_init_from_key(polyval_t *pv, const polyval_key_t *key) \
    275  {                                                                     \
    276    memcpy(&pv->key, key, sizeof(polyval_key_t));                       \
    277    memset(&pv->y, 0, sizeof(u128));                                    \
    278  }                                                                     \
    279  st void                                                               \
    280  prefix ## polyval_add_block(polyval_t *pv, const uint8_t *block)      \
    281  {                                                                     \
    282    u128 b = u128_from_bytes(block);                                    \
    283    pv_xor_y(pv, b);                                                    \
    284    pv_mul_y_h(pv);                                                     \
    285  }                                                                     \
    286  st void                                                               \
    287  prefix ## polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n) \
    288  {                                                                     \
    289    /* since block_stride is a constant, this should get optimized */   \
    290    if ((block_stride != BLOCK_STRIDE_NONE)                             \
    291        && n >= (block_stride) * 16) {                                  \
    292      expanded_key_tp expanded_key;                                     \
    293      expand_fn(pv, &expanded_key);                                     \
    294      while (n >= (block_stride) * 16) {                                \
    295        add_multiple_fn(pv, data, &expanded_key);                       \
    296        n -= block_stride*16;                                           \
    297        data += block_stride * 16;                                      \
    298      }                                                                 \
    299    }                                                                   \
    300    while (n > 16) {                                                    \
    301      polyval_add_block(pv, data);                                      \
    302      data += 16;                                                       \
    303      n -= 16;                                                          \
    304    }                                                                   \
    305    if (n) {                                                            \
    306      uint8_t block[16];                                                \
    307      memset(&block, 0, sizeof(block));                                 \
    308      memcpy(block, data, n);                                           \
    309      polyval_add_block(pv, block);                                     \
    310    }                                                                   \
    311  }                                                                     \
    312  st void                                                               \
    313  prefix ## polyval_get_tag(const polyval_t *pv, uint8_t *tag_out)      \
    314  {                                                                     \
    315    u128_to_bytes(pv->y, tag_out);                                      \
    316  }                                                                     \
    317  st void                                                               \
    318  prefix ## polyval_reset(polyval_t *pv)                                \
    319  {                                                                     \
    320    memset(&pv->y, 0, sizeof(u128));                                    \
    321  }
    322 ENABLE_GCC_WARNING("-Waggregate-return")
    323 
    324 #ifdef PV_USE_PCLMUL_DETECT
    325 /* We use a boolean to distinguish whether to use the PCLMUL instructions,
    326 * but instead we could use function pointers.  It's probably worth
    327 * benchmarking, though it's unlikely to make a measurable difference.
    328 */
    329 static bool use_pclmul = false;
    330 
    331 /* Declare _both_ variations of our code, statically,
    332 * with different prefixes. */
    333 DISABLE_GCC_WARNING("-Waggregate-return")
    334 PV_DECLARE(pclmul_, static,
    335           u128_from_bytes_pclmul,
    336           u128_to_bytes_pclmul,
    337           pv_xor_y_pclmul,
    338           pv_mul_y_h_pclmul,
    339           PV_BLOCK_STRIDE,
    340           pv_expanded_key_t,
    341           expand_key_pclmul,
    342           pv_add_multiple_pclmul)
    343 
    344 PV_DECLARE(ctmul64_, static,
    345           u128_from_bytes_ctmul64,
    346           u128_to_bytes_ctmul64,
    347           pv_xor_y_ctmul64,
    348           pv_mul_y_h_ctmul64,
    349           BLOCK_STRIDE_NONE,
    350           struct expanded_key_none,
    351           expand_key_none,
    352           add_multiple_none)
    353 ENABLE_GCC_WARNING("-Waggregate-return")
    354 
    355 void
    356 polyval_key_init(polyval_key_t *pv, const uint8_t *key)
    357 {
    358  if (use_pclmul)
    359    pclmul_polyval_key_init(pv, key);
    360  else
    361    ctmul64_polyval_key_init(pv, key);
    362 }
    363 void
    364 polyval_init(polyval_t *pv, const uint8_t *key)
    365 {
    366  if (use_pclmul)
    367    pclmul_polyval_init(pv, key);
    368  else
    369    ctmul64_polyval_init(pv, key);
    370 }
    371 void
    372 polyval_init_from_key(polyval_t *pv, const polyval_key_t *key)
    373 {
    374  if (use_pclmul)
    375    pclmul_polyval_init_from_key(pv, key);
    376  else
    377    ctmul64_polyval_init_from_key(pv, key);
    378 }
    379 void
    380 polyval_add_block(polyval_t *pv, const uint8_t *block)
    381 {
    382  if (use_pclmul)
    383    pclmul_polyval_add_block(pv, block);
    384  else
    385    ctmul64_polyval_add_block(pv, block);
    386 }
    387 void
    388 polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n)
    389 {
    390  if (use_pclmul)
    391    pclmul_polyval_add_zpad(pv, data, n);
    392  else
    393    ctmul64_polyval_add_zpad(pv, data, n);
    394 }
    395 void
    396 polyval_get_tag(const polyval_t *pv, uint8_t *tag_out)
    397 {
    398  if (use_pclmul)
    399    pclmul_polyval_get_tag(pv, tag_out);
    400  else
    401    ctmul64_polyval_get_tag(pv, tag_out);
    402 }
    403 void
    404 polyval_reset(polyval_t *pv)
    405 {
    406  if (use_pclmul)
    407    pclmul_polyval_reset(pv);
    408  else
    409    ctmul64_polyval_reset(pv);
    410 }
    411 
    412 #elif defined(PV_USE_PCLMUL)
    413 PV_DECLARE(, ,
    414           u128_from_bytes_pclmul,
    415           u128_to_bytes_pclmul,
    416           pv_xor_y_pclmul,
    417           pv_mul_y_h_pclmul,
    418           PV_BLOCK_STRIDE,
    419           pv_expanded_key_t,
    420           expand_key_pclmul,
    421           pv_add_multiple_pclmul)
    422 #elif defined(PV_USE_CTMUL64)
    423 PV_DECLARE(, ,
    424           u128_from_bytes_ctmul64,
    425           u128_to_bytes_ctmul64,
    426           pv_xor_y_ctmul64,
    427           pv_mul_y_h_ctmul64,
    428           BLOCK_STRIDE_NONE,
    429           struct expanded_key_none,
    430           expand_key_none,
    431           add_multiple_none)
    432 
    433 #elif defined(PV_USE_CTMUL)
    434 DISABLE_GCC_WARNING("-Waggregate-return")
    435 PV_DECLARE(, , u128_from_bytes_ctmul,
    436           u128_to_bytes_ctmul,
    437           pv_xor_y_ctmul,
    438           pv_mul_y_h_ctmul,
    439           BLOCK_STRIDE_NONE,
    440           struct expanded_key_none,
    441           expand_key_none,
    442           add_multiple_none)
    443 ENABLE_GCC_WARNING("-Waggregate-return")
    444 #endif
    445 
    446 #ifdef PV_USE_PCLMUL_DETECT
    447 void
    448 polyval_detect_implementation(void)
    449 {
    450  unsigned int eax, ebx, ecx, edx;
    451  use_pclmul = false;
    452  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    453    if (0 != (ecx & bit_PCLMUL)) {
    454      use_pclmul = true;
    455    }
    456  }
    457 }
    458 #else
    459 void
    460 polyval_detect_implementation(void)
    461 {
    462 }
    463 #endif
    464 
    465 #ifdef POLYVAL_USE_EXPANDED_KEYS
    466 
    467 #ifdef PV_USE_PCLMUL_DETECT
    468 #define SHOULD_EXPAND() (use_pclmul)
    469 #else
    470 #define SHOULD_EXPAND() (1)
    471 #endif
    472 
    473 void
    474 polyvalx_init(polyvalx_t *pvx, const uint8_t *key)
    475 {
    476  polyval_init(&pvx->pv, key);
    477  if (SHOULD_EXPAND()) {
    478    expand_key_pclmul(&pvx->pv, &pvx->expanded);
    479  }
    480 }
    481 void
    482 polyvalx_init_from_key(polyvalx_t *pvx, const polyval_key_t *key)
    483 {
    484  polyval_init_from_key(&pvx->pv, key);
    485  if (SHOULD_EXPAND()) {
    486    expand_key_pclmul(&pvx->pv, &pvx->expanded);
    487  }
    488 }
    489 void
    490 polyvalx_add_block(polyvalx_t *pvx, const uint8_t *block)
    491 {
    492  polyval_add_block(&pvx->pv, block);
    493 }
    494 void
    495 polyvalx_add_zpad(polyvalx_t *pvx, const uint8_t *data, size_t n)
    496 {
    497  if (SHOULD_EXPAND() && n >= PV_BLOCK_STRIDE * 16) {
    498    while (n > PV_BLOCK_STRIDE * 16) {
    499      pv_add_multiple_pclmul(&pvx->pv, data, &pvx->expanded);
    500      data += PV_BLOCK_STRIDE * 16;
    501      n -= PV_BLOCK_STRIDE * 16;
    502    }
    503  }
    504  while (n > 16) {
    505    polyval_add_block(&pvx->pv, data);
    506    data += 16;
    507    n -= 16;
    508  }
    509  if (n) {
    510    uint8_t block[16];
    511    memset(&block, 0, sizeof(block));
    512    memcpy(block, data, n);
    513    polyval_add_block(&pvx->pv, block);
    514  }
    515 }
    516 void
    517 polyvalx_get_tag(const polyvalx_t *pvx, uint8_t *tag_out)
    518 {
    519  polyval_get_tag(&pvx->pv, tag_out);
    520 }
    521 void polyvalx_reset(polyvalx_t *pvx)
    522 {
    523  polyval_reset(&pvx->pv);
    524 }
    525 #endif
    526 
    527 #if 0
    528 #include <stdio.h>
    529 int
    530 main(int c, char **v)
    531 {
    532  // From RFC 8452 appendix A
    533  uint8_t key[16] =
    534    { 0x25, 0x62, 0x93, 0x47, 0x58, 0x92, 0x42, 0x76,
    535      0x1d, 0x31, 0xf8, 0x26, 0xba, 0x4b, 0x75, 0x7b  };
    536  uint8_t block1[16] =
    537    { 0x4f, 0x4f, 0x95, 0x66, 0x8c, 0x83, 0xdf, 0xb6,
    538      0x40, 0x17, 0x62, 0xbb, 0x2d, 0x01, 0xa2, 0x62 };
    539  uint8_t block2[16] = {
    540    0xd1, 0xa2, 0x4d, 0xdd, 0x27, 0x21, 0xd0, 0x06,
    541    0xbb, 0xe4, 0x5f, 0x20, 0xd3, 0xc9, 0xf3, 0x62 };
    542  uint8_t expect_result[16] = {
    543    0xf7, 0xa3, 0xb4, 0x7b, 0x84, 0x61, 0x19, 0xfa,
    544    0xe5, 0xb7, 0x86, 0x6c, 0xf5, 0xe5, 0xb7, 0x7e };
    545 
    546  polyval_t pv;
    547  uint8_t tag[16];
    548  polyval_init(&pv, key);
    549  polyval_add_block(&pv, block1);
    550  polyval_add_block(&pv, block2);
    551  polyval_get_tag(&pv, tag);
    552  if (!memcmp(expect_result, tag, 16)) {
    553    puts("OK");
    554    return 0;
    555  }  else {
    556    puts("NOPE");
    557    return 1;
    558  }
    559 }
    560 #endif