tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

bloomfilt.c (3764B)


      1 /* Copyright (c) 2003-2004, Roger Dingledine
      2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
      3 * Copyright (c) 2007-2021, The Tor Project, Inc. */
      4 /* See LICENSE for licensing information */
      5 
      6 /**
      7 * \file bloomfilt.c
      8 * \brief Uses bitarray_t to implement a bloom filter.
      9 **/
     10 
     11 #include <stdlib.h>
     12 
     13 #include "lib/malloc/malloc.h"
     14 #include "lib/container/bloomfilt.h"
     15 #include "lib/intmath/bits.h"
     16 #include "lib/log/util_bug.h"
     17 #include "ext/siphash.h"
     18 
     19 /** How many bloom-filter bits we set per address. This is twice the
     20 * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit
     21 * values. */
     22 #define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2)
     23 
     24 struct bloomfilt_t {
     25  /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each
     26   * items. */
     27  struct sipkey key[BLOOMFILT_N_HASHES];
     28  bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */
     29  uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always
     30                  * one less than a power of two. */
     31  bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
     32 };
     33 
     34 #define BIT(set, n) ((n) & (set)->mask)
     35 
     36 /** Add the element <b>item</b> to <b>set</b>. */
     37 void
     38 bloomfilt_add(bloomfilt_t *set,
     39              const void *item)
     40 {
     41  int i;
     42  for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
     43    uint64_t h = set->hashfn(&set->key[i], item);
     44    uint32_t high_bits = (uint32_t)(h >> 32);
     45    uint32_t low_bits = (uint32_t)(h);
     46    bitarray_set(set->ba, BIT(set, high_bits));
     47    bitarray_set(set->ba, BIT(set, low_bits));
     48  }
     49 }
     50 
     51 /** If <b>item</b> is in <b>set</b>, return nonzero.  Otherwise,
     52 * <em>probably</em> return zero. */
     53 int
     54 bloomfilt_probably_contains(const bloomfilt_t *set,
     55                            const void *item)
     56 {
     57  int i, matches = 0;
     58  for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
     59    uint64_t h = set->hashfn(&set->key[i], item);
     60    uint32_t high_bits = (uint32_t)(h >> 32);
     61    uint32_t low_bits = (uint32_t)(h);
     62    // Note that !! is necessary here, since bitarray_is_set does not
     63    // necessarily return 1 on true.
     64    matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
     65    matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
     66  }
     67  return matches == N_BITS_PER_ITEM;
     68 }
     69 
     70 /** Return a newly allocated bloomfilt_t, optimized to hold a total of
     71 * <b>max_elements</b> elements with a reasonably low false positive weight.
     72 *
     73 * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide
     74 * functions of the items, and the key material <b>random_key</b> to
     75 * key the hash.  There must be BLOOMFILT_KEY_LEN bytes in the supplied key.
     76 **/
     77 bloomfilt_t *
     78 bloomfilt_new(int max_elements,
     79              bloomfilt_hash_fn hashfn,
     80              const uint8_t *random_key)
     81 {
     82  /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
     83   * is the number of hash functions per entry, m is the bits in the array,
     84   * and n is the number of elements inserted.  For us, k==4, n<=max_elements,
     85   * and m==n_bits= approximately max_elements*32.  This gives
     86   *   P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
     87   *
     88   * It would be more optimal in space vs false positives to get this false
     89   * positive rate by going for k==13, and m==18.5n, but we also want to
     90   * conserve CPU, and k==13 is pretty big.
     91   */
     92  int n_bits = 1u << (tor_log2(max_elements)+5);
     93  bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t));
     94  r->mask = n_bits - 1;
     95  r->ba = bitarray_init_zero(n_bits);
     96 
     97  tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN);
     98  memcpy(r->key, random_key, sizeof(r->key));
     99 
    100  r->hashfn = hashfn;
    101 
    102  return r;
    103 }
    104 
    105 /** Free all storage held in <b>set</b>. */
    106 void
    107 bloomfilt_free_(bloomfilt_t *set)
    108 {
    109  if (!set)
    110    return;
    111  bitarray_free(set->ba);
    112  tor_free(set);
    113 }