tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

geoip.c (16741B)


      1 /* Copyright (c) 2007-2021, The Tor Project, Inc. */
      2 /* See LICENSE for licensing information */
      3 
      4 /**
      5 * \file geoip.c
      6 * \brief Functions related to maintaining an IP-to-country database;
      7 * to summarizing client connections by country to entry guards, bridges,
      8 * and directory servers; and for statistics on answering network status
      9 * requests.
     10 *
     11 * There are two main kinds of functions in this module: geoip functions,
     12 * which map groups of IPv4 and IPv6 addresses to country codes, and
     13 * statistical functions, which collect statistics about different kinds of
     14 * per-country usage.
     15 *
     16 * The geoip lookup tables are implemented as sorted lists of disjoint address
     17 * ranges, each mapping to a singleton geoip_country_t.  These country objects
     18 * are also indexed by their names in a hashtable.
     19 *
     20 * The tables are populated from disk at startup by the geoip_load_file()
     21 * function.  For more information on the file format they read, see that
     22 * function.  See the scripts and the README file in src/config for more
     23 * information about how those files are generated.
     24 *
     25 * Tor uses GeoIP information in order to implement user requests (such as
     26 * ExcludeNodes {cc}), and to keep track of how much usage relays are getting
     27 * for each country.
     28 */
     29 
     30 #define GEOIP_PRIVATE
     31 #include "lib/geoip/geoip.h"
     32 #include "lib/container/map.h"
     33 #include "lib/container/order.h"
     34 #include "lib/container/smartlist.h"
     35 #include "lib/crypt_ops/crypto_digest.h"
     36 #include "lib/ctime/di_ops.h"
     37 #include "lib/encoding/binascii.h"
     38 #include "lib/fs/files.h"
     39 #include "lib/log/escape.h"
     40 #include "lib/malloc/malloc.h"
     41 #include "lib/net/address.h" //????
     42 #include "lib/net/inaddr.h"
     43 #include "lib/string/compat_ctype.h"
     44 #include "lib/string/compat_string.h"
     45 #include "lib/string/scanf.h"
     46 #include "lib/string/util_string.h"
     47 
     48 #include <stdio.h>
     49 #include <string.h>
     50 
     51 static void init_geoip_countries(void);
     52 
     53 /** An entry from the GeoIP IPv4 file: maps an IPv4 range to a country. */
     54 typedef struct geoip_ipv4_entry_t {
     55  uint32_t ip_low; /**< The lowest IP in the range, in host order */
     56  uint32_t ip_high; /**< The highest IP in the range, in host order */
     57  intptr_t country; /**< An index into geoip_countries */
     58 } geoip_ipv4_entry_t;
     59 
     60 /** An entry from the GeoIP IPv6 file: maps an IPv6 range to a country. */
     61 typedef struct geoip_ipv6_entry_t {
     62  struct in6_addr ip_low; /**< The lowest IP in the range, in host order */
     63  struct in6_addr ip_high; /**< The highest IP in the range, in host order */
     64  intptr_t country; /**< An index into geoip_countries */
     65 } geoip_ipv6_entry_t;
     66 
     67 /** A list of geoip_country_t */
     68 static smartlist_t *geoip_countries = NULL;
     69 /** A map from lowercased country codes to their position in geoip_countries.
     70 * The index is encoded in the pointer, and 1 is added so that NULL can mean
     71 * not found. */
     72 static strmap_t *country_idxplus1_by_lc_code = NULL;
     73 /** List of all known geoip_ipv4_entry_t sorted
     74 * by their respective ip_low values. */
     75 static smartlist_t *geoip_ipv4_entries = NULL;
     76 /** List of all known geoip_ipv6_entry_t, sorted by their respective
     77 * ip_low values. */
     78 static smartlist_t *geoip_ipv6_entries = NULL;
     79 
     80 /** SHA1 digest of the IPv4 GeoIP file to include in extra-info
     81 * descriptors. */
     82 static char geoip_digest[DIGEST_LEN];
     83 /** SHA1 digest of the IPv6 GeoIP file to include in extra-info
     84 * descriptors. */
     85 static char geoip6_digest[DIGEST_LEN];
     86 
     87 /** Return a list of geoip_country_t for all known countries. */
     88 const smartlist_t *
     89 geoip_get_countries(void)
     90 {
     91  if (geoip_countries == NULL) {
     92    init_geoip_countries();
     93  }
     94  return geoip_countries;
     95 }
     96 
     97 /** Return the index of the <b>country</b>'s entry in the GeoIP
     98 * country list if it is a valid 2-letter country code, otherwise
     99 * return -1. */
    100 MOCK_IMPL(country_t,
    101 geoip_get_country,(const char *country))
    102 {
    103  void *idxplus1_;
    104  intptr_t idx;
    105 
    106  idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
    107  if (!idxplus1_)
    108    return -1;
    109 
    110  idx = ((uintptr_t)idxplus1_)-1;
    111  return (country_t)idx;
    112 }
    113 
    114 /** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b>
    115 * and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */
    116 static void
    117 geoip_add_entry(const tor_addr_t *low, const tor_addr_t *high,
    118                const char *country)
    119 {
    120  intptr_t idx;
    121  void *idxplus1_;
    122 
    123  IF_BUG_ONCE(tor_addr_family(low) != tor_addr_family(high))
    124    return;
    125  IF_BUG_ONCE(tor_addr_compare(high, low, CMP_EXACT) < 0)
    126    return;
    127 
    128  idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
    129 
    130  if (!idxplus1_) {
    131    geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
    132    strlcpy(c->countrycode, country, sizeof(c->countrycode));
    133    tor_strlower(c->countrycode);
    134    smartlist_add(geoip_countries, c);
    135    idx = smartlist_len(geoip_countries) - 1;
    136    strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
    137  } else {
    138    idx = ((uintptr_t)idxplus1_)-1;
    139  }
    140  {
    141    geoip_country_t *c = smartlist_get(geoip_countries, (int)idx);
    142    tor_assert(!strcasecmp(c->countrycode, country));
    143  }
    144 
    145  if (tor_addr_family(low) == AF_INET) {
    146    geoip_ipv4_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv4_entry_t));
    147    ent->ip_low = tor_addr_to_ipv4h(low);
    148    ent->ip_high = tor_addr_to_ipv4h(high);
    149    ent->country = idx;
    150    smartlist_add(geoip_ipv4_entries, ent);
    151  } else if (tor_addr_family(low) == AF_INET6) {
    152    geoip_ipv6_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv6_entry_t));
    153    ent->ip_low = *tor_addr_to_in6_assert(low);
    154    ent->ip_high = *tor_addr_to_in6_assert(high);
    155    ent->country = idx;
    156    smartlist_add(geoip_ipv6_entries, ent);
    157  }
    158 }
    159 
    160 /** Add an entry to the GeoIP table indicated by <b>family</b>,
    161 * parsing it from <b>line</b>. The format is as for geoip_load_file(). */
    162 STATIC int
    163 geoip_parse_entry(const char *line, sa_family_t family)
    164 {
    165  tor_addr_t low_addr, high_addr;
    166  char c[3];
    167  char *country = NULL;
    168 
    169  if (!geoip_countries)
    170    init_geoip_countries();
    171  if (family == AF_INET) {
    172    if (!geoip_ipv4_entries)
    173      geoip_ipv4_entries = smartlist_new();
    174  } else if (family == AF_INET6) {
    175    if (!geoip_ipv6_entries)
    176      geoip_ipv6_entries = smartlist_new();
    177  } else {
    178    log_warn(LD_GENERAL, "Unsupported family: %d", family);
    179    return -1;
    180  }
    181 
    182  while (TOR_ISSPACE(*line))
    183    ++line;
    184  if (*line == '#')
    185    return 0;
    186 
    187  char buf[512];
    188  if (family == AF_INET) {
    189    unsigned int low, high;
    190    if (tor_sscanf(line,"%u,%u,%2s", &low, &high, c) == 3 ||
    191        tor_sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, c) == 3) {
    192      tor_addr_from_ipv4h(&low_addr, low);
    193      tor_addr_from_ipv4h(&high_addr, high);
    194    } else
    195      goto fail;
    196    country = c;
    197  } else {                      /* AF_INET6 */
    198    char *low_str, *high_str;
    199    struct in6_addr low, high;
    200    char *strtok_state;
    201    strlcpy(buf, line, sizeof(buf));
    202    low_str = tor_strtok_r(buf, ",", &strtok_state);
    203    if (!low_str)
    204      goto fail;
    205    high_str = tor_strtok_r(NULL, ",", &strtok_state);
    206    if (!high_str)
    207      goto fail;
    208    country = tor_strtok_r(NULL, "\n", &strtok_state);
    209    if (!country)
    210      goto fail;
    211    if (strlen(country) != 2)
    212      goto fail;
    213    if (tor_inet_pton(AF_INET6, low_str, &low) <= 0)
    214      goto fail;
    215    tor_addr_from_in6(&low_addr, &low);
    216    if (tor_inet_pton(AF_INET6, high_str, &high) <= 0)
    217      goto fail;
    218    tor_addr_from_in6(&high_addr, &high);
    219  }
    220  geoip_add_entry(&low_addr, &high_addr, country);
    221  return 0;
    222 
    223  fail:
    224  log_warn(LD_GENERAL, "Unable to parse line from GEOIP %s file: %s",
    225           family == AF_INET ? "IPv4" : "IPv6", escaped(line));
    226  return -1;
    227 }
    228 
    229 /** Sorting helper: return -1, 1, or 0 based on comparison of two
    230 * geoip_ipv4_entry_t */
    231 static int
    232 geoip_ipv4_compare_entries_(const void **_a, const void **_b)
    233 {
    234  const geoip_ipv4_entry_t *a = *_a, *b = *_b;
    235  if (a->ip_low < b->ip_low)
    236    return -1;
    237  else if (a->ip_low > b->ip_low)
    238    return 1;
    239  else
    240    return 0;
    241 }
    242 
    243 /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
    244 * to a uint32_t in host order) to a geoip_ipv4_entry_t */
    245 static int
    246 geoip_ipv4_compare_key_to_entry_(const void *_key, const void **_member)
    247 {
    248  /* No alignment issue here, since _key really is a pointer to uint32_t */
    249  const uint32_t addr = *(uint32_t *)_key;
    250  const geoip_ipv4_entry_t *entry = *_member;
    251  if (addr < entry->ip_low)
    252    return -1;
    253  else if (addr > entry->ip_high)
    254    return 1;
    255  else
    256    return 0;
    257 }
    258 
    259 /** Sorting helper: return -1, 1, or 0 based on comparison of two
    260 * geoip_ipv6_entry_t */
    261 static int
    262 geoip_ipv6_compare_entries_(const void **_a, const void **_b)
    263 {
    264  const geoip_ipv6_entry_t *a = *_a, *b = *_b;
    265  return fast_memcmp(a->ip_low.s6_addr, b->ip_low.s6_addr,
    266                     sizeof(struct in6_addr));
    267 }
    268 
    269 /** bsearch helper: return -1, 1, or 0 based on comparison of an IPv6
    270 * (a pointer to a in6_addr) to a geoip_ipv6_entry_t */
    271 static int
    272 geoip_ipv6_compare_key_to_entry_(const void *_key, const void **_member)
    273 {
    274  const struct in6_addr *addr = (struct in6_addr *)_key;
    275  const geoip_ipv6_entry_t *entry = *_member;
    276 
    277  if (fast_memcmp(addr->s6_addr, entry->ip_low.s6_addr,
    278             sizeof(struct in6_addr)) < 0)
    279    return -1;
    280  else if (fast_memcmp(addr->s6_addr, entry->ip_high.s6_addr,
    281                  sizeof(struct in6_addr)) > 0)
    282    return 1;
    283  else
    284    return 0;
    285 }
    286 
    287 /** Set up a new list of geoip countries with no countries (yet) set in it,
    288 * except for the unknown country.
    289 */
    290 static void
    291 init_geoip_countries(void)
    292 {
    293  geoip_country_t *geoip_unresolved;
    294  geoip_countries = smartlist_new();
    295  /* Add a geoip_country_t for requests that could not be resolved to a
    296   * country as first element (index 0) to geoip_countries. */
    297  geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t));
    298  strlcpy(geoip_unresolved->countrycode, "??",
    299          sizeof(geoip_unresolved->countrycode));
    300  smartlist_add(geoip_countries, geoip_unresolved);
    301  country_idxplus1_by_lc_code = strmap_new();
    302  strmap_set_lc(country_idxplus1_by_lc_code, "??", (void*)(1));
    303 }
    304 
    305 /** Clear appropriate GeoIP database, based on <b>family</b>, and
    306 * reload it from the file <b>filename</b>. Return 0 on success, -1 on
    307 * failure.
    308 *
    309 * Recognized line formats for IPv4 are:
    310 *   INTIPLOW,INTIPHIGH,CC
    311 * and
    312 *   "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
    313 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
    314 * integers, and CC is a country code.
    315 *
    316 * Recognized line format for IPv6 is:
    317 *   IPV6LOW,IPV6HIGH,CC
    318 * where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
    319 *
    320 * It also recognizes, and skips over, blank lines and lines that start
    321 * with '#' (comments).
    322 */
    323 int
    324 geoip_load_file(sa_family_t family, const char *filename, int severity)
    325 {
    326  FILE *f;
    327  crypto_digest_t *geoip_digest_env = NULL;
    328 
    329  tor_assert(family == AF_INET || family == AF_INET6);
    330 
    331  if (!(f = tor_fopen_cloexec(filename, "r"))) {
    332    log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s.",
    333           filename);
    334    return -1;
    335  }
    336  if (!geoip_countries)
    337    init_geoip_countries();
    338 
    339  if (family == AF_INET) {
    340    if (geoip_ipv4_entries) {
    341      SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e,
    342                        tor_free(e));
    343      smartlist_free(geoip_ipv4_entries);
    344    }
    345    geoip_ipv4_entries = smartlist_new();
    346  } else { /* AF_INET6 */
    347    if (geoip_ipv6_entries) {
    348      SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e,
    349                        tor_free(e));
    350      smartlist_free(geoip_ipv6_entries);
    351    }
    352    geoip_ipv6_entries = smartlist_new();
    353  }
    354  geoip_digest_env = crypto_digest_new();
    355 
    356  log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.",
    357             (family == AF_INET) ? "IPv4" : "IPv6", filename);
    358  while (!feof(f)) {
    359    char buf[512];
    360    if (fgets(buf, (int)sizeof(buf), f) == NULL)
    361      break;
    362    crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf));
    363    /* FFFF track full country name. */
    364    geoip_parse_entry(buf, family);
    365  }
    366  /*XXXX abort and return -1 if no entries/illformed?*/
    367  fclose(f);
    368 
    369  /* Sort list and remember file digests so that we can include it in
    370   * our extra-info descriptors. */
    371  if (family == AF_INET) {
    372    smartlist_sort(geoip_ipv4_entries, geoip_ipv4_compare_entries_);
    373    crypto_digest_get_digest(geoip_digest_env, geoip_digest, DIGEST_LEN);
    374  } else {
    375    /* AF_INET6 */
    376    smartlist_sort(geoip_ipv6_entries, geoip_ipv6_compare_entries_);
    377    crypto_digest_get_digest(geoip_digest_env, geoip6_digest, DIGEST_LEN);
    378  }
    379  crypto_digest_free(geoip_digest_env);
    380 
    381  return 0;
    382 }
    383 
    384 /** Given an IP address in host order, return a number representing the
    385 * country to which that address belongs, -1 for "No geoip information
    386 * available", or 0 for the 'unknown country'.  The return value will always
    387 * be less than geoip_get_n_countries().  To decode it, call
    388 * geoip_get_country_name().
    389 */
    390 STATIC int
    391 geoip_get_country_by_ipv4(uint32_t ipaddr)
    392 {
    393  geoip_ipv4_entry_t *ent;
    394  if (!geoip_ipv4_entries)
    395    return -1;
    396  ent = smartlist_bsearch(geoip_ipv4_entries, &ipaddr,
    397                          geoip_ipv4_compare_key_to_entry_);
    398  return ent ? (int)ent->country : 0;
    399 }
    400 
    401 /** Given an IPv6 address, return a number representing the country to
    402 * which that address belongs, -1 for "No geoip information available", or
    403 * 0 for the 'unknown country'.  The return value will always be less than
    404 * geoip_get_n_countries().  To decode it, call geoip_get_country_name().
    405 */
    406 STATIC int
    407 geoip_get_country_by_ipv6(const struct in6_addr *addr)
    408 {
    409  geoip_ipv6_entry_t *ent;
    410 
    411  if (!geoip_ipv6_entries)
    412    return -1;
    413  ent = smartlist_bsearch(geoip_ipv6_entries, addr,
    414                          geoip_ipv6_compare_key_to_entry_);
    415  return ent ? (int)ent->country : 0;
    416 }
    417 
    418 /** Given an IP address, return a number representing the country to which
    419 * that address belongs, -1 for "No geoip information available", or 0 for
    420 * the 'unknown country'.  The return value will always be less than
    421 * geoip_get_n_countries().  To decode it, call geoip_get_country_name().
    422 */
    423 MOCK_IMPL(int,
    424 geoip_get_country_by_addr,(const tor_addr_t *addr))
    425 {
    426  if (tor_addr_family(addr) == AF_INET) {
    427    return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
    428  } else if (tor_addr_family(addr) == AF_INET6) {
    429    return geoip_get_country_by_ipv6(tor_addr_to_in6(addr));
    430  } else {
    431    return -1;
    432  }
    433 }
    434 
    435 /** Return the number of countries recognized by the GeoIP country list. */
    436 MOCK_IMPL(int,
    437 geoip_get_n_countries,(void))
    438 {
    439  if (!geoip_countries)
    440    init_geoip_countries();
    441  return (int) smartlist_len(geoip_countries);
    442 }
    443 
    444 /** Return the two-letter country code associated with the number <b>num</b>,
    445 * or "??" for an unknown value. */
    446 const char *
    447 geoip_get_country_name(country_t num)
    448 {
    449  if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
    450    geoip_country_t *c = smartlist_get(geoip_countries, num);
    451    return c->countrycode;
    452  } else
    453    return "??";
    454 }
    455 
    456 /** Return true iff we have loaded a GeoIP database.*/
    457 MOCK_IMPL(int,
    458 geoip_is_loaded,(sa_family_t family))
    459 {
    460  tor_assert(family == AF_INET || family == AF_INET6);
    461  if (geoip_countries == NULL)
    462    return 0;
    463  if (family == AF_INET)
    464    return geoip_ipv4_entries != NULL;
    465  else                          /* AF_INET6 */
    466    return geoip_ipv6_entries != NULL;
    467 }
    468 
    469 /** Return the hex-encoded SHA1 digest of the loaded GeoIP file. The
    470 * result does not need to be deallocated, but will be overwritten by the
    471 * next call of hex_str(). */
    472 const char *
    473 geoip_db_digest(sa_family_t family)
    474 {
    475  tor_assert(family == AF_INET || family == AF_INET6);
    476  if (family == AF_INET)
    477    return hex_str(geoip_digest, DIGEST_LEN);
    478  else                          /* AF_INET6 */
    479    return hex_str(geoip6_digest, DIGEST_LEN);
    480 }
    481 
    482 /** Release all storage held by the GeoIP databases and country list. */
    483 STATIC void
    484 clear_geoip_db(void)
    485 {
    486  if (geoip_countries) {
    487    SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
    488    smartlist_free(geoip_countries);
    489  }
    490 
    491  strmap_free(country_idxplus1_by_lc_code, NULL);
    492  if (geoip_ipv4_entries) {
    493    SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, ent,
    494                      tor_free(ent));
    495    smartlist_free(geoip_ipv4_entries);
    496  }
    497  if (geoip_ipv6_entries) {
    498    SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, ent,
    499                      tor_free(ent));
    500    smartlist_free(geoip_ipv6_entries);
    501  }
    502  geoip_countries = NULL;
    503  country_idxplus1_by_lc_code = NULL;
    504  geoip_ipv4_entries = NULL;
    505  geoip_ipv6_entries = NULL;
    506 }
    507 
    508 /** Release all storage held in this file. */
    509 void
    510 geoip_free_all(void)
    511 {
    512  clear_geoip_db();
    513 
    514  memset(geoip_digest, 0, sizeof(geoip_digest));
    515  memset(geoip6_digest, 0, sizeof(geoip6_digest));
    516 }