tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

main.rs (6871B)


      1 /// A basic tool to convert IPFire Location dumps into the CSV formats that Tor
      2 /// expects.
      3 mod db;
      4 
      5 use argh::FromArgs;
      6 use ipnetwork::IpNetwork;
      7 use rangemap::RangeInclusiveMap;
      8 
      9 use std::fs::File;
     10 use std::io::{BufRead, BufReader, BufWriter, Write};
     11 use std::net::{IpAddr, Ipv6Addr};
     12 use std::num::NonZeroU32;
     13 use std::path::PathBuf;
     14 
     15 fn default_ipv4_path() -> PathBuf {
     16    "./geoip".into()
     17 }
     18 fn default_ipv6_path() -> PathBuf {
     19    "./geoip6".into()
     20 }
     21 
// Command-line arguments, parsed by argh.
//
// NOTE: argh turns the `///` doc comments on this struct and on its fields
// into the generated `--help` text, so rewording them changes user-visible
// output. Maintainer notes therefore use plain `//` comments.
#[derive(FromArgs)]
/// Convert an IPFire Location dump into CSV geoip files.
struct Args {
    /// where to store the IPv4 geoip output
    // Falls back to `default_ipv4_path()` when not given; short flag `-4`.
    #[argh(option, default = "default_ipv4_path()", short = '4')]
    output_ipv4: PathBuf,

    /// where to store the IPv6 geoip6 output
    // Falls back to `default_ipv6_path()` when not given; short flag `-6`.
    #[argh(option, default = "default_ipv6_path()", short = '6')]
    output_ipv6: PathBuf,

    /// where to find the dump file
    // No default and not an `Option`, so this option must be supplied;
    // short flag `-i`.
    #[argh(option, short = 'i')]
    input: PathBuf,

    /// whether to include AS information in our output
    // When set, `convert` appends an AS-number column to every output row.
    #[argh(switch)]
    include_asn: bool,

    /// where to store the AS map.
    // Optional: `convert` writes the sorted `asn,name` list only when a path
    // is given here. Independent of `include_asn`.
    #[argh(option)]
    output_asn: Option<PathBuf>,
}
     45 
/// Represents a network block from running `location dump`.
///
/// Equality and ordering for this type are implemented by hand below and
/// look only at `net`; the remaining fields do not take part in comparisons.
#[derive(Debug, Clone)]
pub struct NetBlock {
    /// The network (address plus prefix length) this block describes.
    pub net: IpNetwork,
    /// Two-byte country code; `NetDefn::cc` later interprets it as UTF-8 text.
    pub cc: [u8; 2],
    /// AS number for the network, if any. An absent value is written as `0`
    /// in the CSV output.
    pub asn: Option<NonZeroU32>,
    /// Flag for the block; not read anywhere in this file.
    pub is_anon_proxy: bool,
    /// Flag for the block; not read anywhere in this file.
    pub is_anycast: bool,
    /// Flag for the block; not read anywhere in this file.
    pub is_satellite: bool,
}
     56 
/// Represents an AS definition from running `location dump`.
///
/// The derived ordering compares fields in declaration order (`asn` first,
/// then `name`), so sorting a list of these orders it by AS number.
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct AsBlock {
    /// The AS number (never zero).
    pub asn: NonZeroU32,
    /// The name attached to this AS; written verbatim to the AS map output.
    pub name: String,
}
     63 
     64 impl PartialEq for NetBlock {
     65    fn eq(&self, other: &Self) -> bool {
     66        self.net == other.net
     67    }
     68 }
     69 
     70 /// We define network blocks as being sorted first from largest to smallest,
     71 /// then by address.
     72 impl Ord for NetBlock {
     73    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
     74        self.net
     75            .prefix()
     76            .cmp(&other.net.prefix())
     77            .then_with(|| self.net.network().cmp(&other.net.network()))
     78    }
     79 }
     80 
     81 impl PartialOrd for NetBlock {
     82    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
     83        Some(self.cmp(other))
     84    }
     85 }
     86 
/// Marker impl: `NetBlock` equality is a full equivalence relation, which the
/// `Ord` impl requires.
impl Eq for NetBlock {}
     88 
/// The value stored for each address range in the output maps: the part of a
/// `NetBlock` that ends up in a CSV row.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
struct NetDefn {
    /// Two-byte country code, copied from the originating `NetBlock`.
    cc: [u8; 2],
    /// AS number, or `None` when unknown or when AS output was not requested.
    asn: Option<NonZeroU32>,
}
     94 
     95 impl NetBlock {
     96    fn into_defn(self, include_asn: bool) -> NetDefn {
     97        if include_asn {
     98            NetDefn {
     99                cc: self.cc,
    100                asn: self.asn,
    101            }
    102        } else {
    103            NetDefn {
    104                cc: self.cc,
    105                asn: None,
    106            }
    107        }
    108    }
    109 }
    110 
    111 impl NetDefn {
    112    fn cc(&self) -> &str {
    113        std::str::from_utf8(&self.cc).unwrap()
    114    }
    115    fn asn(&self) -> u32 {
    116        match self.asn {
    117            Some(v) => v.into(),
    118            None => 0,
    119        }
    120    }
    121 }
    122 
/// Comment block written at the very top of both the IPv4 and the IPv6
/// output file, immediately before the header extracted from the input dump.
///
/// The leading `\` after the opening quote suppresses the first newline, so
/// the text starts directly with `# This file ...`.
const PROLOGUE: &str = "\
# This file has been converted from the IPFire Location database
# using Tor's geoip-db-tool, which is available in the
# scripts/maint/geoip/geoip-db-tool directory in the Tor source
# code repository at https://gitlab.torproject.org/tpo/core/tor/ .
#
# For more information on the data, see https://location.ipfire.org/.
#
# Below is the header from the original export:
#
";
    134 
    135 /// Read an input file in the `location dump` format, and write CSV ipv4 and ipv6 files.
    136 ///
    137 /// This code tries to be "efficient enough"; most of the logic is handled by
    138 /// using the rangemap crate.
    139 fn convert(args: Args) -> std::io::Result<()> {
    140    let input = args.input.as_path();
    141    let output_v4 = args.output_ipv4.as_path();
    142    let output_v6 = args.output_ipv6.as_path();
    143    let include_asn = args.include_asn;
    144 
    145    let f = File::open(input)?;
    146    let f = BufReader::new(f);
    147    let mut blocks = Vec::new();
    148    let mut networks = Vec::new();
    149 
    150    let mut reader = db::BlockReader::new(f.lines());
    151    let hdr = reader.extract_header();
    152    // Read blocks, and then sort them by specificity and address.
    153    for nb in reader {
    154        match nb {
    155            db::AnyBlock::As(a) => networks.push(a),
    156            db::AnyBlock::Net(n) => blocks.push(n),
    157            _ => {}
    158        }
    159    }
    160    blocks.sort();
    161 
    162    // Convert the sorted blocks into a map from address ranges into
    163    // country codes.
    164    //
    165    // Note that since we have sorted the blocks from least to most specific,
    166    // we will be puttting them into the maps in the right order, so that the
    167    // most specific rule "wins".
    168    //
    169    // We use u32 and u128 as the index types for these RangeInclusiveMaps,
    170    // so that we don't need to implement a step function for IpAddr.
    171    let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new();
    172    let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new();
    173 
    174    let mut n = 0usize;
    175    let num_blocks = blocks.len();
    176    for nb in blocks {
    177        n += 1;
    178        if n.is_multiple_of(100000) {
    179            println!("{n}/{num_blocks}");
    180        }
    181        let start = nb.net.network();
    182        let end = nb.net.broadcast();
    183        match (start, end) {
    184            (IpAddr::V4(a), IpAddr::V4(b)) => {
    185                v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
    186            }
    187            (IpAddr::V6(a), IpAddr::V6(b)) => {
    188                v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
    189            }
    190            (_, _) => panic!("network started and ended in different families!?"),
    191        }
    192    }
    193 
    194    // Write the ranges out to the appropriate files, in order.
    195    let mut v4 = BufWriter::new(File::create(output_v4)?);
    196    let mut v6 = BufWriter::new(File::create(output_v6)?);
    197 
    198    v4.write_all(PROLOGUE.as_bytes())?;
    199    v4.write_all(hdr.as_bytes())?;
    200    for (r, defn) in v4map.iter() {
    201        let a: u32 = *r.start();
    202        let b: u32 = *r.end();
    203        if include_asn {
    204            writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
    205        } else {
    206            writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?;
    207        }
    208    }
    209 
    210    v6.write_all(PROLOGUE.as_bytes())?;
    211    v6.write_all(hdr.as_bytes())?;
    212    for (r, defn) in v6map.iter() {
    213        let a: Ipv6Addr = (*r.start()).into();
    214        let b: Ipv6Addr = (*r.end()).into();
    215        if include_asn {
    216            writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
    217        } else {
    218            writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?;
    219        }
    220    }
    221 
    222    // The documentation says you should always flush a BufWriter.
    223    v4.flush()?;
    224    v6.flush()?;
    225 
    226    if let Some(output_asn) = args.output_asn {
    227        networks.sort();
    228        let mut asn = BufWriter::new(File::create(output_asn)?);
    229        for net in networks {
    230            writeln!(&mut asn, "{},{}", net.asn, net.name)?;
    231        }
    232        asn.flush()?;
    233    }
    234 
    235    Ok(())
    236 }
    237 
    238 fn main() -> std::io::Result<()> {
    239    let args: Args = argh::from_env();
    240 
    241    convert(args)
    242 }