main.rs (6871B)
1 /// A basic tool to convert IPFire Location dumps into the CSV formats that Tor 2 /// expects. 3 mod db; 4 5 use argh::FromArgs; 6 use ipnetwork::IpNetwork; 7 use rangemap::RangeInclusiveMap; 8 9 use std::fs::File; 10 use std::io::{BufRead, BufReader, BufWriter, Write}; 11 use std::net::{IpAddr, Ipv6Addr}; 12 use std::num::NonZeroU32; 13 use std::path::PathBuf; 14 15 fn default_ipv4_path() -> PathBuf { 16 "./geoip".into() 17 } 18 fn default_ipv6_path() -> PathBuf { 19 "./geoip6".into() 20 } 21 22 #[derive(FromArgs)] 23 /// Convert an IPFire Location dump into CSV geoip files. 24 struct Args { 25 /// where to store the IPv4 geoip output 26 #[argh(option, default = "default_ipv4_path()", short = '4')] 27 output_ipv4: PathBuf, 28 29 /// where to store the IPv6 geoip6 output 30 #[argh(option, default = "default_ipv6_path()", short = '6')] 31 output_ipv6: PathBuf, 32 33 /// where to find the dump file 34 #[argh(option, short = 'i')] 35 input: PathBuf, 36 37 /// whether to include AS information in our output 38 #[argh(switch)] 39 include_asn: bool, 40 41 /// where to store the AS map. 42 #[argh(option)] 43 output_asn: Option<PathBuf>, 44 } 45 46 /// Represents a network block from running `location dump`. 47 #[derive(Debug, Clone)] 48 pub struct NetBlock { 49 pub net: IpNetwork, 50 pub cc: [u8; 2], 51 pub asn: Option<NonZeroU32>, 52 pub is_anon_proxy: bool, 53 pub is_anycast: bool, 54 pub is_satellite: bool, 55 } 56 57 /// Represents an AS definition from running `location dump`. 58 #[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)] 59 pub struct AsBlock { 60 pub asn: NonZeroU32, 61 pub name: String, 62 } 63 64 impl PartialEq for NetBlock { 65 fn eq(&self, other: &Self) -> bool { 66 self.net == other.net 67 } 68 } 69 70 /// We define network blocks as being sorted first from largest to smallest, 71 /// then by address. 72 impl Ord for NetBlock { 73 fn cmp(&self, other: &Self) -> std::cmp::Ordering { 74 self.net 75 .prefix() 76 .cmp(&other.net.prefix()) 77 .then_with(|| self.net.network().cmp(&other.net.network())) 78 } 79 } 80 81 impl PartialOrd for NetBlock { 82 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { 83 Some(self.cmp(other)) 84 } 85 } 86 87 impl Eq for NetBlock {} 88 89 #[derive(Copy, Clone, Eq, PartialEq, Debug)] 90 struct NetDefn { 91 cc: [u8; 2], 92 asn: Option<NonZeroU32>, 93 } 94 95 impl NetBlock { 96 fn into_defn(self, include_asn: bool) -> NetDefn { 97 if include_asn { 98 NetDefn { 99 cc: self.cc, 100 asn: self.asn, 101 } 102 } else { 103 NetDefn { 104 cc: self.cc, 105 asn: None, 106 } 107 } 108 } 109 } 110 111 impl NetDefn { 112 fn cc(&self) -> &str { 113 std::str::from_utf8(&self.cc).unwrap() 114 } 115 fn asn(&self) -> u32 { 116 match self.asn { 117 Some(v) => v.into(), 118 None => 0, 119 } 120 } 121 } 122 123 const PROLOGUE: &str = "\ 124 # This file has been converted from the IPFire Location database 125 # using Tor's geoip-db-tool, which is available in the 126 # scripts/maint/geoip/geoip-db-tool directory in the Tor source 127 # code repository at https://gitlab.torproject.org/tpo/core/tor/ . 128 # 129 # For more information on the data, see https://location.ipfire.org/. 130 # 131 # Below is the header from the original export: 132 # 133 "; 134 135 /// Read an input file in the `location dump` format, and write CSV ipv4 and ipv6 files. 136 /// 137 /// This code tries to be "efficient enough"; most of the logic is handled by 138 /// using the rangemap crate. 139 fn convert(args: Args) -> std::io::Result<()> { 140 let input = args.input.as_path(); 141 let output_v4 = args.output_ipv4.as_path(); 142 let output_v6 = args.output_ipv6.as_path(); 143 let include_asn = args.include_asn; 144 145 let f = File::open(input)?; 146 let f = BufReader::new(f); 147 let mut blocks = Vec::new(); 148 let mut networks = Vec::new(); 149 150 let mut reader = db::BlockReader::new(f.lines()); 151 let hdr = reader.extract_header(); 152 // Read blocks, and then sort them by specificity and address. 153 for nb in reader { 154 match nb { 155 db::AnyBlock::As(a) => networks.push(a), 156 db::AnyBlock::Net(n) => blocks.push(n), 157 _ => {} 158 } 159 } 160 blocks.sort(); 161 162 // Convert the sorted blocks into a map from address ranges into 163 // country codes. 164 // 165 // Note that since we have sorted the blocks from least to most specific, 166 // we will be puttting them into the maps in the right order, so that the 167 // most specific rule "wins". 168 // 169 // We use u32 and u128 as the index types for these RangeInclusiveMaps, 170 // so that we don't need to implement a step function for IpAddr. 171 let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new(); 172 let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new(); 173 174 let mut n = 0usize; 175 let num_blocks = blocks.len(); 176 for nb in blocks { 177 n += 1; 178 if n.is_multiple_of(100000) { 179 println!("{n}/{num_blocks}"); 180 } 181 let start = nb.net.network(); 182 let end = nb.net.broadcast(); 183 match (start, end) { 184 (IpAddr::V4(a), IpAddr::V4(b)) => { 185 v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn)); 186 } 187 (IpAddr::V6(a), IpAddr::V6(b)) => { 188 v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn)); 189 } 190 (_, _) => panic!("network started and ended in different families!?"), 191 } 192 } 193 194 // Write the ranges out to the appropriate files, in order. 195 let mut v4 = BufWriter::new(File::create(output_v4)?); 196 let mut v6 = BufWriter::new(File::create(output_v6)?); 197 198 v4.write_all(PROLOGUE.as_bytes())?; 199 v4.write_all(hdr.as_bytes())?; 200 for (r, defn) in v4map.iter() { 201 let a: u32 = *r.start(); 202 let b: u32 = *r.end(); 203 if include_asn { 204 writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?; 205 } else { 206 writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?; 207 } 208 } 209 210 v6.write_all(PROLOGUE.as_bytes())?; 211 v6.write_all(hdr.as_bytes())?; 212 for (r, defn) in v6map.iter() { 213 let a: Ipv6Addr = (*r.start()).into(); 214 let b: Ipv6Addr = (*r.end()).into(); 215 if include_asn { 216 writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?; 217 } else { 218 writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?; 219 } 220 } 221 222 // The documentation says you should always flush a BufWriter. 223 v4.flush()?; 224 v6.flush()?; 225 226 if let Some(output_asn) = args.output_asn { 227 networks.sort(); 228 let mut asn = BufWriter::new(File::create(output_asn)?); 229 for net in networks { 230 writeln!(&mut asn, "{},{}", net.asn, net.name)?; 231 } 232 asn.flush()?; 233 } 234 235 Ok(()) 236 } 237 238 fn main() -> std::io::Result<()> { 239 let args: Args = argh::from_env(); 240 241 convert(args) 242 }