lib.rs (11580B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 use std::{ 6 fs::File, 7 io::{self, BufRead}, 8 net::Ipv4Addr, 9 path::Path, 10 }; 11 12 use nserror::*; 13 use nsstring::{nsACString, nsCString}; 14 use thin_vec::ThinVec; 15 16 #[cfg(windows)] 17 use { 18 std::{fs::OpenOptions, os::windows::fs::OpenOptionsExt}, 19 windows_sys::Win32::Storage::FileSystem::{ 20 FILE_SHARE_DELETE, FILE_SHARE_READ, FILE_SHARE_WRITE, 21 }, 22 }; 23 24 /// HTTP leading whitespace, defined in netwerk/protocol/http/nsHttp.h 25 static HTTP_LWS: &[u8] = b" \t"; 26 27 /// Trim leading whitespace, trailing whitespace, and quality-value 28 /// from a token. 29 fn trim_token(token: &[u8]) -> &[u8] { 30 // Trim left whitespace 31 let ltrim = token 32 .iter() 33 .take_while(|c| HTTP_LWS.iter().any(|ws| &ws == c)) 34 .count(); 35 36 // Trim right whitespace 37 // remove "; q=..." if present 38 let rtrim = token[ltrim..] 39 .iter() 40 .take_while(|c| **c != b';' && HTTP_LWS.iter().all(|ws| ws != *c)) 41 .count(); 42 43 &token[ltrim..ltrim + rtrim] 44 } 45 46 // Small helper that opens for reading in a fail-fast, cross-platform way. 47 // This is necessary because on windows it's possible that File::open will 48 // succeed but reading from the file would hang. See bug 1970349 49 fn open_read_fast_fail(path: &Path) -> io::Result<File> { 50 #[cfg(windows)] 51 { 52 // If another process opened the file with *no* sharing, this will fail 53 // immediately with a sharing violation instead of letting a later read hang. 54 OpenOptions::new() 55 .read(true) 56 .share_mode(FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE) 57 .open(path) 58 } 59 60 #[cfg(not(windows))] 61 { 62 File::open(path) 63 } 64 } 65 66 #[no_mangle] 67 /// Allocates an nsACString that contains a ISO 639 language list 68 /// notated with HTTP "q" values for output with an HTTP Accept-Language 69 /// header. Previous q values will be stripped because the order of 70 /// the langs implies the q value. q-values decrease by 0.1 for each subsequent language, 71 /// with a minimum value of 0.1. 72 /// 73 /// Ex: passing: "en, ja" 74 /// returns: "en,ja;q=0.9" 75 /// 76 /// passing: "en, ja, fr_CA" 77 /// returns: "en,ja;q=0.9,fr_CA;q=0.8" 78 pub extern "C" fn rust_prepare_accept_languages( 79 i_accept_languages: &nsACString, 80 o_accept_languages: &mut nsACString, 81 ) -> nsresult { 82 if i_accept_languages.is_empty() { 83 return NS_OK; 84 } 85 86 let make_tokens = || { 87 i_accept_languages 88 .split(|c| *c == b',') 89 .map(trim_token) 90 .filter(|token| !token.is_empty()) 91 }; 92 93 for (count_n, i_token) in make_tokens().enumerate() { 94 // delimiter if not first item 95 if count_n != 0 { 96 o_accept_languages.append(","); 97 } 98 99 let token_pos = o_accept_languages.len(); 100 o_accept_languages.append(i_token as &[u8]); 101 102 { 103 let o_token = o_accept_languages.to_mut(); 104 canonicalize_language_tag(&mut o_token[token_pos..]); 105 } 106 107 //Since we need to emulate chrome behavior i.e languages should get q=1.0,0.9,0.8 and so on... 108 let q_val_max = 10; 109 let weight_of_decrement = 1; 110 let step = std::cmp::min(10, count_n); //if num_language > 10, q_val_max - curr_cnt*weight_of_decrement underflows 111 let q_val = std::cmp::max(q_val_max - step * weight_of_decrement, 1); //q-weight shouldn't go below 0.1 112 113 if count_n > 0 && q_val < 10 { 114 o_accept_languages.append(&format!(";q=0.{}", q_val)); 115 } 116 } 117 118 NS_OK 119 } 120 121 /// Defines a consistent capitalization for a given language string. 122 /// 123 /// # Arguments 124 /// * `token` - a narrow char slice describing a language. 125 /// 126 /// Valid language tags are of the form 127 /// "*", "fr", "en-US", "es-419", "az-Arab", "x-pig-latin", "man-Nkoo-GN" 128 /// 129 /// Language tags are defined in the 130 /// [rfc5646](https://tools.ietf.org/html/rfc5646) spec. According to 131 /// the spec: 132 /// 133 /// > At all times, language tags and their subtags, including private 134 /// > use and extensions, are to be treated as case insensitive: there 135 /// > exist conventions for the capitalization of some of the subtags, 136 /// > but these MUST NOT be taken to carry meaning. 137 /// 138 /// So why is this code even here? See bug 1108183, I guess. 139 fn canonicalize_language_tag(token: &mut [u8]) { 140 for c in token.iter_mut() { 141 *c = c.to_ascii_lowercase(); 142 } 143 144 let sub_tags = token.split_mut(|c| *c == b'-'); 145 for (i, sub_tag) in sub_tags.enumerate() { 146 if i == 0 { 147 // ISO 639-1 language code, like the "en" in "en-US" 148 continue; 149 } 150 151 match sub_tag.len() { 152 // Singleton tag, like "x" or "i". These signify a 153 // non-standard language, so we stop capitalizing after 154 // these. 155 1 => break, 156 // ISO 3166-1 Country code, like "US" 157 2 => { 158 sub_tag[0] = sub_tag[0].to_ascii_uppercase(); 159 sub_tag[1] = sub_tag[1].to_ascii_uppercase(); 160 } 161 // ISO 15924 script code, like "Nkoo" 162 4 => { 163 sub_tag[0] = sub_tag[0].to_ascii_uppercase(); 164 } 165 _ => {} 166 }; 167 } 168 } 169 170 #[no_mangle] 171 pub extern "C" fn rust_net_is_valid_ipv4_addr(addr: &nsACString) -> bool { 172 is_valid_ipv4_addr(addr) 173 } 174 175 #[inline] 176 fn try_apply_digit(current_octet: u8, digit_to_apply: u8) -> Option<u8> { 177 current_octet.checked_mul(10)?.checked_add(digit_to_apply) 178 } 179 180 pub fn is_valid_ipv4_addr(addr: &[u8]) -> bool { 181 let mut current_octet: Option<u8> = None; 182 let mut dots: u8 = 0; 183 for c in addr { 184 let c = *c as char; 185 match c { 186 '.' => { 187 match current_octet { 188 None => { 189 // starting an octet with a . is not allowed 190 return false; 191 } 192 Some(_) => { 193 dots += 1; 194 current_octet = None; 195 } 196 } 197 } 198 // The character is not a digit 199 no_digit if no_digit.to_digit(10).is_none() => { 200 return false; 201 } 202 digit => { 203 match current_octet { 204 None => { 205 // Unwrap is sound because it has been checked in the previous arm 206 current_octet = Some(digit.to_digit(10).unwrap() as u8); 207 } 208 Some(octet) => { 209 if let Some(0) = current_octet { 210 // Leading 0 is not allowed 211 return false; 212 } 213 if let Some(applied) = 214 try_apply_digit(octet, digit.to_digit(10).unwrap() as u8) 215 { 216 current_octet = Some(applied); 217 } else { 218 // Multiplication or Addition overflowed 219 return false; 220 } 221 } 222 } 223 } 224 } 225 } 226 dots == 3 && current_octet.is_some() 227 } 228 229 #[no_mangle] 230 pub extern "C" fn rust_net_is_valid_ipv6_addr(addr: &nsACString) -> bool { 231 is_valid_ipv6_addr(addr) 232 } 233 234 pub fn is_valid_ipv6_addr(addr: &[u8]) -> bool { 235 let mut double_colon = false; 236 let mut colon_before = false; 237 let mut digits: u8 = 0; 238 let mut blocks: u8 = 0; 239 240 // The smallest ipv6 is unspecified (::) 241 // The IP starts with a single colon 242 if addr.len() < 2 || addr[0] == b':' && addr[1] != b':' { 243 return false; 244 } 245 //Enumerate with an u8 for cache locality 246 for (i, c) in (0u8..).zip(addr) { 247 match c { 248 maybe_digit if maybe_digit.is_ascii_hexdigit() => { 249 // Too many digits in the block 250 if digits == 4 { 251 return false; 252 } 253 colon_before = false; 254 digits += 1; 255 } 256 b':' => { 257 // Too many columns 258 if double_colon && colon_before || blocks == 8 { 259 return false; 260 } 261 if !colon_before { 262 if digits != 0 { 263 blocks += 1; 264 } 265 digits = 0; 266 colon_before = true; 267 } else if !double_colon { 268 double_colon = true; 269 } 270 } 271 b'.' => { 272 // IPv4 from the last block 273 if is_valid_ipv4_addr(&addr[(i - digits) as usize..]) { 274 return double_colon && blocks < 6 || !double_colon && blocks == 6; 275 } 276 return false; 277 } 278 _ => { 279 // Invalid character 280 return false; 281 } 282 } 283 } 284 if colon_before && !double_colon { 285 // The IP ends with a single colon 286 return false; 287 } 288 if digits != 0 { 289 blocks += 1; 290 } 291 292 double_colon && blocks < 8 || !double_colon && blocks == 8 293 } 294 295 #[no_mangle] 296 pub extern "C" fn rust_net_is_valid_scheme_char(a_char: u8) -> bool { 297 is_valid_scheme_char(a_char) 298 } 299 300 #[no_mangle] 301 pub extern "C" fn rust_net_is_valid_scheme(scheme: &nsACString) -> bool { 302 if scheme.is_empty() { 303 return false; 304 } 305 306 // first char must be alpha 307 if !scheme[0].is_ascii_alphabetic() { 308 return false; 309 } 310 311 scheme[1..] 312 .iter() 313 .all(|a_char| is_valid_scheme_char(*a_char)) 314 } 315 316 fn is_valid_scheme_char(a_char: u8) -> bool { 317 a_char.is_ascii_alphanumeric() || a_char == b'+' || a_char == b'.' || a_char == b'-' 318 } 319 320 pub type ParsingCallback = extern "C" fn(&ThinVec<nsCString>) -> bool; 321 322 #[no_mangle] 323 pub extern "C" fn rust_parse_etc_hosts(path: &nsACString, callback: ParsingCallback) { 324 let path_str = path.to_utf8(); 325 let path = Path::new(&*path_str); 326 327 // Try to open in a way that fails immediately if locked (on Windows). 328 let file = match open_read_fast_fail(path) { 329 Ok(f) => io::BufReader::new(f), 330 Err(..) => return, // Not readable right now; bail out quietly like before. 331 }; 332 333 let mut array = ThinVec::new(); 334 for line in file.lines() { 335 let line = match line { 336 Ok(l) => l, 337 Err(..) => continue, 338 }; 339 340 let mut iter = line.split('#').next().unwrap().split_whitespace(); 341 iter.next(); // skip the IP 342 343 array.extend( 344 iter.filter(|host| { 345 // Make sure it's a valid domain 346 let invalid = [ 347 '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']', 348 ]; 349 host.parse::<Ipv4Addr>().is_err() && !host.contains(&invalid[..]) 350 }) 351 .map(nsCString::from), 352 ); 353 354 // /etc/hosts files can be huge. To make sure we don't block shutdown 355 // for every 100 domains that we parse we call the callback passing the 356 // domains and see if we should keep parsing. 357 if array.len() > 100 { 358 let keep_going = callback(&array); 359 array.clear(); 360 if !keep_going { 361 break; 362 } 363 } 364 } 365 366 if !array.is_empty() { 367 callback(&array); 368 } 369 }