tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lib.rs (11580B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 use std::{
      6    fs::File,
      7    io::{self, BufRead},
      8    net::Ipv4Addr,
      9    path::Path,
     10 };
     11 
     12 use nserror::*;
     13 use nsstring::{nsACString, nsCString};
     14 use thin_vec::ThinVec;
     15 
     16 #[cfg(windows)]
     17 use {
     18    std::{fs::OpenOptions, os::windows::fs::OpenOptionsExt},
     19    windows_sys::Win32::Storage::FileSystem::{
     20        FILE_SHARE_DELETE, FILE_SHARE_READ, FILE_SHARE_WRITE,
     21    },
     22 };
     23 
     24 /// HTTP leading whitespace, defined in netwerk/protocol/http/nsHttp.h
     25 static HTTP_LWS: &[u8] = b" \t";
     26 
     27 /// Trim leading whitespace, trailing whitespace, and quality-value
     28 /// from a token.
     29 fn trim_token(token: &[u8]) -> &[u8] {
     30    // Trim left whitespace
     31    let ltrim = token
     32        .iter()
     33        .take_while(|c| HTTP_LWS.iter().any(|ws| &ws == c))
     34        .count();
     35 
     36    // Trim right whitespace
     37    // remove "; q=..." if present
     38    let rtrim = token[ltrim..]
     39        .iter()
     40        .take_while(|c| **c != b';' && HTTP_LWS.iter().all(|ws| ws != *c))
     41        .count();
     42 
     43    &token[ltrim..ltrim + rtrim]
     44 }
     45 
     46 // Small helper that opens for reading in a fail-fast, cross-platform way.
     47 // This is necessary because on windows it's possible that File::open will
     48 // succeed but reading from the file would hang. See bug 1970349
     49 fn open_read_fast_fail(path: &Path) -> io::Result<File> {
     50    #[cfg(windows)]
     51    {
     52        // If another process opened the file with *no* sharing, this will fail
     53        // immediately with a sharing violation instead of letting a later read hang.
     54        OpenOptions::new()
     55            .read(true)
     56            .share_mode(FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE)
     57            .open(path)
     58    }
     59 
     60    #[cfg(not(windows))]
     61    {
     62        File::open(path)
     63    }
     64 }
     65 
     66 #[no_mangle]
     67 /// Allocates an nsACString that contains a ISO 639 language list
     68 /// notated with HTTP "q" values for output with an HTTP Accept-Language
     69 /// header. Previous q values will be stripped because the order of
     70 /// the langs implies the q value. q-values decrease by 0.1 for each subsequent language,
     71 /// with a minimum value of 0.1.
     72 ///
     73 /// Ex: passing: "en, ja"
     74 ///     returns: "en,ja;q=0.9"
     75 ///
     76 ///     passing: "en, ja, fr_CA"
     77 ///     returns: "en,ja;q=0.9,fr_CA;q=0.8"
     78 pub extern "C" fn rust_prepare_accept_languages(
     79    i_accept_languages: &nsACString,
     80    o_accept_languages: &mut nsACString,
     81 ) -> nsresult {
     82    if i_accept_languages.is_empty() {
     83        return NS_OK;
     84    }
     85 
     86    let make_tokens = || {
     87        i_accept_languages
     88            .split(|c| *c == b',')
     89            .map(trim_token)
     90            .filter(|token| !token.is_empty())
     91    };
     92 
     93    for (count_n, i_token) in make_tokens().enumerate() {
     94        // delimiter if not first item
     95        if count_n != 0 {
     96            o_accept_languages.append(",");
     97        }
     98 
     99        let token_pos = o_accept_languages.len();
    100        o_accept_languages.append(i_token as &[u8]);
    101 
    102        {
    103            let o_token = o_accept_languages.to_mut();
    104            canonicalize_language_tag(&mut o_token[token_pos..]);
    105        }
    106 
    107        //Since we need to emulate chrome behavior i.e languages should get q=1.0,0.9,0.8 and so on...
    108        let q_val_max = 10;
    109        let weight_of_decrement = 1;
    110        let step = std::cmp::min(10, count_n); //if num_language > 10, q_val_max - curr_cnt*weight_of_decrement underflows
    111        let q_val = std::cmp::max(q_val_max - step * weight_of_decrement, 1); //q-weight shouldn't go below 0.1
    112 
    113        if count_n > 0 && q_val < 10 {
    114            o_accept_languages.append(&format!(";q=0.{}", q_val));
    115        }
    116    }
    117 
    118    NS_OK
    119 }
    120 
    121 /// Defines a consistent capitalization for a given language string.
    122 ///
    123 /// # Arguments
    124 /// * `token` - a narrow char slice describing a language.
    125 ///
    126 /// Valid language tags are of the form
    127 /// "*", "fr", "en-US", "es-419", "az-Arab", "x-pig-latin", "man-Nkoo-GN"
    128 ///
    129 /// Language tags are defined in the
    130 /// [rfc5646](https://tools.ietf.org/html/rfc5646) spec. According to
    131 /// the spec:
    132 ///
    133 /// > At all times, language tags and their subtags, including private
    134 /// > use and extensions, are to be treated as case insensitive: there
    135 /// > exist conventions for the capitalization of some of the subtags,
    136 /// > but these MUST NOT be taken to carry meaning.
    137 ///
    138 /// So why is this code even here? See bug 1108183, I guess.
    139 fn canonicalize_language_tag(token: &mut [u8]) {
    140    for c in token.iter_mut() {
    141        *c = c.to_ascii_lowercase();
    142    }
    143 
    144    let sub_tags = token.split_mut(|c| *c == b'-');
    145    for (i, sub_tag) in sub_tags.enumerate() {
    146        if i == 0 {
    147            // ISO 639-1 language code, like the "en" in "en-US"
    148            continue;
    149        }
    150 
    151        match sub_tag.len() {
    152            // Singleton tag, like "x" or "i". These signify a
    153            // non-standard language, so we stop capitalizing after
    154            // these.
    155            1 => break,
    156            // ISO 3166-1 Country code, like "US"
    157            2 => {
    158                sub_tag[0] = sub_tag[0].to_ascii_uppercase();
    159                sub_tag[1] = sub_tag[1].to_ascii_uppercase();
    160            }
    161            // ISO 15924 script code, like "Nkoo"
    162            4 => {
    163                sub_tag[0] = sub_tag[0].to_ascii_uppercase();
    164            }
    165            _ => {}
    166        };
    167    }
    168 }
    169 
/// C-callable wrapper around `is_valid_ipv4_addr`.
///
/// Returns whatever that function reports for the bytes of `addr`.
#[no_mangle]
pub extern "C" fn rust_net_is_valid_ipv4_addr(addr: &nsACString) -> bool {
    is_valid_ipv4_addr(addr)
}
    174 
    175 #[inline]
    176 fn try_apply_digit(current_octet: u8, digit_to_apply: u8) -> Option<u8> {
    177    current_octet.checked_mul(10)?.checked_add(digit_to_apply)
    178 }
    179 
    180 pub fn is_valid_ipv4_addr(addr: &[u8]) -> bool {
    181    let mut current_octet: Option<u8> = None;
    182    let mut dots: u8 = 0;
    183    for c in addr {
    184        let c = *c as char;
    185        match c {
    186            '.' => {
    187                match current_octet {
    188                    None => {
    189                        // starting an octet with a . is not allowed
    190                        return false;
    191                    }
    192                    Some(_) => {
    193                        dots += 1;
    194                        current_octet = None;
    195                    }
    196                }
    197            }
    198            // The character is not a digit
    199            no_digit if no_digit.to_digit(10).is_none() => {
    200                return false;
    201            }
    202            digit => {
    203                match current_octet {
    204                    None => {
    205                        // Unwrap is sound because it has been checked in the previous arm
    206                        current_octet = Some(digit.to_digit(10).unwrap() as u8);
    207                    }
    208                    Some(octet) => {
    209                        if let Some(0) = current_octet {
    210                            // Leading 0 is not allowed
    211                            return false;
    212                        }
    213                        if let Some(applied) =
    214                            try_apply_digit(octet, digit.to_digit(10).unwrap() as u8)
    215                        {
    216                            current_octet = Some(applied);
    217                        } else {
    218                            // Multiplication or Addition overflowed
    219                            return false;
    220                        }
    221                    }
    222                }
    223            }
    224        }
    225    }
    226    dots == 3 && current_octet.is_some()
    227 }
    228 
/// C-callable wrapper around `is_valid_ipv6_addr`.
///
/// Returns whatever that function reports for the bytes of `addr`.
#[no_mangle]
pub extern "C" fn rust_net_is_valid_ipv6_addr(addr: &nsACString) -> bool {
    is_valid_ipv6_addr(addr)
}
    233 
    234 pub fn is_valid_ipv6_addr(addr: &[u8]) -> bool {
    235    let mut double_colon = false;
    236    let mut colon_before = false;
    237    let mut digits: u8 = 0;
    238    let mut blocks: u8 = 0;
    239 
    240    // The smallest ipv6 is unspecified (::)
    241    // The IP starts with a single colon
    242    if addr.len() < 2 || addr[0] == b':' && addr[1] != b':' {
    243        return false;
    244    }
    245    //Enumerate with an u8 for cache locality
    246    for (i, c) in (0u8..).zip(addr) {
    247        match c {
    248            maybe_digit if maybe_digit.is_ascii_hexdigit() => {
    249                // Too many digits in the block
    250                if digits == 4 {
    251                    return false;
    252                }
    253                colon_before = false;
    254                digits += 1;
    255            }
    256            b':' => {
    257                // Too many columns
    258                if double_colon && colon_before || blocks == 8 {
    259                    return false;
    260                }
    261                if !colon_before {
    262                    if digits != 0 {
    263                        blocks += 1;
    264                    }
    265                    digits = 0;
    266                    colon_before = true;
    267                } else if !double_colon {
    268                    double_colon = true;
    269                }
    270            }
    271            b'.' => {
    272                // IPv4 from the last block
    273                if is_valid_ipv4_addr(&addr[(i - digits) as usize..]) {
    274                    return double_colon && blocks < 6 || !double_colon && blocks == 6;
    275                }
    276                return false;
    277            }
    278            _ => {
    279                // Invalid character
    280                return false;
    281            }
    282        }
    283    }
    284    if colon_before && !double_colon {
    285        // The IP ends with a single colon
    286        return false;
    287    }
    288    if digits != 0 {
    289        blocks += 1;
    290    }
    291 
    292    double_colon && blocks < 8 || !double_colon && blocks == 8
    293 }
    294 
/// C-callable wrapper around `is_valid_scheme_char` for a single byte.
#[no_mangle]
pub extern "C" fn rust_net_is_valid_scheme_char(a_char: u8) -> bool {
    is_valid_scheme_char(a_char)
}
    299 
    300 #[no_mangle]
    301 pub extern "C" fn rust_net_is_valid_scheme(scheme: &nsACString) -> bool {
    302    if scheme.is_empty() {
    303        return false;
    304    }
    305 
    306    // first char must be alpha
    307    if !scheme[0].is_ascii_alphabetic() {
    308        return false;
    309    }
    310 
    311    scheme[1..]
    312        .iter()
    313        .all(|a_char| is_valid_scheme_char(*a_char))
    314 }
    315 
    316 fn is_valid_scheme_char(a_char: u8) -> bool {
    317    a_char.is_ascii_alphanumeric() || a_char == b'+' || a_char == b'.' || a_char == b'-'
    318 }
    319 
    320 pub type ParsingCallback = extern "C" fn(&ThinVec<nsCString>) -> bool;
    321 
    322 #[no_mangle]
    323 pub extern "C" fn rust_parse_etc_hosts(path: &nsACString, callback: ParsingCallback) {
    324    let path_str = path.to_utf8();
    325    let path = Path::new(&*path_str);
    326 
    327    // Try to open in a way that fails immediately if locked (on Windows).
    328    let file = match open_read_fast_fail(path) {
    329        Ok(f) => io::BufReader::new(f),
    330        Err(..) => return, // Not readable right now; bail out quietly like before.
    331    };
    332 
    333    let mut array = ThinVec::new();
    334    for line in file.lines() {
    335        let line = match line {
    336            Ok(l) => l,
    337            Err(..) => continue,
    338        };
    339 
    340        let mut iter = line.split('#').next().unwrap().split_whitespace();
    341        iter.next(); // skip the IP
    342 
    343        array.extend(
    344            iter.filter(|host| {
    345                // Make sure it's a valid domain
    346                let invalid = [
    347                    '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']',
    348                ];
    349                host.parse::<Ipv4Addr>().is_err() && !host.contains(&invalid[..])
    350            })
    351            .map(nsCString::from),
    352        );
    353 
    354        // /etc/hosts files can be huge. To make sure we don't block shutdown
    355        // for every 100 domains that we parse we call the callback passing the
    356        // domains and see if we should keep parsing.
    357        if array.len() > 100 {
    358            let keep_going = callback(&array);
    359            array.clear();
    360            if !keep_going {
    361                break;
    362            }
    363        }
    364    }
    365 
    366    if !array.is_empty() {
    367        callback(&array);
    368    }
    369 }