tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

helpers.rs (14404B)


      1 /* -*- Mode: rust; rust-indent-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 extern crate nsstring;
      7 use nsstring::nsACString;
      8 use nsstring::nsCString;
      9 use thin_vec::ThinVec;
     10 
     11 use url::Url;
     12 use urlpattern::quirks as Uq;
     13 use urlpattern::{UrlPatternInit, UrlPatternOptions};
     14 
     15 use crate::base::*;
     16 
     17 pub fn init_from_string_and_base_url(
     18    input: *const nsACString,
     19    base_url: *const nsACString,
     20 ) -> Option<UrlPatternInit> {
     21    if input.is_null() {
     22        return None;
     23    }
     24    if let Some(tmp) = unsafe { input.as_ref().map(|x| x.to_utf8().into_owned()) } {
     25        let maybe_base = if !base_url.is_null() {
     26            let tmp = unsafe { base_url.as_ref() }
     27                .map(|x| x.to_utf8().into_owned())
     28                .as_deref()
     29                .map(Url::parse);
     30            match tmp {
     31                Some(Ok(t)) => Some(t),
     32                _ => None,
     33            }
     34        } else {
     35            None
     36        };
     37 
     38        if let Ok(init) =
     39            urlpattern::UrlPatternInit::parse_constructor_string::<regex::Regex>(&tmp, maybe_base)
     40        {
     41            return Some(init.clone());
     42        }
     43    }
     44    None
     45 }
     46 
     47 pub fn maybe_to_option_string(m_str: &MaybeString) -> Option<String> {
     48    if !m_str.valid {
     49        return None;
     50    }
     51    Some(m_str.string.to_string().to_owned())
     52 }
     53 
     54 pub fn option_to_maybe_string(os: Option<String>) -> MaybeString {
     55    let s = match os {
     56        Some(s) => s,
     57        _ => {
     58            return MaybeString {
     59                string: nsCString::from(""),
     60                valid: false,
     61            }
     62        }
     63    };
     64    let s = nsCString::from(s.as_str());
     65    MaybeString {
     66        string: s,
     67        valid: true,
     68    }
     69 }
     70 
     71 // creates the regex object with the desired flags
     72 // this function was adapted from
     73 // https://github.com/denoland/rust-urlpattern/blob/main/src/regexp.rs
     74 pub fn parse_regex(pattern: &str, flags: &str) -> Result<regex::Regex, ()> {
     75    regex::Regex::new(&format!("(?{flags}){pattern}")).map_err(|_| ())
     76 }
     77 
     78 // returns the list of regex capture groups contained in the text input
     79 // this function was adapted from
     80 // https://github.com/denoland/rust-urlpattern/blob/main/src/regexp.rs
     81 pub fn regex_matches<'a>(regexp: regex::Regex, text: &'a str) -> Option<Vec<Option<String>>> {
     82    let captures = regexp.captures(text)?;
     83    let captures = captures
     84        .iter()
     85        .skip(1)
     86        .map(|c| c.map(|m| m.as_str().to_string()))
     87        .collect();
     88    Some(captures)
     89 }
     90 
     91 // creates a regex object and returns the list of capture groups from a given input string
     92 // this function was adapted from
     93 // https://github.com/denoland/rust-urlpattern/blob/main/src/quirks.rs
     94 pub fn regexp_parse_and_matches<'a>(
     95    regexp: &'a str,
     96    input: &'a str,
     97    ignore_case: bool,
     98 ) -> Option<Vec<Option<String>>> {
     99    let flags = if ignore_case { "ui" } else { "u" };
    100    let regexp = parse_regex(regexp, flags).ok()?;
    101    regex_matches(regexp, input)
    102 }
    103 
    104 // performs the match on the inner matcher of a component with a given input string
    105 // this function was adapted from
    106 // https://github.com/denoland/rust-urlpattern/blob/main/src/matcher.rs
    107 pub fn matcher_matches<'a>(
    108    matcher: &Uq::Matcher,
    109    mut input: &'a str,
    110    ignore_case: bool,
    111 ) -> Option<Vec<Option<String>>> {
    112    let prefix_len = matcher.prefix.len();
    113    let suffix_len = matcher.suffix.len();
    114    let input_len = input.len();
    115    if prefix_len + suffix_len > 0 {
    116        // The input must be at least as long as the prefix and suffix combined,
    117        // because these must both be present, and not overlap.
    118        if input_len < prefix_len + suffix_len {
    119            return None;
    120        }
    121        if !input.starts_with(&matcher.prefix) {
    122            return None;
    123        }
    124        if !input.ends_with(&matcher.suffix) {
    125            return None;
    126        }
    127 
    128        input = &input[prefix_len..input_len - suffix_len];
    129    }
    130 
    131    match &matcher.inner {
    132        Uq::InnerMatcher::Literal { literal } => {
    133            if ignore_case {
    134                (input.to_lowercase() == literal.to_lowercase()).then(Vec::new)
    135            } else {
    136                (input == literal).then(Vec::new)
    137            }
    138        }
    139 
    140        Uq::InnerMatcher::SingleCapture {
    141            filter,
    142            allow_empty,
    143        } => {
    144            if input.is_empty() && !allow_empty {
    145                return None;
    146            }
    147            if let Some(filter) = filter {
    148                if ignore_case {
    149                    if input
    150                        .to_lowercase()
    151                        .contains(filter.to_lowercase().collect::<Vec<_>>().as_slice())
    152                    {
    153                        return None;
    154                    }
    155                } else if input.contains(*filter) {
    156                    return None;
    157                }
    158            }
    159            Some(vec![Some(input.to_string())])
    160        }
    161        Uq::InnerMatcher::RegExp { regexp, .. } => {
    162            regexp_parse_and_matches(regexp.as_str(), input, ignore_case)
    163        }
    164    }
    165 }
    166 
    167 impl From<Uq::MatchInput> for UrlpMatchInput {
    168    fn from(match_input: Uq::MatchInput) -> UrlpMatchInput {
    169        UrlpMatchInput {
    170            protocol: nsCString::from(match_input.protocol),
    171            username: nsCString::from(match_input.username),
    172            password: nsCString::from(match_input.password),
    173            hostname: nsCString::from(match_input.hostname),
    174            port: nsCString::from(match_input.port),
    175            pathname: nsCString::from(match_input.pathname),
    176            search: nsCString::from(match_input.search),
    177            hash: nsCString::from(match_input.hash),
    178        }
    179    }
    180 }
    181 
    182 // convert from UrlpInit to lib::UrlPatternInit, used by:
    183 // * parse_pattern_from_string
    184 // * parse_pattern_from_init
    185 impl From<UrlpInit> for UrlPatternInit {
    186    fn from(wrapper: UrlpInit) -> UrlPatternInit {
    187        let maybe_base = if wrapper.base_url.valid {
    188            let s = wrapper.base_url.string.to_string().to_owned();
    189            if s.is_empty() {
    190                None
    191            } else {
    192                Url::parse(s.as_str()).ok()
    193            }
    194        } else {
    195            None
    196        };
    197        UrlPatternInit {
    198            protocol: maybe_to_option_string(&wrapper.protocol),
    199            username: maybe_to_option_string(&wrapper.username),
    200            password: maybe_to_option_string(&wrapper.password),
    201            hostname: maybe_to_option_string(&wrapper.hostname),
    202            port: maybe_to_option_string(&wrapper.port),
    203            pathname: maybe_to_option_string(&wrapper.pathname),
    204            search: maybe_to_option_string(&wrapper.search),
    205            hash: maybe_to_option_string(&wrapper.hash),
    206            base_url: maybe_base,
    207        }
    208    }
    209 }
    210 
    211 impl From<&UrlpInit> for UrlPatternInit {
    212    fn from(wrapper: &UrlpInit) -> UrlPatternInit {
    213        let maybe_base = if wrapper.base_url.valid {
    214            let s = wrapper.base_url.string.to_string().to_owned();
    215            if s.is_empty() {
    216                None
    217            } else {
    218                Url::parse(s.as_str()).ok()
    219            }
    220        } else {
    221            None
    222        };
    223        UrlPatternInit {
    224            protocol: maybe_to_option_string(&wrapper.protocol),
    225            username: maybe_to_option_string(&wrapper.username),
    226            password: maybe_to_option_string(&wrapper.password),
    227            hostname: maybe_to_option_string(&wrapper.hostname),
    228            port: maybe_to_option_string(&wrapper.port),
    229            pathname: maybe_to_option_string(&wrapper.pathname),
    230            search: maybe_to_option_string(&wrapper.search),
    231            hash: maybe_to_option_string(&wrapper.hash),
    232            base_url: maybe_base,
    233        }
    234    }
    235 }
    236 
    237 // convert from UrlpInit to quirks::UrlPatternInit
    238 // used by parse_pattern into the internal function
    239 // MatchInput `From` conversion also uses
    240 impl From<UrlpInit> for Uq::UrlPatternInit {
    241    fn from(wrapper: UrlpInit) -> Uq::UrlPatternInit {
    242        let maybe_base = if wrapper.base_url.valid {
    243            Some(wrapper.base_url.string.to_string())
    244        } else {
    245            None
    246        };
    247 
    248        Uq::UrlPatternInit {
    249            protocol: maybe_to_option_string(&wrapper.protocol),
    250            username: maybe_to_option_string(&wrapper.username),
    251            password: maybe_to_option_string(&wrapper.password),
    252            hostname: maybe_to_option_string(&wrapper.hostname),
    253            port: maybe_to_option_string(&wrapper.port),
    254            pathname: maybe_to_option_string(&wrapper.pathname),
    255            search: maybe_to_option_string(&wrapper.search),
    256            hash: maybe_to_option_string(&wrapper.hash),
    257            base_url: maybe_base,
    258        }
    259    }
    260 }
    261 
    262 // needed for process_match_input_from_init
    263 impl From<&UrlpInit> for Uq::UrlPatternInit {
    264    fn from(wrapper: &UrlpInit) -> Self {
    265        let maybe_base = if wrapper.base_url.valid {
    266            Some(wrapper.base_url.string.to_string())
    267        } else {
    268            None
    269        };
    270        Uq::UrlPatternInit {
    271            protocol: maybe_to_option_string(&wrapper.protocol),
    272            username: maybe_to_option_string(&wrapper.username),
    273            password: maybe_to_option_string(&wrapper.password),
    274            hostname: maybe_to_option_string(&wrapper.hostname),
    275            port: maybe_to_option_string(&wrapper.port),
    276            pathname: maybe_to_option_string(&wrapper.pathname),
    277            search: maybe_to_option_string(&wrapper.search),
    278            hash: maybe_to_option_string(&wrapper.hash),
    279            base_url: maybe_base,
    280        }
    281    }
    282 }
    283 
    284 impl From<Uq::UrlPatternInit> for UrlpInit {
    285    fn from(init: Uq::UrlPatternInit) -> UrlpInit {
    286        let base = match init.base_url.as_ref() {
    287            Some(s) => MaybeString {
    288                valid: true,
    289                string: nsCString::from(s),
    290            },
    291            _ => MaybeString {
    292                valid: false,
    293                string: nsCString::from(""),
    294            },
    295        };
    296 
    297        UrlpInit {
    298            protocol: option_to_maybe_string(init.protocol),
    299            username: option_to_maybe_string(init.username),
    300            password: option_to_maybe_string(init.password),
    301            hostname: option_to_maybe_string(init.hostname),
    302            port: option_to_maybe_string(init.port),
    303            pathname: option_to_maybe_string(init.pathname),
    304            search: option_to_maybe_string(init.search),
    305            hash: option_to_maybe_string(init.hash),
    306            base_url: base,
    307        }
    308    }
    309 }
    310 
    311 impl From<UrlpInnerMatcher> for Uq::InnerMatcher {
    312    fn from(wrapper: UrlpInnerMatcher) -> Uq::InnerMatcher {
    313        match wrapper.inner_type {
    314            UrlpInnerMatcherType::Literal => Uq::InnerMatcher::Literal {
    315                literal: wrapper.literal.to_string().to_owned(),
    316            },
    317            UrlpInnerMatcherType::SingleCapture => {
    318                let maybe_filter = if wrapper.filter_exists {
    319                    Some(wrapper.filter)
    320                } else {
    321                    None
    322                };
    323                Uq::InnerMatcher::SingleCapture {
    324                    allow_empty: wrapper.allow_empty,
    325                    filter: maybe_filter,
    326                }
    327            }
    328            UrlpInnerMatcherType::RegExp => Uq::InnerMatcher::RegExp {
    329                regexp: wrapper.regexp.to_string().to_owned(),
    330            },
    331        }
    332    }
    333 }
    334 
    335 impl From<Uq::InnerMatcher> for UrlpInnerMatcher {
    336    fn from(inner: Uq::InnerMatcher) -> UrlpInnerMatcher {
    337        match inner {
    338            Uq::InnerMatcher::Literal { literal } => {
    339                UrlpInnerMatcher {
    340                    inner_type: UrlpInnerMatcherType::Literal,
    341                    literal: nsCString::from(literal).to_owned(),
    342                    allow_empty: false, // maybe should be an optional
    343                    filter_exists: false,
    344                    filter: 'x'.to_owned(),
    345                    regexp: nsCString::from("").to_owned(),
    346                }
    347            }
    348            Uq::InnerMatcher::SingleCapture {
    349                filter,
    350                allow_empty,
    351            } => {
    352                UrlpInnerMatcher {
    353                    inner_type: UrlpInnerMatcherType::SingleCapture,
    354                    literal: nsCString::from("").to_owned(),
    355                    allow_empty, // maybe should be an optional
    356                    filter_exists: filter.is_some(),
    357                    filter: filter.unwrap_or('\0'),
    358                    regexp: nsCString::from("").to_owned(),
    359                }
    360            }
    361            Uq::InnerMatcher::RegExp { regexp } => UrlpInnerMatcher {
    362                inner_type: UrlpInnerMatcherType::RegExp,
    363                literal: nsCString::from("").to_owned(),
    364                allow_empty: false,
    365                filter_exists: false,
    366                filter: 'x'.to_owned(),
    367                regexp: nsCString::from(regexp).to_owned(),
    368            },
    369        }
    370    }
    371 }
    372 
    373 impl From<UrlpMatcher> for Uq::Matcher {
    374    fn from(wrapper: UrlpMatcher) -> Uq::Matcher {
    375        Uq::Matcher {
    376            prefix: wrapper.prefix.to_string().to_owned(),
    377            suffix: wrapper.suffix.to_string().to_owned(),
    378            inner: wrapper.inner.into(),
    379        }
    380    }
    381 }
    382 
    383 impl From<Uq::Matcher> for UrlpMatcher {
    384    fn from(matcher: Uq::Matcher) -> UrlpMatcher {
    385        UrlpMatcher {
    386            prefix: nsCString::from(matcher.prefix).to_owned(),
    387            suffix: nsCString::from(matcher.suffix).to_owned(),
    388            inner: matcher.inner.into(),
    389        }
    390    }
    391 }
    392 
    393 impl From<Uq::UrlPatternComponent> for UrlpComponent {
    394    fn from(comp: Uq::UrlPatternComponent) -> UrlpComponent {
    395        UrlpComponent {
    396            pattern_string: nsCString::from(comp.pattern_string).to_owned(),
    397            regexp_string: nsCString::from(comp.regexp_string).to_owned(),
    398            matcher: comp.matcher.into(),
    399            group_name_list: comp
    400                .group_name_list
    401                .into_iter()
    402                .map(nsCString::from)
    403                .collect::<ThinVec<_>>(),
    404        }
    405    }
    406 }
    407 
    408 // easily convert from OptionsWrapper to internal type
    409 // used by parse_pattern
    410 impl Into<UrlPatternOptions> for UrlpOptions {
    411    fn into(self) -> UrlPatternOptions {
    412        UrlPatternOptions {
    413            ignore_case: self.ignore_case,
    414        }
    415    }
    416 }