tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

str.rs (4716B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
      4 
      5 //! String utils for attributes and similar stuff.
      6 
      7 #![deny(missing_docs)]
      8 
      9 use num_traits::ToPrimitive;
     10 use std::borrow::Cow;
     11 use std::iter::{Filter, Peekable};
     12 use std::str::Split;
     13 
     14 /// A static slice of characters.
     15 pub type StaticCharVec = &'static [char];
     16 
     17 /// A static slice of `str`s.
     18 pub type StaticStringVec = &'static [&'static str];
     19 
     20 /// A "space character" according to:
     21 ///
     22 /// <https://html.spec.whatwg.org/multipage/#space-character>
     23 pub static HTML_SPACE_CHARACTERS: StaticCharVec =
     24    &['\u{0020}', '\u{0009}', '\u{000a}', '\u{000c}', '\u{000d}'];
     25 
     26 /// Whether a character is a HTML whitespace character.
     27 #[inline]
     28 pub fn char_is_whitespace(c: char) -> bool {
     29    HTML_SPACE_CHARACTERS.contains(&c)
     30 }
     31 
     32 /// Whether all the string is HTML whitespace.
     33 #[inline]
     34 pub fn is_whitespace(s: &str) -> bool {
     35    s.chars().all(char_is_whitespace)
     36 }
     37 
     38 #[inline]
     39 fn not_empty(&split: &&str) -> bool {
     40    !split.is_empty()
     41 }
     42 
     43 /// Split a string on HTML whitespace.
     44 #[inline]
     45 pub fn split_html_space_chars<'a>(
     46    s: &'a str,
     47 ) -> Filter<Split<'a, StaticCharVec>, fn(&&str) -> bool> {
     48    s.split(HTML_SPACE_CHARACTERS)
     49        .filter(not_empty as fn(&&str) -> bool)
     50 }
     51 
     52 /// Split a string on commas.
     53 #[inline]
     54 pub fn split_commas<'a>(s: &'a str) -> Filter<Split<'a, char>, fn(&&str) -> bool> {
     55    s.split(',').filter(not_empty as fn(&&str) -> bool)
     56 }
     57 
     58 /// Character is ascii digit
     59 pub fn is_ascii_digit(c: &char) -> bool {
     60    match *c {
     61        '0'..='9' => true,
     62        _ => false,
     63    }
     64 }
     65 
     66 fn is_decimal_point(c: char) -> bool {
     67    c == '.'
     68 }
     69 
     70 fn is_exponent_char(c: char) -> bool {
     71    match c {
     72        'e' | 'E' => true,
     73        _ => false,
     74    }
     75 }
     76 
     77 /// Read a set of ascii digits and read them into a number.
     78 pub fn read_numbers<I: Iterator<Item = char>>(mut iter: Peekable<I>) -> (Option<i64>, usize) {
     79    match iter.peek() {
     80        Some(c) if is_ascii_digit(c) => (),
     81        _ => return (None, 0),
     82    }
     83 
     84    iter.take_while(is_ascii_digit)
     85        .map(|d| d as i64 - '0' as i64)
     86        .fold((Some(0i64), 0), |accumulator, d| {
     87            let digits = accumulator
     88                .0
     89                .and_then(|accumulator| accumulator.checked_mul(10))
     90                .and_then(|accumulator| accumulator.checked_add(d));
     91            (digits, accumulator.1 + 1)
     92        })
     93 }
     94 
     95 /// Read a decimal fraction.
     96 pub fn read_fraction<I: Iterator<Item = char>>(
     97    mut iter: Peekable<I>,
     98    mut divisor: f64,
     99    value: f64,
    100 ) -> (f64, usize) {
    101    match iter.peek() {
    102        Some(c) if is_decimal_point(*c) => (),
    103        _ => return (value, 0),
    104    }
    105    iter.next();
    106 
    107    iter.take_while(is_ascii_digit)
    108        .map(|d| d as i64 - '0' as i64)
    109        .fold((value, 1), |accumulator, d| {
    110            divisor *= 10f64;
    111            (accumulator.0 + d as f64 / divisor, accumulator.1 + 1)
    112        })
    113 }
    114 
    115 /// Reads an exponent from an iterator over chars, for example `e100`.
    116 pub fn read_exponent<I: Iterator<Item = char>>(mut iter: Peekable<I>) -> Option<i32> {
    117    match iter.peek() {
    118        Some(c) if is_exponent_char(*c) => (),
    119        _ => return None,
    120    }
    121    iter.next();
    122 
    123    match iter.peek() {
    124        None => None,
    125        Some(&'-') => {
    126            iter.next();
    127            read_numbers(iter).0.map(|exp| -exp.to_i32().unwrap_or(0))
    128        },
    129        Some(&'+') => {
    130            iter.next();
    131            read_numbers(iter).0.map(|exp| exp.to_i32().unwrap_or(0))
    132        },
    133        Some(_) => read_numbers(iter).0.map(|exp| exp.to_i32().unwrap_or(0)),
    134    }
    135 }
    136 
    137 /// Join a set of strings with a given delimiter `join`.
    138 pub fn str_join<I, T>(strs: I, join: &str) -> String
    139 where
    140    I: IntoIterator<Item = T>,
    141    T: AsRef<str>,
    142 {
    143    strs.into_iter()
    144        .enumerate()
    145        .fold(String::new(), |mut acc, (i, s)| {
    146            if i > 0 {
    147                acc.push_str(join);
    148            }
    149            acc.push_str(s.as_ref());
    150            acc
    151        })
    152 }
    153 
    154 /// Returns true if a given string has a given prefix with case-insensitive match.
    155 pub fn starts_with_ignore_ascii_case(string: &str, prefix: &str) -> bool {
    156    string.len() >= prefix.len()
    157        && string.as_bytes()[0..prefix.len()].eq_ignore_ascii_case(prefix.as_bytes())
    158 }
    159 
    160 /// Returns an ascii lowercase version of a string, only allocating if needed.
    161 pub fn string_as_ascii_lowercase<'a>(input: &'a str) -> Cow<'a, str> {
    162    if input.bytes().any(|c| matches!(c, b'A'..=b'Z')) {
    163        input.to_ascii_lowercase().into()
    164    } else {
    165        // Already ascii lowercase.
    166        Cow::Borrowed(input)
    167    }
    168 }