str.rs (4716B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ 4 5 //! String utils for attributes and similar stuff. 6 7 #![deny(missing_docs)] 8 9 use num_traits::ToPrimitive; 10 use std::borrow::Cow; 11 use std::iter::{Filter, Peekable}; 12 use std::str::Split; 13 14 /// A static slice of characters. 15 pub type StaticCharVec = &'static [char]; 16 17 /// A static slice of `str`s. 18 pub type StaticStringVec = &'static [&'static str]; 19 20 /// A "space character" according to: 21 /// 22 /// <https://html.spec.whatwg.org/multipage/#space-character> 23 pub static HTML_SPACE_CHARACTERS: StaticCharVec = 24 &['\u{0020}', '\u{0009}', '\u{000a}', '\u{000c}', '\u{000d}']; 25 26 /// Whether a character is a HTML whitespace character. 27 #[inline] 28 pub fn char_is_whitespace(c: char) -> bool { 29 HTML_SPACE_CHARACTERS.contains(&c) 30 } 31 32 /// Whether all the string is HTML whitespace. 33 #[inline] 34 pub fn is_whitespace(s: &str) -> bool { 35 s.chars().all(char_is_whitespace) 36 } 37 38 #[inline] 39 fn not_empty(&split: &&str) -> bool { 40 !split.is_empty() 41 } 42 43 /// Split a string on HTML whitespace. 44 #[inline] 45 pub fn split_html_space_chars<'a>( 46 s: &'a str, 47 ) -> Filter<Split<'a, StaticCharVec>, fn(&&str) -> bool> { 48 s.split(HTML_SPACE_CHARACTERS) 49 .filter(not_empty as fn(&&str) -> bool) 50 } 51 52 /// Split a string on commas. 53 #[inline] 54 pub fn split_commas<'a>(s: &'a str) -> Filter<Split<'a, char>, fn(&&str) -> bool> { 55 s.split(',').filter(not_empty as fn(&&str) -> bool) 56 } 57 58 /// Character is ascii digit 59 pub fn is_ascii_digit(c: &char) -> bool { 60 match *c { 61 '0'..='9' => true, 62 _ => false, 63 } 64 } 65 66 fn is_decimal_point(c: char) -> bool { 67 c == '.' 
68 } 69 70 fn is_exponent_char(c: char) -> bool { 71 match c { 72 'e' | 'E' => true, 73 _ => false, 74 } 75 } 76 77 /// Read a set of ascii digits and read them into a number. 78 pub fn read_numbers<I: Iterator<Item = char>>(mut iter: Peekable<I>) -> (Option<i64>, usize) { 79 match iter.peek() { 80 Some(c) if is_ascii_digit(c) => (), 81 _ => return (None, 0), 82 } 83 84 iter.take_while(is_ascii_digit) 85 .map(|d| d as i64 - '0' as i64) 86 .fold((Some(0i64), 0), |accumulator, d| { 87 let digits = accumulator 88 .0 89 .and_then(|accumulator| accumulator.checked_mul(10)) 90 .and_then(|accumulator| accumulator.checked_add(d)); 91 (digits, accumulator.1 + 1) 92 }) 93 } 94 95 /// Read a decimal fraction. 96 pub fn read_fraction<I: Iterator<Item = char>>( 97 mut iter: Peekable<I>, 98 mut divisor: f64, 99 value: f64, 100 ) -> (f64, usize) { 101 match iter.peek() { 102 Some(c) if is_decimal_point(*c) => (), 103 _ => return (value, 0), 104 } 105 iter.next(); 106 107 iter.take_while(is_ascii_digit) 108 .map(|d| d as i64 - '0' as i64) 109 .fold((value, 1), |accumulator, d| { 110 divisor *= 10f64; 111 (accumulator.0 + d as f64 / divisor, accumulator.1 + 1) 112 }) 113 } 114 115 /// Reads an exponent from an iterator over chars, for example `e100`. 116 pub fn read_exponent<I: Iterator<Item = char>>(mut iter: Peekable<I>) -> Option<i32> { 117 match iter.peek() { 118 Some(c) if is_exponent_char(*c) => (), 119 _ => return None, 120 } 121 iter.next(); 122 123 match iter.peek() { 124 None => None, 125 Some(&'-') => { 126 iter.next(); 127 read_numbers(iter).0.map(|exp| -exp.to_i32().unwrap_or(0)) 128 }, 129 Some(&'+') => { 130 iter.next(); 131 read_numbers(iter).0.map(|exp| exp.to_i32().unwrap_or(0)) 132 }, 133 Some(_) => read_numbers(iter).0.map(|exp| exp.to_i32().unwrap_or(0)), 134 } 135 } 136 137 /// Join a set of strings with a given delimiter `join`. 
///
/// The delimiter is only inserted *between* items, so an empty input yields
/// an empty string and a single item is returned unchanged.
pub fn str_join<I, T>(strs: I, join: &str) -> String
where
    I: IntoIterator<Item = T>,
    T: AsRef<str>,
{
    let mut joined = String::new();
    let mut items = strs.into_iter();

    // Handle the first item separately so that no per-item index check is
    // needed to decide whether a delimiter must be written.
    if let Some(first) = items.next() {
        joined.push_str(first.as_ref());
        for item in items {
            joined.push_str(join);
            joined.push_str(item.as_ref());
        }
    }
    joined
}

/// Returns true if a given string has a given prefix with case-insensitive match.
///
/// Only ASCII letters are compared case-insensitively; every other byte must
/// match exactly. An empty `prefix` matches any string.
pub fn starts_with_ignore_ascii_case(string: &str, prefix: &str) -> bool {
    // The comparison is done on bytes, so the cut does not need to fall on a
    // character boundary. `get` returns `None` when `string` is shorter than
    // `prefix`, which covers the length check without a panicking index.
    string
        .as_bytes()
        .get(..prefix.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
}

/// Returns an ascii lowercase version of a string, only allocating if needed.
///
/// The input is borrowed as-is when it contains no ASCII uppercase letter;
/// otherwise a lowercased copy is returned. Non-ASCII characters are never
/// changed.
pub fn string_as_ascii_lowercase(input: &str) -> Cow<'_, str> {
    if input.bytes().any(|byte| byte.is_ascii_uppercase()) {
        Cow::Owned(input.to_ascii_lowercase())
    } else {
        // Already ascii lowercase.
        Cow::Borrowed(input)
    }
}