tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

fragment_directive_impl.rs (35812B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 use percent_encoding::{percent_decode, percent_encode, NON_ALPHANUMERIC};
      5 use std::str;
      6 
      7 /// The `FragmentDirectiveParameter` represents one of
      8 /// `[prefix-,]start[,end][,-suffix]` without any surrounding `-` or `,`.
      9 ///
     10 /// The token is stored as percent-decoded string.
     11 /// Therefore, interfaces exist to
     12 ///   - create a `FragmentDirectiveParameter` from a percent-encoded string.
     13 ///     This function will determine from occurrence and position of a dash
     14 ///     if the token represents a `prefix`, `suffix` or either `start` or `end`.
     15 ///   - create a percent-encoded string from the value the token holds.
     16 pub enum TextDirectiveParameter {
     17    Prefix(String),
     18    StartOrEnd(String),
     19    Suffix(String),
     20 }
     21 
     22 impl TextDirectiveParameter {
     23    /// Creates a token from a percent-encoded string.
     24    /// Based on position of a dash the correct token type is determined.
     25    /// Returns `None` in case of an ill-formed token:
     26    ///   - starts and ends with a dash (i.e. `-token-`)
     27    ///   - only consists of a dash (i.e. `-`) or is empty
     28    ///   - conversion from percent-encoded string to utf8 fails.
     29    pub fn from_percent_encoded(token: &[u8]) -> Option<Self> {
     30        if token.is_empty() {
     31            return None;
     32        }
     33        let starts_with_dash = *token.first().unwrap() == b'-';
     34        let ends_with_dash = *token.last().unwrap() == b'-';
     35        if starts_with_dash && ends_with_dash {
     36            // `-token-` is not valid.
     37            return None;
     38        }
     39        if token.len() == 1 && starts_with_dash {
     40            // `-` is not valid.
     41            return None;
     42        }
     43        // Note: Trimming of the raw strings is currently not mentioned in the spec.
     44        // However, it looks as it is implicitly expected.
     45        if starts_with_dash {
     46            if let Ok(decoded_suffix) = percent_decode(&token[1..]).decode_utf8() {
     47                return Some(TextDirectiveParameter::Suffix(String::from(
     48                    decoded_suffix.trim(),
     49                )));
     50            }
     51            return None;
     52        }
     53        if ends_with_dash {
     54            if let Ok(decoded_prefix) = percent_decode(&token[..token.len() - 1]).decode_utf8() {
     55                return Some(TextDirectiveParameter::Prefix(String::from(
     56                    decoded_prefix.trim(),
     57                )));
     58            }
     59            return None;
     60        }
     61        if let Ok(decoded_text) = percent_decode(&token).decode_utf8() {
     62            return Some(TextDirectiveParameter::StartOrEnd(String::from(
     63                decoded_text.trim(),
     64            )));
     65        }
     66        None
     67    }
     68 
     69    /// Returns the value of the token as percent-decoded `String`.
     70    pub fn value(&self) -> &String {
     71        match self {
     72            TextDirectiveParameter::Prefix(value) => &value,
     73            TextDirectiveParameter::StartOrEnd(value) => &value,
     74            TextDirectiveParameter::Suffix(value) => &value,
     75        }
     76    }
     77 
     78    /// Creates a percent-encoded string of the token's value.
     79    /// This includes placing a dash appropriately
     80    /// to indicate whether this token is prefix, suffix or start/end.
     81    ///
     82    /// This method always returns a new object.
     83    pub fn to_percent_encoded_string(&self) -> String {
     84        let encode = |text: &String| percent_encode(text.as_bytes(), NON_ALPHANUMERIC).to_string();
     85        match self {
     86            Self::Prefix(text) => encode(text) + "-",
     87            Self::StartOrEnd(text) => encode(text),
     88            Self::Suffix(text) => {
     89                let encoded = encode(text);
     90                let mut result = String::with_capacity(encoded.len() + 1);
     91                result.push_str("-");
     92                result.push_str(&encoded);
     93                result
     94            }
     95        }
     96    }
     97 }
     98 
/// This struct represents one parsed text directive using Rust types.
///
/// A text fragment is encoded into a URL fragment like this:
/// `text=[prefix-,]start[,end][,-suffix]`
///
/// The text directive is considered valid if at least `start` is not None.
/// (see `Self::is_valid()`).
///
/// The derived `Default` yields a directive with all four parameters unset.
#[derive(Default)]
pub struct TextDirective {
    // Optional `prefix-` parameter.
    prefix: Option<TextDirectiveParameter>,
    // `start` parameter; `is_valid()` requires this to be set.
    start: Option<TextDirectiveParameter>,
    // Optional `end` parameter.
    end: Option<TextDirectiveParameter>,
    // Optional `-suffix` parameter.
    suffix: Option<TextDirectiveParameter>,
}
    113 impl TextDirective {
    114    /// Creates an instance from string parts.
    115    /// This function is intended to be used when a fragment directive string should be created.
    116    /// Returns `None` if `start` is empty.
    117    pub fn from_parts(prefix: String, start: String, end: String, suffix: String) -> Option<Self> {
    118        if !start.is_empty() {
    119            Some(Self {
    120                prefix: if !prefix.is_empty() {
    121                    Some(TextDirectiveParameter::Prefix(prefix.trim().into()))
    122                } else {
    123                    None
    124                },
    125                start: Some(TextDirectiveParameter::StartOrEnd(start.trim().into())),
    126                end: if !end.is_empty() {
    127                    Some(TextDirectiveParameter::StartOrEnd(end.trim().into()))
    128                } else {
    129                    None
    130                },
    131                suffix: if !suffix.is_empty() {
    132                    Some(TextDirectiveParameter::Suffix(suffix.trim().into()))
    133                } else {
    134                    None
    135                },
    136            })
    137        } else {
    138            None
    139        }
    140    }
    141 
    142    /// Creates an instance from a percent-encoded string
    143    /// that originates from a fragment directive.
    144    ///
    145    /// `text_fragment` is supposed to have this format:
    146    /// ```ignore
    147    /// text=[prefix-,]start[,end][,-suffix]
    148    /// ```
    149    /// This function returns `None` if `text_fragment`
    150    /// does not start with `text=`, it contains 0 or more
    151    /// than 4 elements or prefix/suffix/start or end
    152    /// occur too many times.
    153    /// It also returns `None` if any of the tokens parses to fail.
    154    pub fn from_percent_encoded_string(text_directive: &str) -> Option<Self> {
    155        // first check if the string starts with `text=`
    156        if text_directive.len() < 6 {
    157            return None;
    158        }
    159        if !text_directive.starts_with("text=") {
    160            return None;
    161        }
    162 
    163        let mut parsed_text_directive = Self::default();
    164        let valid = text_directive[5..]
    165            .split(",")
    166            // Parse the substrings into `TextDirectiveParameter`s. This will determine
    167            // for each substring if it is a Prefix, Suffix or Start/End,
    168            // or if it is invalid.
    169            .map(|token| TextDirectiveParameter::from_percent_encoded(token.as_bytes()))
    170            // populate `parsed_text_directive` and check its validity by inserting the parameters
    171            // one by one. Given that the parameters are sorted by their position in the source,
    172            // the validity of the text directive can be determined while adding the parameters.
    173            .map(|token| match token {
    174                Some(TextDirectiveParameter::Prefix(..)) => {
    175                    if !parsed_text_directive.is_empty() {
    176                        // `prefix-` must be the first result.
    177                        return false;
    178                    }
    179                    parsed_text_directive.prefix = token;
    180                    return true;
    181                }
    182                Some(TextDirectiveParameter::StartOrEnd(..)) => {
    183                    if parsed_text_directive.suffix.is_some() {
    184                        // start or end must come before `-suffix`.
    185                        return false;
    186                    }
    187                    if parsed_text_directive.start.is_none() {
    188                        parsed_text_directive.start = token;
    189                        return true;
    190                    }
    191                    if parsed_text_directive.end.is_none() {
    192                        parsed_text_directive.end = token;
    193                        return true;
    194                    }
    195                    // if `start` and `end` is already filled,
    196                    // this is invalid as well.
    197                    return false;
    198                }
    199                Some(TextDirectiveParameter::Suffix(..)) => {
    200                    if parsed_text_directive.start.is_some()
    201                        && parsed_text_directive.suffix.is_none()
    202                    {
    203                        // `start` must be present and `-suffix` must not be present.
    204                        // `end` may be present.
    205                        parsed_text_directive.suffix = token;
    206                        return true;
    207                    }
    208                    return false;
    209                }
    210                // empty or invalid token renders the whole text directive invalid.
    211                None => false,
    212            })
    213            .all(|valid| valid);
    214        if valid {
    215            return Some(parsed_text_directive);
    216        }
    217        None
    218    }
    219 
    220    /// Creates a percent-encoded string for the current `TextDirective`.
    221    /// In the unlikely case that the `TextDirective` is invalid (i.e. `start` is None),
    222    /// which should have been caught earlier,this method returns an empty string.
    223    pub fn to_percent_encoded_string(&self) -> String {
    224        if !self.is_valid() {
    225            return String::default();
    226        }
    227        String::from("text=")
    228            + &[&self.prefix, &self.start, &self.end, &self.suffix]
    229                .iter()
    230                .filter_map(|&token| token.as_ref())
    231                .map(|token| token.to_percent_encoded_string())
    232                .collect::<Vec<_>>()
    233                .join(",")
    234    }
    235 
    236    pub fn start(&self) -> &Option<TextDirectiveParameter> {
    237        &self.start
    238    }
    239 
    240    pub fn end(&self) -> &Option<TextDirectiveParameter> {
    241        &self.end
    242    }
    243 
    244    pub fn prefix(&self) -> &Option<TextDirectiveParameter> {
    245        &self.prefix
    246    }
    247 
    248    pub fn suffix(&self) -> &Option<TextDirectiveParameter> {
    249        &self.suffix
    250    }
    251 
    252    fn is_empty(&self) -> bool {
    253        self.prefix.is_none() && self.start.is_none() && self.end.is_none() && self.suffix.is_none()
    254    }
    255 
    256    /// A `TextDirective` object is valid if it contains the `start` token.
    257    /// All other tokens are optional.
    258    fn is_valid(&self) -> bool {
    259        self.start.is_some()
    260    }
    261 }
    262 /// Parses a fragment directive into a list of `TextDirective` objects and removes
    263 /// the fragment directive from the input url.
    264 ///
    265 /// If the hash does not contain a fragment directive, `hash` is not modified
    266 /// and this function returns `None`.
    267 /// Otherwise, the fragment directive is removed from `hash` and parsed.
    268 /// The function returns a tuple of three elements:
    269 ///   - The input url hash without the fragment directive. Trailing `#`s are removed as well.
    270 ///   - The unparsed fragment directive.
    271 ///   - All parsed valid text directives. Invalid text directives are silently ignored.
    272 pub fn parse_fragment_directive_and_remove_it_from_hash(
    273    hash: &str,
    274 ) -> Option<(&str, &str, Vec<TextDirective>)> {
    275    // The Fragment Directive is preceded by a `:~:`,
    276    // which is only allowed to appear in the hash once.
    277    let mut fragment_directive_iter = hash.split(":~:");
    278    let hash_without_fragment_directive =
    279        &hash[..fragment_directive_iter.next().unwrap_or_default().len()];
    280 
    281    if let Some(fragment_directive) = fragment_directive_iter.next() {
    282        if fragment_directive_iter.next().is_some() {
    283            // There are multiple occurrences of `:~:`, which is not allowed.
    284            return Some((hash_without_fragment_directive, fragment_directive, vec![]));
    285        }
    286        // - fragments are separated by `&`.
    287        // - if a fragment does not start with `text=`, it is not a text directive and will be ignored.
    288        // - if parsing of the text fragment fails (for whatever reason), it will be ignored.
    289        let text_directives: Vec<_> = fragment_directive
    290            .split("&")
    291            .map(|maybe_text_fragment| {
    292                TextDirective::from_percent_encoded_string(&maybe_text_fragment)
    293            })
    294            .filter_map(|maybe_text_directive| maybe_text_directive)
    295            .collect();
    296 
    297        return Some((
    298            hash_without_fragment_directive,
    299            fragment_directive,
    300            text_directives,
    301        ));
    302    }
    303    None
    304 }
    305 
    306 /// Creates a percent-encoded text fragment string.
    307 ///
    308 /// The returned string starts with `:~:`, so that it can be appended
    309 /// to a normal fragment.
    310 /// Text directives which are not valid (ie., they are missing the `start` parameter),
    311 /// are skipped.
    312 ///
    313 /// Returns `None` if `fragment_directives` is empty.
    314 pub fn create_fragment_directive_string(text_directives: &Vec<TextDirective>) -> Option<String> {
    315    if text_directives.is_empty() {
    316        return None;
    317    }
    318    let encoded_fragment_directives: Vec<_> = text_directives
    319        .iter()
    320        .filter(|&fragment_directive| fragment_directive.is_valid())
    321        .map(|fragment_directive| fragment_directive.to_percent_encoded_string())
    322        .filter(|text_directive| !text_directive.is_empty())
    323        .collect();
    324    if encoded_fragment_directives.is_empty() {
    325        return None;
    326    }
    327    Some(String::from(":~:") + &encoded_fragment_directives.join("&"))
    328 }
    329 
    330 /// Creates the percent-encoded text directive string for a single text directive.
    331 pub fn create_text_directive_string(text_directive: &TextDirective) -> Option<String> {
    332    if text_directive.is_valid() {
    333        Some(text_directive.to_percent_encoded_string())
    334    } else {
    335        None
    336    }
    337 }
    338 
    339 #[cfg(test)]
    340 mod tests {
    341    use super::{
    342        create_fragment_directive_string, parse_fragment_directive_and_remove_it_from_hash,
    343        TextDirective,
    344    };
    345 
    /// Checks that each valid combination of `[prefix-,]start[,end][,-suffix]`
    /// yields exactly one text directive with the expected parameter values.
    #[test]
    fn test_parse_fragment_directive_with_one_text_directive() {
        // U+2705 WHITE HEAVY CHECK MARK - UTF-8 percent encoding: %E2%9C%85
        let checkmark = "\u{2705}";
        // Each entry: (input hash, expected (prefix, start, end, suffix)).
        let test_cases = vec![
            (":~:text=start", (None, Some("start"), None, None)),
            (
                ":~:text=start,end",
                (None, Some("start"), Some("end"), None),
            ),
            (
                ":~:text=prefix-,start",
                (Some("prefix"), Some("start"), None, None),
            ),
            (
                ":~:text=prefix-,start,end",
                (Some("prefix"), Some("start"), Some("end"), None),
            ),
            (
                ":~:text=prefix-,start,end,-suffix",
                (Some("prefix"), Some("start"), Some("end"), Some("suffix")),
            ),
            (
                ":~:text=start,-suffix",
                (None, Some("start"), None, Some("suffix")),
            ),
            (
                ":~:text=start,end,-suffix",
                (None, Some("start"), Some("end"), Some("suffix")),
            ),
            (":~:text=text=", (None, Some("text="), None, None)),
            (":~:text=%25", (None, Some("%"), None, None)),
            (":~:text=%", (None, Some("%"), None, None)),
            (":~:text=%%", (None, Some("%%"), None, None)),
            (":~:text=%25%25F", (None, Some("%%F"), None, None)),
            (":~:text=%E2%9C%85", (None, Some(checkmark), None, None)),
            (":~:text=#", (None, Some("#"), None, None)),
            (":~:text=:", (None, Some(":"), None, None)),
            (
                ":~:text=prefix--,start",
                (Some("prefix-"), Some("start"), None, None),
            ),
            (
                ":~:text=p-refix-,start",
                (Some("p-refix"), Some("start"), None, None),
            ),
        ];
        for (url, (prefix, start, end, suffix)) in test_cases {
            let (stripped_url, fragment_directive, result) =
                parse_fragment_directive_and_remove_it_from_hash(url)
                    .expect("The parser must find a result.");
            assert_eq!(
                fragment_directive,
                &url[3..],
                "The extracted fragment directive string
                should be unsanitized and therefore match the input string."
            );
            assert_eq!(result.len(), 1, "There must be one parsed text fragment.");
            assert_eq!(
                stripped_url, "",
                "The fragment directive must be removed from the url hash."
            );
            let text_directive = result.first().unwrap();

            // All four parameters are checked the same way, in the order
            // prefix, start, end, suffix.
            for (name, actual, expected) in [
                ("prefix", text_directive.prefix(), prefix),
                ("start", text_directive.start(), start),
                ("end", text_directive.end(), end),
                ("suffix", text_directive.suffix(), suffix),
            ] {
                match expected {
                    None => assert!(
                        actual.is_none(),
                        "There must be no `{}` token (test case `{}`).",
                        name,
                        url
                    ),
                    Some(expected_value) => {
                        let parameter = actual
                            .as_ref()
                            .unwrap_or_else(|| panic!("There must be a `{}` token.", name));
                        assert!(
                            parameter.value() == expected_value,
                            "Wrong value for `{}` (test case `{}`).",
                            name,
                            url
                        );
                    }
                }
            }
        }
    }
    487 
    /// Checks that a text directive is parsed correctly when the hash contains
    /// a regular fragment in front of the fragment directive (i.e. `foo:~:text=start`).
    #[test]
    fn test_parse_text_fragment_after_fragments() {
        let parse_result = parse_fragment_directive_and_remove_it_from_hash("foo:~:text=start");
        let (stripped_url, fragment_directive, result) =
            parse_result.expect("The parser must find a result.");

        assert_eq!(
            result.len(),
            1,
            "There must be exactly one parsed text fragment."
        );
        assert_eq!(
            stripped_url, "foo",
            "The fragment directive was not removed correctly."
        );
        assert_eq!(
            fragment_directive, "text=start",
            "The fragment directive was not extracted correctly."
        );

        // The length was asserted above, so indexing cannot fail here.
        let fragment = &result[0];
        assert!(fragment.prefix().is_none(), "There is no `prefix` token.");
        let start_parameter = fragment
            .start()
            .as_ref()
            .expect("There must be a `start` token.");
        assert_eq!(start_parameter.value(), "start");
        assert!(fragment.end().is_none(), "There is no `end` token.");
        assert!(fragment.suffix().is_none(), "There is no `suffix` token.");
    }
    522 
    /// Ensure that multiple text fragments are parsed correctly.
    ///
    /// The input contains three text directives separated by `&`; each of them
    /// must be returned, in source order, with the expected parameters.
    #[test]
    fn test_parse_multiple_text_fragments() {
        let url = ":~:text=prefix-,start,-suffix&text=foo&text=bar,-suffix";
        let (_, _, text_directives) = parse_fragment_directive_and_remove_it_from_hash(url)
            .expect("The parser must find a result.");
        assert_eq!(
            text_directives.len(),
            3,
            "There must be exactly three parsed text fragments."
        );

        // Expected (prefix, start, suffix) per directive; none of them has an `end`.
        let expected_parameters = [
            (Some("prefix"), "start", Some("suffix")),
            (None, "foo", None),
            (None, "bar", Some("suffix")),
        ];
        for (text_directive, (prefix, start, suffix)) in
            text_directives.iter().zip(expected_parameters.iter())
        {
            match prefix {
                Some(expected_prefix) => assert_eq!(
                    text_directive
                        .prefix()
                        .as_ref()
                        .expect("There must be a `prefix` token.")
                        .value(),
                    expected_prefix
                ),
                None => assert!(
                    text_directive.prefix().is_none(),
                    "There is no `prefix` token."
                ),
            }
            assert_eq!(
                text_directive
                    .start()
                    .as_ref()
                    .expect("There must be a `start` token.")
                    .value(),
                start
            );
            assert!(text_directive.end().is_none(), "There is no `end` token.");
            match suffix {
                Some(expected_suffix) => assert_eq!(
                    text_directive
                        .suffix()
                        .as_ref()
                        .expect("There must be a `suffix` token.")
                        .value(),
                    expected_suffix
                ),
                None => assert!(
                    text_directive.suffix().is_none(),
                    "There is no `suffix` token."
                ),
            }
        }
    }
    611 
    /// An unknown directive (here `foo`) in front of, between or behind two
    /// text directives must not prevent the text directives from being parsed.
    #[test]
    fn test_parse_multiple_text_directives_with_unknown_directive_in_between() {
        let urls = [
            ":~:foo&text=start1&text=start2",
            ":~:text=start1&foo&text=start2",
            ":~:text=start1&text=start2&foo",
        ];
        for &url in &urls {
            let (_, fragment_directive, text_directives) =
                parse_fragment_directive_and_remove_it_from_hash(url)
                    .expect("The parser must find a result.");
            assert_eq!(
                fragment_directive,
                &url[3..],
                "The extracted fragment directive string is unsanitized
                and should contain the unknown directive."
            );
            assert_eq!(
                text_directives.len(),
                2,
                "There must be exactly two parsed text fragments."
            );
            // The two text directives must come out in source order.
            for (text_directive, expected_start) in
                text_directives.iter().zip(["start1", "start2"].iter())
            {
                let start_parameter = text_directive
                    .start()
                    .as_ref()
                    .expect("There must be a `start` token.");
                assert_eq!(start_parameter.value(), expected_start);
            }
        }
    }
    655 
    /// Covers hashes without a usable fragment directive:
    ///   - Without the `:~:` delimiter (e.g. `foo`, or `text=foo` on its own)
    ///     the parser must return `None`.
    ///   - With the delimiter but an empty, unknown or repeated fragment
    ///     directive, the parser must still return a result in which the
    ///     fragment directive is removed from the hash.
    #[test]
    fn test_parse_invalid_or_unknown_fragment_directive() {
        // there is no fragment directive here, hence the original url should not be updated.
        let hashes_without_delimiter = ["foo", "foo:", "text=prefix-,start"];
        for &url in &hashes_without_delimiter {
            assert!(
                parse_fragment_directive_and_remove_it_from_hash(url).is_none(),
                "The fragment `{}` does not contain a valid or known fragment directive.",
                url
            );
        }

        // there is an (invalid) fragment directive present. It needs to be removed from the url.
        let hashes_with_delimiter = [
            ("foo:~:", "foo"),
            ("foo:~:bar", "foo"),
            (":~:text=foo-,bar,-baz:~:text=foo", ""),
        ];
        for &(url, url_without_fragment_directive_ref) in &hashes_with_delimiter {
            let parse_result = parse_fragment_directive_and_remove_it_from_hash(url);
            let (url_without_fragment_directive, _, _) =
                parse_result.expect("There is a fragment directive which should have been removed.");
            assert_eq!(
                url_without_fragment_directive, url_without_fragment_directive_ref,
                "The fragment directive has not been removed correctly from  fragment `{}`.",
                url
            );
        }
    }
    687 
    /// Ensures that ill-formed text directives (but valid fragment directives)
    /// (starting correctly with `:~:text=`) are not parsed.
    /// The parser must still return a result (the fragment directive is
    /// present and has to be removed from the hash), but the list of parsed
    /// text directives must be empty.
    /// Test cases include invalid combinations of `prefix`/`suffix`es,
    /// additional `,`s, too many `start`/`end` tokens, or empty text fragments.
    #[test]
    fn test_parse_invalid_text_fragments() {
        for url in [
            ":~:text=start,start,start",
            ":~:text=prefix-,prefix-",
            ":~:text=prefix-,-suffix",
            ":~:text=prefix-,start,start,start",
            ":~:text=prefix-,start,start,start,-suffix",
            ":~:text=start,start,start,-suffix",
            ":~:text=prefix-,start,end,-suffix,foo",
            ":~:text=foo,prefix-,start",
            ":~:text=prefix-,,start,",
            ":~:text=,prefix,start",
            ":~:text=",
            ":~:text=&",
            ":~:text=,",
        ] {
            let (url_without_fragment_directive, _, text_directives) =
                parse_fragment_directive_and_remove_it_from_hash(url)
                    .expect("The `:~:` delimiter is present, so a result must be returned.");
            assert!(
                url_without_fragment_directive.is_empty(),
                "The fragment directive `{}` does not contain a valid fragment directive. \
                 It must be removed from the original url anyway.",
                url
            );
            // Removing the directive from the hash is not enough: the ill-formed
            // text directive must not show up in the parsed result either.
            assert!(
                text_directives.is_empty(),
                "The ill-formed text directive in `{}` must not be parsed.",
                url
            );
        }
    }
    720 
    /// Ensures that a fragment directive mixing valid and invalid text directives
    /// yields only the valid ones, while the invalid ones are dropped.
    ///
    /// Parsing of multiple text fragments and the various kinds of invalid
    /// text fragments are covered by `test_parse_multiple_text_fragments` and
    /// `test_parse_invalid_text_fragments()`. This test therefore only uses
    /// fragment directives with two text directives, exactly one of which is invalid.
    #[test]
    fn test_valid_and_invalid_text_directives() {
        let inputs = [":~:text=start&text=,foo,", ":~:text=foo,foo,foo&text=start"];
        for input in inputs {
            let parsed = parse_fragment_directive_and_remove_it_from_hash(input);
            let (_, raw_directive, parsed_directives) =
                parsed.expect("The parser must find a result.");

            // Everything after the leading `:~:` is expected back verbatim.
            let expected_raw_directive = &input[3..];
            assert_eq!(
                raw_directive,
                expected_raw_directive,
                "The extracted fragment directive string is unsanitized\n                and should contain invalid text directives."
            );

            let directive_count = parsed_directives.len();
            assert_eq!(
                directive_count,
                1,
                "There must be exactly one parsed text fragment."
            );

            let directive = parsed_directives.first().unwrap();
            let start_parameter = directive
                .start()
                .as_ref()
                .expect("There must be a `start` value.");
            assert_eq!(
                start_parameter.value(),
                "start",
                "The `start` value of the text directive has the wrong value."
            );
        }
    }
    757 
    /// Ensures that percent-encoded characters in a text directive are decoded correctly.
    ///
    /// The input contains percent-encoded forms of characters which are otherwise
    /// used for identifying text fragments (`#`, `,`, `&` and `-`) as well as
    /// a percent-encoded space. The raw fragment directive string returned by
    /// the parser must stay untouched, while the values of the parsed
    /// `prefix`, `start`, `end` and `suffix` must be percent-decoded.
    #[test]
    fn test_parse_percent_encoding_tokens() {
        let url = ":~:text=prefix%26-,start%20and%2C,end%23,-%26suffix%2D";
        let (_, fragment_directive, text_directives) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result.");
        // The raw directive is everything after the leading `:~:`, still encoded.
        assert_eq!(
            fragment_directive,
            &url[3..],
            "The extracted fragment directive string is unsanitized \
             and should contain the original, still percent-encoded string."
        );
        let text_directive = text_directives
            .first()
            .expect("There must be a parsed text directive.");
        assert_eq!(
            text_directive
                .prefix()
                .as_ref()
                .expect("There must be a prefix.")
                .value(),
            "prefix&",
            "The `prefix` value has not been percent-decoded correctly."
        );
        assert_eq!(
            text_directive
                .start()
                .as_ref()
                .expect("There must be a `start` value.")
                .value(),
            "start and,",
            "The `start` value has not been percent-decoded correctly."
        );
        assert_eq!(
            text_directive
                .end()
                .as_ref()
                .expect("There must be an `end` value.")
                .value(),
            "end#",
            "The `end` value has not been percent-decoded correctly."
        );
        assert_eq!(
            text_directive
                .suffix()
                .as_ref()
                .expect("There must be a suffix.")
                .value(),
            "&suffix-",
            "The `suffix` value has not been percent-decoded correctly."
        );
    }
    811 
    /// Ensures that a fragment directive string is created correctly
    /// from a single text directive.
    ///
    /// Each case lists the `(prefix, start, end, suffix)` parts (an empty string
    /// stands for an absent part) together with the expected fragment directive.
    /// The last case uses values containing characters which have to be
    /// percent-encoded in the output (`-`, ` `, `,`, `&`, `#`, `:` and `~`).
    #[test]
    fn test_create_fragment_directive() {
        let cases = [
            (("", "start", "", ""), ":~:text=start"),
            (("", "start", "end", ""), ":~:text=start,end"),
            (("prefix", "start", "end", ""), ":~:text=prefix-,start,end"),
            (
                ("prefix", "start", "end", "suffix"),
                ":~:text=prefix-,start,end,-suffix",
            ),
            (("", "start", "end", "suffix"), ":~:text=start,end,-suffix"),
            (
                ("prefix", "start", "", "suffix"),
                ":~:text=prefix-,start,-suffix",
            ),
            (
                ("prefix-", "start and,", "&end", "#:~:suffix"),
                ":~:text=prefix%2D-,start%20and%2C,%26end,-%23%3A%7E%3Asuffix",
            ),
        ];
        for ((prefix, start, end, suffix), expected_fragment_directive) in cases {
            let text_directive = TextDirective::from_parts(
                String::from(prefix),
                String::from(start),
                String::from(end),
                String::from(suffix),
            )
            .unwrap();
            let created = create_fragment_directive_string(&vec![text_directive])
                .expect("The given input must produce a valid fragment directive.");
            assert_eq!(created, expected_fragment_directive);
        }
    }
    899 
    /// Ensures that several text directives are combined into a single fragment directive.
    /// The expected result starts with `:~:text=`, and every further
    /// text directive is appended using `&text=` as separator.
    #[test]
    fn test_create_fragment_directive_from_multiple_text_directives() {
        // Three directives which only carry a `start` value.
        let text_directives: Vec<_> = ["start1", "start2", "start3"]
            .iter()
            .map(|&start| {
                TextDirective::from_parts(
                    String::new(),
                    String::from(start),
                    String::new(),
                    String::new(),
                )
                .unwrap()
            })
            .collect();
        let created = create_fragment_directive_string(&text_directives)
            .expect("The given input must produce a valid fragment directive.");
        assert_eq!(
            created, ":~:text=start1&text=start2&text=start3",
            "The created fragment directive is wrong for multiple fragments."
        );
    }
    935 }