tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

segmenter_grapheme.rs (7236B)


      1 // This file is part of ICU4X. For terms of use, please see the file
      2 // called LICENSE at the top level of the ICU4X source tree
      3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
      4 
      5 #[diplomat::bridge]
      6 #[diplomat::abi_rename = "icu4x_{0}_mv1"]
      7 #[diplomat::attr(auto, namespace = "icu4x")]
      8 pub mod ffi {
      9    use alloc::boxed::Box;
     10    use icu_segmenter::scaffold::{Latin1, PotentiallyIllFormedUtf8, Utf16};
     11 
     12    #[cfg(feature = "buffer_provider")]
     13    use crate::unstable::{errors::ffi::DataError, provider::ffi::DataProvider};
     14 
     15    #[diplomat::opaque]
     16    /// An ICU4X grapheme-cluster-break segmenter, capable of finding grapheme cluster breakpoints
     17    /// in strings.
     18    #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter, Struct)]
     19    #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenterBorrowed, Struct, hidden)]
     20    pub struct GraphemeClusterSegmenter(icu_segmenter::GraphemeClusterSegmenter);
     21 
     22    #[diplomat::opaque]
     23    #[diplomat::rust_link(icu::segmenter::iterators::GraphemeClusterBreakIterator, Struct)]
     24    pub struct GraphemeClusterBreakIteratorUtf8<'a>(
     25        icu_segmenter::iterators::GraphemeClusterBreakIterator<'a, 'a, PotentiallyIllFormedUtf8>,
     26    );
     27 
     28    #[diplomat::opaque]
     29    #[diplomat::rust_link(icu::segmenter::iterators::GraphemeClusterBreakIterator, Struct)]
     30    pub struct GraphemeClusterBreakIteratorUtf16<'a>(
     31        icu_segmenter::iterators::GraphemeClusterBreakIterator<'a, 'a, Utf16>,
     32    );
     33 
     34    #[diplomat::opaque]
     35    #[diplomat::rust_link(icu::segmenter::iterators::GraphemeClusterBreakIterator, Struct)]
     36    pub struct GraphemeClusterBreakIteratorLatin1<'a>(
     37        icu_segmenter::iterators::GraphemeClusterBreakIterator<'a, 'a, Latin1>,
     38    );
     39 
     40    impl GraphemeClusterSegmenter {
     41        /// Construct an [`GraphemeClusterSegmenter`] using compiled data.
     42        #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::new, FnInStruct)]
     43        #[diplomat::attr(auto, constructor)]
     44        #[cfg(feature = "compiled_data")]
     45        pub fn create() -> Box<GraphemeClusterSegmenter> {
     46            Box::new(GraphemeClusterSegmenter(
     47                icu_segmenter::GraphemeClusterSegmenter::new().static_to_owned(),
     48            ))
     49        }
     50        /// Construct an [`GraphemeClusterSegmenter`].
     51        #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::new, FnInStruct)]
     52        #[diplomat::attr(all(supports = fallible_constructors, supports = named_constructors), named_constructor = "with_provider")]
     53        #[cfg(feature = "buffer_provider")]
     54        pub fn create_with_provider(
     55            provider: &DataProvider,
     56        ) -> Result<Box<GraphemeClusterSegmenter>, DataError> {
     57            Ok(Box::new(GraphemeClusterSegmenter(
     58                icu_segmenter::GraphemeClusterSegmenter::try_new_with_buffer_provider(
     59                    provider.get()?,
     60                )?,
     61            )))
     62        }
     63        /// Segments a string.
     64        ///
     65        /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
     66        /// to the WHATWG Encoding Standard.
     67        #[diplomat::rust_link(
     68            icu::segmenter::GraphemeClusterSegmenterBorrowed::segment_str,
     69            FnInStruct,
     70            hidden
     71        )]
     72        #[diplomat::rust_link(
     73            icu::segmenter::GraphemeClusterSegmenterBorrowed::segment_utf8,
     74            FnInStruct
     75        )]
     76        #[diplomat::attr(not(supports = utf8_strings), disable)]
     77        #[diplomat::attr(*, rename = "segment")]
     78        pub fn segment_utf8<'a>(
     79            &'a self,
     80            input: &'a DiplomatStr,
     81        ) -> Box<GraphemeClusterBreakIteratorUtf8<'a>> {
     82            Box::new(GraphemeClusterBreakIteratorUtf8(
     83                self.0.as_borrowed().segment_utf8(input),
     84            ))
     85        }
     86 
     87        /// Segments a string.
     88        ///
     89        /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
     90        /// to the WHATWG Encoding Standard.
     91        #[diplomat::rust_link(
     92            icu::segmenter::GraphemeClusterSegmenterBorrowed::segment_utf16,
     93            FnInStruct
     94        )]
     95        #[diplomat::attr(not(supports = utf8_strings), rename = "segment")]
     96        #[diplomat::attr(supports = utf8_strings, rename = "segment16")]
     97        pub fn segment_utf16<'a>(
     98            &'a self,
     99            input: &'a DiplomatStr16,
    100        ) -> Box<GraphemeClusterBreakIteratorUtf16<'a>> {
    101            Box::new(GraphemeClusterBreakIteratorUtf16(
    102                self.0.as_borrowed().segment_utf16(input),
    103            ))
    104        }
    105 
    106        /// Segments a Latin-1 string.
    107        #[diplomat::rust_link(
    108            icu::segmenter::GraphemeClusterSegmenterBorrowed::segment_latin1,
    109            FnInStruct
    110        )]
    111        #[diplomat::attr(not(supports = utf8_strings), disable)]
    112        pub fn segment_latin1<'a>(
    113            &'a self,
    114            input: &'a [u8],
    115        ) -> Box<GraphemeClusterBreakIteratorLatin1<'a>> {
    116            Box::new(GraphemeClusterBreakIteratorLatin1(
    117                self.0.as_borrowed().segment_latin1(input),
    118            ))
    119        }
    120    }
    121 
    122    impl<'a> GraphemeClusterBreakIteratorUtf8<'a> {
    123        /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
    124        /// out of range of a 32-bit signed integer.
    125        #[diplomat::rust_link(
    126            icu::segmenter::iterators::GraphemeClusterBreakIterator::next,
    127            FnInStruct
    128        )]
    129        #[diplomat::rust_link(
    130            icu::segmenter::iterators::GraphemeClusterBreakIterator::Item,
    131            AssociatedTypeInStruct,
    132            hidden
    133        )]
    134        pub fn next(&mut self) -> i32 {
    135            self.0
    136                .next()
    137                .and_then(|u| i32::try_from(u).ok())
    138                .unwrap_or(-1)
    139        }
    140    }
    141 
    142    impl<'a> GraphemeClusterBreakIteratorUtf16<'a> {
    143        /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
    144        /// out of range of a 32-bit signed integer.
    145        #[diplomat::rust_link(
    146            icu::segmenter::iterators::GraphemeClusterBreakIterator::next,
    147            FnInStruct
    148        )]
    149        #[diplomat::rust_link(
    150            icu::segmenter::iterators::GraphemeClusterBreakIterator::Item,
    151            AssociatedTypeInStruct,
    152            hidden
    153        )]
    154        pub fn next(&mut self) -> i32 {
    155            self.0
    156                .next()
    157                .and_then(|u| i32::try_from(u).ok())
    158                .unwrap_or(-1)
    159        }
    160    }
    161 
    162    impl<'a> GraphemeClusterBreakIteratorLatin1<'a> {
    163        /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
    164        /// out of range of a 32-bit signed integer.
    165        #[diplomat::rust_link(
    166            icu::segmenter::iterators::GraphemeClusterBreakIterator::next,
    167            FnInStruct
    168        )]
    169        #[diplomat::rust_link(
    170            icu::segmenter::iterators::GraphemeClusterBreakIterator::Item,
    171            AssociatedTypeInStruct,
    172            hidden
    173        )]
    174        pub fn next(&mut self) -> i32 {
    175            self.0
    176                .next()
    177                .and_then(|u| i32::try_from(u).ok())
    178                .unwrap_or(-1)
    179        }
    180    }
    181 }