normalizer.rs (14002B)
1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 #[diplomat::bridge] 6 #[diplomat::abi_rename = "icu4x_{0}_mv1"] 7 #[diplomat::attr(auto, namespace = "icu4x")] 8 pub mod ffi { 9 use alloc::boxed::Box; 10 11 #[cfg(feature = "buffer_provider")] 12 use crate::unstable::{errors::ffi::DataError, provider::ffi::DataProvider}; 13 14 #[diplomat::opaque] 15 #[diplomat::rust_link(icu::normalizer::ComposingNormalizer, Struct)] 16 #[diplomat::rust_link(icu::normalizer::ComposingNormalizerBorrowed, Struct, hidden)] 17 pub struct ComposingNormalizer(pub icu_normalizer::ComposingNormalizer); 18 19 impl ComposingNormalizer { 20 /// Construct a new ComposingNormalizer instance for NFC using compiled data. 21 #[diplomat::rust_link(icu::normalizer::ComposingNormalizer::new_nfc, FnInStruct)] 22 #[diplomat::rust_link( 23 icu::normalizer::ComposingNormalizerBorrowed::new_nfc, 24 FnInStruct, 25 hidden 26 )] 27 #[diplomat::attr(auto, named_constructor = "nfc")] 28 #[diplomat::demo(default_constructor)] 29 #[cfg(feature = "compiled_data")] 30 pub fn create_nfc() -> Box<ComposingNormalizer> { 31 Box::new(ComposingNormalizer( 32 icu_normalizer::ComposingNormalizer::new_nfc().static_to_owned(), 33 )) 34 } 35 /// Construct a new ComposingNormalizer instance for NFC using a particular data source. 36 #[diplomat::rust_link(icu::normalizer::ComposingNormalizer::new_nfc, FnInStruct)] 37 #[diplomat::rust_link( 38 icu::normalizer::ComposingNormalizerBorrowed::new_nfc, 39 FnInStruct, 40 hidden 41 )] 42 #[diplomat::attr(all(supports = fallible_constructors, supports = named_constructors), named_constructor = "nfc_with_provider")] 43 #[cfg(feature = "buffer_provider")] 44 pub fn create_nfc_with_provider( 45 provider: &DataProvider, 46 ) -> Result<Box<ComposingNormalizer>, DataError> { 47 Ok(Box::new(ComposingNormalizer( 48 icu_normalizer::ComposingNormalizer::try_new_nfc_with_buffer_provider( 49 provider.get()?, 50 )?, 51 ))) 52 } 53 /// Construct a new ComposingNormalizer instance for NFKC using compiled data. 54 #[diplomat::rust_link(icu::normalizer::ComposingNormalizer::new_nfkc, FnInStruct)] 55 #[diplomat::rust_link( 56 icu::normalizer::ComposingNormalizerBorrowed::new_nfkc, 57 FnInStruct, 58 hidden 59 )] 60 #[diplomat::attr(auto, named_constructor = "nfkc")] 61 #[cfg(feature = "compiled_data")] 62 pub fn create_nfkc() -> Box<ComposingNormalizer> { 63 Box::new(ComposingNormalizer( 64 icu_normalizer::ComposingNormalizer::new_nfkc().static_to_owned(), 65 )) 66 } 67 /// Construct a new ComposingNormalizer instance for NFKC using a particular data source. 68 #[diplomat::rust_link(icu::normalizer::ComposingNormalizer::new_nfkc, FnInStruct)] 69 #[diplomat::rust_link( 70 icu::normalizer::ComposingNormalizerBorrowed::new_nfkc, 71 FnInStruct, 72 hidden 73 )] 74 #[diplomat::attr(all(supports = fallible_constructors, supports = named_constructors), named_constructor = "nfkc_with_provider")] 75 #[cfg(feature = "buffer_provider")] 76 pub fn create_nfkc_with_provider( 77 provider: &DataProvider, 78 ) -> Result<Box<ComposingNormalizer>, DataError> { 79 Ok(Box::new(ComposingNormalizer( 80 icu_normalizer::ComposingNormalizer::try_new_nfkc_with_buffer_provider( 81 provider.get()?, 82 )?, 83 ))) 84 } 85 /// Normalize a string 86 /// 87 /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 88 /// to the WHATWG Encoding Standard. 89 #[diplomat::rust_link( 90 icu::normalizer::ComposingNormalizerBorrowed::normalize_utf8, 91 FnInStruct 92 )] 93 #[diplomat::rust_link( 94 icu::normalizer::ComposingNormalizerBorrowed::normalize, 95 FnInStruct, 96 hidden 97 )] 98 #[diplomat::rust_link( 99 icu::normalizer::ComposingNormalizerBorrowed::normalize_to, 100 FnInStruct, 101 hidden 102 )] 103 #[diplomat::rust_link( 104 icu::normalizer::ComposingNormalizerBorrowed::normalize_utf8_to, 105 FnInStruct, 106 hidden 107 )] 108 pub fn normalize(&self, s: &DiplomatStr, write: &mut DiplomatWrite) { 109 let _infallible = self.0.as_borrowed().normalize_utf8_to(s, write); 110 } 111 112 /// Check if a string is normalized 113 /// 114 /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 115 /// to the WHATWG Encoding Standard. 116 #[diplomat::rust_link( 117 icu::normalizer::ComposingNormalizerBorrowed::is_normalized_utf8, 118 FnInStruct 119 )] 120 #[diplomat::rust_link( 121 icu::normalizer::ComposingNormalizerBorrowed::is_normalized, 122 FnInStruct, 123 hidden 124 )] 125 #[diplomat::attr(not(supports = utf8_strings), disable)] 126 #[diplomat::attr(*, rename = "is_normalized")] 127 pub fn is_normalized_utf8(&self, s: &DiplomatStr) -> bool { 128 self.0.as_borrowed().is_normalized_utf8(s) 129 } 130 131 /// Check if a string is normalized 132 /// 133 /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 134 /// to the WHATWG Encoding Standard. 135 #[diplomat::rust_link( 136 icu::normalizer::ComposingNormalizerBorrowed::is_normalized_utf16, 137 FnInStruct 138 )] 139 #[diplomat::attr(not(supports = utf8_strings), rename = "is_normalized")] 140 #[diplomat::attr(supports = utf8_strings, rename = "is_normalized16")] 141 pub fn is_normalized_utf16(&self, s: &DiplomatStr16) -> bool { 142 self.0.as_borrowed().is_normalized_utf16(s) 143 } 144 145 /// Return the index a slice of potentially-invalid UTF-8 is normalized up to 146 #[diplomat::rust_link( 147 icu::normalizer::ComposingNormalizerBorrowed::split_normalized_utf8, 148 FnInStruct 149 )] 150 #[diplomat::rust_link( 151 icu::normalizer::ComposingNormalizerBorrowed::split_normalized, 152 FnInStruct 153 )] 154 #[diplomat::attr(not(supports = utf8_strings), disable)] 155 #[diplomat::attr(*, rename = "is_normalized_up_to")] 156 pub fn is_normalized_utf8_up_to(&self, s: &DiplomatStr) -> usize { 157 self.0.as_borrowed().split_normalized_utf8(s).0.len() 158 } 159 160 /// Return the index a slice of potentially-invalid UTF-16 is normalized up to 161 #[diplomat::rust_link( 162 icu::normalizer::ComposingNormalizerBorrowed::split_normalized_utf16, 163 FnInStruct 164 )] 165 #[diplomat::attr(not(supports = utf8_strings), rename = "is_normalized_up_to")] 166 #[diplomat::attr(supports = utf8_strings, rename = "is_normalized16_up_to")] 167 pub fn is_normalized_utf16_up_to(&self, s: &DiplomatStr16) -> usize { 168 self.0.as_borrowed().split_normalized_utf16(s).0.len() 169 } 170 } 171 172 #[diplomat::opaque] 173 #[diplomat::rust_link(icu::normalizer::DecomposingNormalizer, Struct)] 174 #[diplomat::rust_link(icu::normalizer::DecomposingNormalizerBorrowed, Struct, hidden)] 175 pub struct DecomposingNormalizer(pub icu_normalizer::DecomposingNormalizer); 176 177 impl DecomposingNormalizer { 178 /// Construct a new DecomposingNormalizer instance for NFD using compiled data. 179 #[diplomat::rust_link(icu::normalizer::DecomposingNormalizer::new_nfd, FnInStruct)] 180 #[diplomat::rust_link( 181 icu::normalizer::DecomposingNormalizerBorrowed::new_nfd, 182 FnInStruct, 183 hidden 184 )] 185 #[diplomat::attr(all(supports = fallible_constructors, supports = named_constructors), named_constructor = "nfd")] 186 #[diplomat::demo(default_constructor)] 187 #[cfg(feature = "compiled_data")] 188 pub fn create_nfd() -> Box<DecomposingNormalizer> { 189 Box::new(DecomposingNormalizer( 190 icu_normalizer::DecomposingNormalizer::new_nfd().static_to_owned(), 191 )) 192 } 193 194 /// Construct a new DecomposingNormalizer instance for NFD using a particular data source. 195 #[diplomat::rust_link(icu::normalizer::DecomposingNormalizer::new_nfd, FnInStruct)] 196 #[diplomat::rust_link( 197 icu::normalizer::DecomposingNormalizerBorrowed::new_nfd, 198 FnInStruct, 199 hidden 200 )] 201 #[diplomat::attr(all(supports = fallible_constructors, supports = named_constructors), named_constructor = "nfd_with_provider")] 202 #[cfg(feature = "buffer_provider")] 203 pub fn create_nfd_with_provider( 204 provider: &DataProvider, 205 ) -> Result<Box<DecomposingNormalizer>, DataError> { 206 Ok(Box::new(DecomposingNormalizer( 207 icu_normalizer::DecomposingNormalizer::try_new_nfd_with_buffer_provider( 208 provider.get()?, 209 )?, 210 ))) 211 } 212 213 /// Construct a new DecomposingNormalizer instance for NFKD using compiled data. 214 #[diplomat::rust_link(icu::normalizer::DecomposingNormalizer::new_nfkd, FnInStruct)] 215 #[diplomat::rust_link( 216 icu::normalizer::DecomposingNormalizerBorrowed::new_nfkd, 217 FnInStruct, 218 hidden 219 )] 220 #[diplomat::attr(auto, named_constructor = "nfkd")] 221 #[cfg(feature = "compiled_data")] 222 pub fn create_nfkd() -> Box<DecomposingNormalizer> { 223 Box::new(DecomposingNormalizer( 224 icu_normalizer::DecomposingNormalizer::new_nfkd().static_to_owned(), 225 )) 226 } 227 228 /// Construct a new DecomposingNormalizer instance for NFKD using a particular data source. 229 #[diplomat::rust_link(icu::normalizer::DecomposingNormalizer::new_nfkd, FnInStruct)] 230 #[diplomat::rust_link( 231 icu::normalizer::DecomposingNormalizerBorrowed::new_nfkd, 232 FnInStruct, 233 hidden 234 )] 235 #[diplomat::attr(all(supports = fallible_constructors, supports = named_constructors), named_constructor = "nfkd_with_provider")] 236 #[cfg(feature = "buffer_provider")] 237 pub fn create_nfkd_with_provider( 238 provider: &DataProvider, 239 ) -> Result<Box<DecomposingNormalizer>, DataError> { 240 Ok(Box::new(DecomposingNormalizer( 241 icu_normalizer::DecomposingNormalizer::try_new_nfkd_with_buffer_provider( 242 provider.get()?, 243 )?, 244 ))) 245 } 246 247 /// Normalize a string 248 /// 249 /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 250 /// to the WHATWG Encoding Standard. 251 #[diplomat::rust_link( 252 icu::normalizer::DecomposingNormalizerBorrowed::normalize_utf8, 253 FnInStruct 254 )] 255 #[diplomat::rust_link( 256 icu::normalizer::DecomposingNormalizerBorrowed::normalize, 257 FnInStruct, 258 hidden 259 )] 260 #[diplomat::rust_link( 261 icu::normalizer::DecomposingNormalizerBorrowed::normalize_to, 262 FnInStruct, 263 hidden 264 )] 265 #[diplomat::rust_link( 266 icu::normalizer::DecomposingNormalizerBorrowed::normalize_utf8_to, 267 FnInStruct, 268 hidden 269 )] 270 pub fn normalize(&self, s: &DiplomatStr, write: &mut DiplomatWrite) { 271 let _infallible = self.0.as_borrowed().normalize_utf8_to(s, write); 272 } 273 274 /// Check if a string is normalized 275 /// 276 /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 277 /// to the WHATWG Encoding Standard. 278 #[diplomat::rust_link( 279 icu::normalizer::DecomposingNormalizerBorrowed::is_normalized_utf8, 280 FnInStruct 281 )] 282 #[diplomat::rust_link( 283 icu::normalizer::DecomposingNormalizerBorrowed::is_normalized, 284 FnInStruct, 285 hidden 286 )] 287 pub fn is_normalized(&self, s: &DiplomatStr) -> bool { 288 self.0.as_borrowed().is_normalized_utf8(s) 289 } 290 291 /// Check if a string is normalized 292 /// 293 /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 294 /// to the WHATWG Encoding Standard. 295 #[diplomat::rust_link( 296 icu::normalizer::DecomposingNormalizerBorrowed::is_normalized_utf16, 297 FnInStruct 298 )] 299 pub fn is_normalized_utf16(&self, s: &DiplomatStr16) -> bool { 300 self.0.as_borrowed().is_normalized_utf16(s) 301 } 302 303 /// Return the index a slice of potentially-invalid UTF-8 is normalized up to 304 #[diplomat::rust_link( 305 icu::normalizer::DecomposingNormalizerBorrowed::split_normalized_utf8, 306 FnInStruct 307 )] 308 #[diplomat::rust_link( 309 icu::normalizer::DecomposingNormalizerBorrowed::split_normalized, 310 FnInStruct 311 )] 312 pub fn is_normalized_up_to(&self, s: &DiplomatStr) -> usize { 313 self.0.as_borrowed().split_normalized_utf8(s).0.len() 314 } 315 316 /// Return the index a slice of potentially-invalid UTF-16 is normalized up to 317 #[diplomat::rust_link( 318 icu::normalizer::DecomposingNormalizerBorrowed::split_normalized_utf16, 319 FnInStruct 320 )] 321 pub fn is_normalized_utf16_up_to(&self, s: &DiplomatStr16) -> usize { 322 self.0.as_borrowed().split_normalized_utf16(s).0.len() 323 } 324 } 325 }