WordSegmenter.d.hpp (8491B)
1 #ifndef icu4x_WordSegmenter_D_HPP 2 #define icu4x_WordSegmenter_D_HPP 3 4 #include <stdio.h> 5 #include <stdint.h> 6 #include <stddef.h> 7 #include <stdbool.h> 8 #include <memory> 9 #include <functional> 10 #include <optional> 11 #include <cstdlib> 12 #include "../diplomat_runtime.hpp" 13 14 namespace icu4x { 15 namespace capi { struct DataProvider; } 16 class DataProvider; 17 namespace capi { struct Locale; } 18 class Locale; 19 namespace capi { struct WordBreakIteratorLatin1; } 20 class WordBreakIteratorLatin1; 21 namespace capi { struct WordBreakIteratorUtf16; } 22 class WordBreakIteratorUtf16; 23 namespace capi { struct WordBreakIteratorUtf8; } 24 class WordBreakIteratorUtf8; 25 namespace capi { struct WordSegmenter; } 26 class WordSegmenter; 27 class DataError; 28 } 29 30 31 namespace icu4x { 32 namespace capi { 33 struct WordSegmenter; 34 } // namespace capi 35 } // namespace 36 37 namespace icu4x { 38 /** 39 * An ICU4X word-break segmenter, capable of finding word breakpoints in strings. 40 * 41 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html) for more information. 42 */ 43 class WordSegmenter { 44 public: 45 46 /** 47 * Construct an [`WordSegmenter`] with automatically selecting the best available LSTM 48 * or dictionary payload data, using compiled data. This does not assume any content locale. 49 * 50 * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, 51 * Khmer, Lao, and Thai. 52 * 53 * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information. 54 */ 55 inline static std::unique_ptr<icu4x::WordSegmenter> create_auto(); 56 57 /** 58 * Construct an [`WordSegmenter`] with automatically selecting the best available LSTM 59 * or dictionary payload data, using compiled data. 60 * 61 * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, 62 * Khmer, Lao, and Thai. 63 * 64 * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information. 65 */ 66 inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_auto_with_content_locale(const icu4x::Locale& locale); 67 68 /** 69 * Construct an [`WordSegmenter`] with automatically selecting the best available LSTM 70 * or dictionary payload data, using a particular data source. 71 * 72 * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, 73 * Khmer, Lao, and Thai. 74 * 75 * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information. 76 */ 77 inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_auto_with_content_locale_and_provider(const icu4x::DataProvider& provider, const icu4x::Locale& locale); 78 79 /** 80 * Construct an [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and 81 * Thai, using compiled data. This does not assume any content locale. 82 * 83 * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, 84 * Khmer, Lao, and Thai. 85 * 86 * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information. 87 */ 88 inline static std::unique_ptr<icu4x::WordSegmenter> create_lstm(); 89 90 /** 91 * Construct an [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and 92 * Thai, using compiled data. 93 * 94 * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, 95 * Khmer, Lao, and Thai. 96 * 97 * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information. 98 */ 99 inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_lstm_with_content_locale(const icu4x::Locale& locale); 100 101 /** 102 * Construct an [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and 103 * Thai, using a particular data source. 104 * 105 * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, 106 * Khmer, Lao, and Thai. 107 * 108 * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information. 109 */ 110 inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_lstm_with_content_locale_and_provider(const icu4x::DataProvider& provider, const icu4x::Locale& locale); 111 112 /** 113 * Construct an [`WordSegmenter`] with with dictionary payload data for Chinese, Japanese, 114 * Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale. 115 * 116 * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese, 117 * Khmer, Lao, and Thai. 118 * 119 * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information. 120 */ 121 inline static std::unique_ptr<icu4x::WordSegmenter> create_dictionary(); 122 123 /** 124 * Construct an [`WordSegmenter`] with dictionary payload data for Chinese, Japanese, 125 * Burmese, Khmer, Lao, and Thai, using compiled data. 126 * 127 * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese, 128 * Khmer, Lao, and Thai. 129 * 130 * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information. 131 */ 132 inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_dictionary_with_content_locale(const icu4x::Locale& locale); 133 134 /** 135 * Construct an [`WordSegmenter`] with dictionary payload data for Chinese, Japanese, 136 * Burmese, Khmer, Lao, and Thai, using a particular data source. 137 * 138 * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese, 139 * Khmer, Lao, and Thai. 140 * 141 * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information. 142 */ 143 inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_dictionary_with_content_locale_and_provider(const icu4x::DataProvider& provider, const icu4x::Locale& locale); 144 145 /** 146 * Segments a string. 147 * 148 * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 149 * to the WHATWG Encoding Standard. 150 * 151 * See the [Rust documentation for `segment_utf8`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf8) for more information. 152 */ 153 inline std::unique_ptr<icu4x::WordBreakIteratorUtf8> segment(std::string_view input) const; 154 155 /** 156 * Segments a string. 157 * 158 * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 159 * to the WHATWG Encoding Standard. 160 * 161 * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information. 162 */ 163 inline std::unique_ptr<icu4x::WordBreakIteratorUtf16> segment16(std::u16string_view input) const; 164 165 /** 166 * Segments a Latin-1 string. 167 * 168 * See the [Rust documentation for `segment_latin1`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_latin1) for more information. 169 */ 170 inline std::unique_ptr<icu4x::WordBreakIteratorLatin1> segment_latin1(diplomat::span<const uint8_t> input) const; 171 172 inline const icu4x::capi::WordSegmenter* AsFFI() const; 173 inline icu4x::capi::WordSegmenter* AsFFI(); 174 inline static const icu4x::WordSegmenter* FromFFI(const icu4x::capi::WordSegmenter* ptr); 175 inline static icu4x::WordSegmenter* FromFFI(icu4x::capi::WordSegmenter* ptr); 176 inline static void operator delete(void* ptr); 177 private: 178 WordSegmenter() = delete; 179 WordSegmenter(const icu4x::WordSegmenter&) = delete; 180 WordSegmenter(icu4x::WordSegmenter&&) noexcept = delete; 181 WordSegmenter operator=(const icu4x::WordSegmenter&) = delete; 182 WordSegmenter operator=(icu4x::WordSegmenter&&) noexcept = delete; 183 static void operator delete[](void*, size_t) = delete; 184 }; 185 186 } // namespace 187 #endif // icu4x_WordSegmenter_D_HPP