LineSegmenter.d.hpp (7576B)
1 #ifndef icu4x_LineSegmenter_D_HPP 2 #define icu4x_LineSegmenter_D_HPP 3 4 #include <stdio.h> 5 #include <stdint.h> 6 #include <stddef.h> 7 #include <stdbool.h> 8 #include <memory> 9 #include <functional> 10 #include <optional> 11 #include <cstdlib> 12 #include "../diplomat_runtime.hpp" 13 14 namespace icu4x { 15 namespace capi { struct DataProvider; } 16 class DataProvider; 17 namespace capi { struct LineBreakIteratorLatin1; } 18 class LineBreakIteratorLatin1; 19 namespace capi { struct LineBreakIteratorUtf16; } 20 class LineBreakIteratorUtf16; 21 namespace capi { struct LineBreakIteratorUtf8; } 22 class LineBreakIteratorUtf8; 23 namespace capi { struct LineSegmenter; } 24 class LineSegmenter; 25 namespace capi { struct Locale; } 26 class Locale; 27 struct LineBreakOptionsV2; 28 class DataError; 29 } 30 31 32 namespace icu4x { 33 namespace capi { 34 struct LineSegmenter; 35 } // namespace capi 36 } // namespace 37 38 namespace icu4x { 39 /** 40 * An ICU4X line-break segmenter, capable of finding breakpoints in strings. 41 * 42 * See the [Rust documentation for `LineSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html) for more information. 43 */ 44 class LineSegmenter { 45 public: 46 47 /** 48 * Construct a [`LineSegmenter`] with default options (no locale-based tailoring) using compiled data. It automatically loads the best 49 * available payload data for Burmese, Khmer, Lao, and Thai. 50 * 51 * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html#method.new_auto) for more information. 52 */ 53 inline static std::unique_ptr<icu4x::LineSegmenter> create_auto(); 54 55 /** 56 * Construct a [`LineSegmenter`] with default options (no locale-based tailoring) and LSTM payload data for 57 * Burmese, Khmer, Lao, and Thai, using compiled data. 58 * 59 * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html#method.new_lstm) for more information. 60 */ 61 inline static std::unique_ptr<icu4x::LineSegmenter> create_lstm(); 62 63 /** 64 * Construct a [`LineSegmenter`] with default options (no locale-based tailoring) and dictionary payload data for 65 * Burmese, Khmer, Lao, and Thai, using compiled data 66 * 67 * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html#method.new_dictionary) for more information. 68 */ 69 inline static std::unique_ptr<icu4x::LineSegmenter> create_dictionary(); 70 71 /** 72 * Construct a [`LineSegmenter`] with custom options using compiled data. It automatically loads the best 73 * available payload data for Burmese, Khmer, Lao, and Thai. 74 * 75 * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html#method.new_auto) for more information. 76 */ 77 inline static std::unique_ptr<icu4x::LineSegmenter> create_auto_with_options_v2(const icu4x::Locale* content_locale, icu4x::LineBreakOptionsV2 options); 78 79 /** 80 * Construct a [`LineSegmenter`] with custom options. It automatically loads the best 81 * available payload data for Burmese, Khmer, Lao, and Thai, using a particular data source. 82 * 83 * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html#method.new_auto) for more information. 84 */ 85 inline static diplomat::result<std::unique_ptr<icu4x::LineSegmenter>, icu4x::DataError> create_auto_with_options_v2_and_provider(const icu4x::DataProvider& provider, const icu4x::Locale* content_locale, icu4x::LineBreakOptionsV2 options); 86 87 /** 88 * Construct a [`LineSegmenter`] with custom options and LSTM payload data for 89 * Burmese, Khmer, Lao, and Thai, using compiled data. 90 * 91 * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html#method.new_lstm) for more information. 92 */ 93 inline static std::unique_ptr<icu4x::LineSegmenter> create_lstm_with_options_v2(const icu4x::Locale* content_locale, icu4x::LineBreakOptionsV2 options); 94 95 /** 96 * Construct a [`LineSegmenter`] with custom options and LSTM payload data for 97 * Burmese, Khmer, Lao, and Thai, using a particular data source. 98 * 99 * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html#method.new_lstm) for more information. 100 */ 101 inline static diplomat::result<std::unique_ptr<icu4x::LineSegmenter>, icu4x::DataError> create_lstm_with_options_v2_and_provider(const icu4x::DataProvider& provider, const icu4x::Locale* content_locale, icu4x::LineBreakOptionsV2 options); 102 103 /** 104 * Construct a [`LineSegmenter`] with custom options and dictionary payload data for 105 * Burmese, Khmer, Lao, and Thai, using compiled data. 106 * 107 * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html#method.new_dictionary) for more information. 108 */ 109 inline static std::unique_ptr<icu4x::LineSegmenter> create_dictionary_with_options_v2(const icu4x::Locale* content_locale, icu4x::LineBreakOptionsV2 options); 110 111 /** 112 * Construct a [`LineSegmenter`] with custom options and dictionary payload data for 113 * Burmese, Khmer, Lao, and Thai, using a particular data source. 114 * 115 * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenter.html#method.new_dictionary) for more information. 116 */ 117 inline static diplomat::result<std::unique_ptr<icu4x::LineSegmenter>, icu4x::DataError> create_dictionary_with_options_v2_and_provider(const icu4x::DataProvider& provider, const icu4x::Locale* content_locale, icu4x::LineBreakOptionsV2 options); 118 119 /** 120 * Segments a string. 121 * 122 * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 123 * to the WHATWG Encoding Standard. 124 * 125 * See the [Rust documentation for `segment_utf8`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenterBorrowed.html#method.segment_utf8) for more information. 126 */ 127 inline std::unique_ptr<icu4x::LineBreakIteratorUtf8> segment(std::string_view input) const; 128 129 /** 130 * Segments a string. 131 * 132 * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according 133 * to the WHATWG Encoding Standard. 134 * 135 * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenterBorrowed.html#method.segment_utf16) for more information. 136 */ 137 inline std::unique_ptr<icu4x::LineBreakIteratorUtf16> segment16(std::u16string_view input) const; 138 139 /** 140 * Segments a Latin-1 string. 141 * 142 * See the [Rust documentation for `segment_latin1`](https://docs.rs/icu/latest/icu/segmenter/struct.LineSegmenterBorrowed.html#method.segment_latin1) for more information. 143 */ 144 inline std::unique_ptr<icu4x::LineBreakIteratorLatin1> segment_latin1(diplomat::span<const uint8_t> input) const; 145 146 inline const icu4x::capi::LineSegmenter* AsFFI() const; 147 inline icu4x::capi::LineSegmenter* AsFFI(); 148 inline static const icu4x::LineSegmenter* FromFFI(const icu4x::capi::LineSegmenter* ptr); 149 inline static icu4x::LineSegmenter* FromFFI(icu4x::capi::LineSegmenter* ptr); 150 inline static void operator delete(void* ptr); 151 private: 152 LineSegmenter() = delete; 153 LineSegmenter(const icu4x::LineSegmenter&) = delete; 154 LineSegmenter(icu4x::LineSegmenter&&) noexcept = delete; 155 LineSegmenter operator=(const icu4x::LineSegmenter&) = delete; 156 LineSegmenter operator=(icu4x::LineSegmenter&&) noexcept = delete; 157 static void operator delete[](void*, size_t) = delete; 158 }; 159 160 } // namespace 161 #endif // icu4x_LineSegmenter_D_HPP