tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

WordSegmenter.d.hpp (8491B)


      1 #ifndef icu4x_WordSegmenter_D_HPP
      2 #define icu4x_WordSegmenter_D_HPP
      3 
      4 #include <stdio.h>
      5 #include <stdint.h>
      6 #include <stddef.h>
      7 #include <stdbool.h>
      8 #include <memory>
      9 #include <functional>
     10 #include <optional>
     11 #include <cstdlib>
     12 #include "../diplomat_runtime.hpp"
     13 
     14 namespace icu4x {
     15 namespace capi { struct DataProvider; }
     16 class DataProvider;
     17 namespace capi { struct Locale; }
     18 class Locale;
     19 namespace capi { struct WordBreakIteratorLatin1; }
     20 class WordBreakIteratorLatin1;
     21 namespace capi { struct WordBreakIteratorUtf16; }
     22 class WordBreakIteratorUtf16;
     23 namespace capi { struct WordBreakIteratorUtf8; }
     24 class WordBreakIteratorUtf8;
     25 namespace capi { struct WordSegmenter; }
     26 class WordSegmenter;
     27 class DataError;
     28 }
     29 
     30 
     31 namespace icu4x {
     32 namespace capi {
     33    struct WordSegmenter;
     34 } // namespace capi
     35 } // namespace
     36 
     37 namespace icu4x {
     38 /**
     39 * An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
     40 *
     41 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html) for more information.
     42 */
     43 class WordSegmenter {
     44 public:
     45 
     46  /**
     47   * Construct an [`WordSegmenter`] with automatically selecting the best available LSTM
     48   * or dictionary payload data, using compiled data. This does not assume any content locale.
     49   *
     50   * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     51   * Khmer, Lao, and Thai.
     52   *
     53   * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
     54   */
     55  inline static std::unique_ptr<icu4x::WordSegmenter> create_auto();
     56 
     57  /**
     58   * Construct an [`WordSegmenter`] with automatically selecting the best available LSTM
     59   * or dictionary payload data, using compiled data.
     60   *
     61   * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     62   * Khmer, Lao, and Thai.
     63   *
     64   * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     65   */
     66  inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_auto_with_content_locale(const icu4x::Locale& locale);
     67 
     68  /**
     69   * Construct an [`WordSegmenter`] with automatically selecting the best available LSTM
     70   * or dictionary payload data, using a particular data source.
     71   *
     72   * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     73   * Khmer, Lao, and Thai.
     74   *
     75   * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     76   */
     77  inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_auto_with_content_locale_and_provider(const icu4x::DataProvider& provider, const icu4x::Locale& locale);
     78 
     79  /**
     80   * Construct an [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     81   * Thai, using compiled data.  This does not assume any content locale.
     82   *
     83   * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     84   * Khmer, Lao, and Thai.
     85   *
     86   * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
     87   */
     88  inline static std::unique_ptr<icu4x::WordSegmenter> create_lstm();
     89 
     90  /**
     91   * Construct an [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     92   * Thai, using compiled data.
     93   *
     94   * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     95   * Khmer, Lao, and Thai.
     96   *
     97   * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     98   */
     99  inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_lstm_with_content_locale(const icu4x::Locale& locale);
    100 
    101  /**
    102   * Construct an [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
    103   * Thai, using a particular data source.
    104   *
    105   * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
    106   * Khmer, Lao, and Thai.
    107   *
    108   * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
    109   */
    110  inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_lstm_with_content_locale_and_provider(const icu4x::DataProvider& provider, const icu4x::Locale& locale);
    111 
    112  /**
    113   * Construct an [`WordSegmenter`] with with dictionary payload data for Chinese, Japanese,
    114   * Burmese, Khmer, Lao, and Thai, using compiled data.  This does not assume any content locale.
    115   *
    116   * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
    117   * Khmer, Lao, and Thai.
    118   *
    119   * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
    120   */
    121  inline static std::unique_ptr<icu4x::WordSegmenter> create_dictionary();
    122 
    123  /**
    124   * Construct an [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
    125   * Burmese, Khmer, Lao, and Thai, using compiled data.
    126   *
    127   * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
    128   * Khmer, Lao, and Thai.
    129   *
    130   * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
    131   */
    132  inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_dictionary_with_content_locale(const icu4x::Locale& locale);
    133 
    134  /**
    135   * Construct an [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
    136   * Burmese, Khmer, Lao, and Thai, using a particular data source.
    137   *
    138   * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
    139   * Khmer, Lao, and Thai.
    140   *
    141   * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
    142   */
    143  inline static diplomat::result<std::unique_ptr<icu4x::WordSegmenter>, icu4x::DataError> create_dictionary_with_content_locale_and_provider(const icu4x::DataProvider& provider, const icu4x::Locale& locale);
    144 
    145  /**
    146   * Segments a string.
    147   *
    148   * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
    149   * to the WHATWG Encoding Standard.
    150   *
    151   * See the [Rust documentation for `segment_utf8`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf8) for more information.
    152   */
    153  inline std::unique_ptr<icu4x::WordBreakIteratorUtf8> segment(std::string_view input) const;
    154 
    155  /**
    156   * Segments a string.
    157   *
    158   * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
    159   * to the WHATWG Encoding Standard.
    160   *
    161   * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
    162   */
    163  inline std::unique_ptr<icu4x::WordBreakIteratorUtf16> segment16(std::u16string_view input) const;
    164 
    165  /**
    166   * Segments a Latin-1 string.
    167   *
    168   * See the [Rust documentation for `segment_latin1`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_latin1) for more information.
    169   */
    170  inline std::unique_ptr<icu4x::WordBreakIteratorLatin1> segment_latin1(diplomat::span<const uint8_t> input) const;
    171 
    172  inline const icu4x::capi::WordSegmenter* AsFFI() const;
    173  inline icu4x::capi::WordSegmenter* AsFFI();
    174  inline static const icu4x::WordSegmenter* FromFFI(const icu4x::capi::WordSegmenter* ptr);
    175  inline static icu4x::WordSegmenter* FromFFI(icu4x::capi::WordSegmenter* ptr);
    176  inline static void operator delete(void* ptr);
    177 private:
    178  WordSegmenter() = delete;
    179  WordSegmenter(const icu4x::WordSegmenter&) = delete;
    180  WordSegmenter(icu4x::WordSegmenter&&) noexcept = delete;
    181  WordSegmenter operator=(const icu4x::WordSegmenter&) = delete;
    182  WordSegmenter operator=(icu4x::WordSegmenter&&) noexcept = delete;
    183  static void operator delete[](void*, size_t) = delete;
    184 };
    185 
    186 } // namespace
    187 #endif // icu4x_WordSegmenter_D_HPP