WordSegmenter.d.ts (5996B)
// generated by diplomat-tool
import type { DataError } from "./DataError"
import type { DataProvider } from "./DataProvider"
import type { Locale } from "./Locale"
import type { WordBreakIteratorUtf16 } from "./WordBreakIteratorUtf16"
import type { pointer, codepoint } from "./diplomat-runtime.d.ts";


/**
 * ICU4X word-break segmenter: locates word breakpoints within strings.
 *
 * Instances are obtained through the static `create*` factories below, which differ in
 * which payload data they use (automatic selection, LSTM, or dictionary) and in whether
 * they take a content locale and/or an explicit data provider.
 *
 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html) for more information.
 */


export class WordSegmenter {
    /**
     * Pointer value associated with this object, typed as the diplomat-runtime `pointer`.
     *
     * NOTE(review): presumably the handle to the underlying native object; confirm
     * against the implementation before relying on it.
     */
    get ffiValue(): pointer;


    /**
     * Creates a [`WordSegmenter`] that automatically selects the best available LSTM
     * or dictionary payload data, using compiled data. No content locale is assumed.
     *
     * Note: at present, dictionary data is used for Chinese and Japanese, and LSTM data
     * for Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
     *
     * @returns The newly constructed segmenter.
     */
    static createAuto(): WordSegmenter;

    /**
     * Creates a [`WordSegmenter`] that automatically selects the best available LSTM
     * or dictionary payload data, using compiled data.
     *
     * Note: at present, dictionary data is used for Chinese and Japanese, and LSTM data
     * for Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * NOTE(review): the linked Rust constructor is the fallible `try_new_auto`; how a
     * failure surfaces here (presumably a thrown error carrying a `DataError`) is not
     * visible in this declaration; confirm against the implementation.
     *
     * @param locale - Content locale for the segmenter.
     * @returns The newly constructed segmenter.
     */
    static createAutoWithContentLocale(locale: Locale): WordSegmenter;

    /**
     * Creates a [`WordSegmenter`] that automatically selects the best available LSTM
     * or dictionary payload data, using a particular data source.
     *
     * Note: at present, dictionary data is used for Chinese and Japanese, and LSTM data
     * for Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * NOTE(review): the linked Rust constructor is the fallible `try_new_auto`; how a
     * failure surfaces here (presumably a thrown error carrying a `DataError`) is not
     * visible in this declaration; confirm against the implementation.
     *
     * @param provider - Data source to use instead of compiled data.
     * @param locale - Content locale for the segmenter.
     * @returns The newly constructed segmenter.
     */
    static createAutoWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;

    /**
     * Creates a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data. No content locale is assumed.
     *
     * Note: at present, dictionary data is used for Chinese and Japanese, and LSTM data
     * for Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
     *
     * @returns The newly constructed segmenter.
     */
    static createLstm(): WordSegmenter;

    /**
     * Creates a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data.
     *
     * Note: at present, dictionary data is used for Chinese and Japanese, and LSTM data
     * for Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * NOTE(review): the linked Rust constructor is the fallible `try_new_lstm`; how a
     * failure surfaces here (presumably a thrown error carrying a `DataError`) is not
     * visible in this declaration; confirm against the implementation.
     *
     * @param locale - Content locale for the segmenter.
     * @returns The newly constructed segmenter.
     */
    static createLstmWithContentLocale(locale: Locale): WordSegmenter;

    /**
     * Creates a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using a particular data source.
     *
     * Note: at present, dictionary data is used for Chinese and Japanese, and LSTM data
     * for Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * NOTE(review): the linked Rust constructor is the fallible `try_new_lstm`; how a
     * failure surfaces here (presumably a thrown error carrying a `DataError`) is not
     * visible in this declaration; confirm against the implementation.
     *
     * @param provider - Data source to use instead of compiled data.
     * @param locale - Content locale for the segmenter.
     * @returns The newly constructed segmenter.
     */
    static createLstmWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;

    /**
     * Creates a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data. No content locale is assumed.
     *
     * Note: at present, dictionary data is used for Chinese and Japanese as well as for
     * Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
     *
     * @returns The newly constructed segmenter.
     */
    static createDictionary(): WordSegmenter;

    /**
     * Creates a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data.
     *
     * Note: at present, dictionary data is used for Chinese and Japanese as well as for
     * Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * NOTE(review): the linked Rust constructor is the fallible `try_new_dictionary`; how
     * a failure surfaces here (presumably a thrown error carrying a `DataError`) is not
     * visible in this declaration; confirm against the implementation.
     *
     * @param locale - Content locale for the segmenter.
     * @returns The newly constructed segmenter.
     */
    static createDictionaryWithContentLocale(locale: Locale): WordSegmenter;

    /**
     * Creates a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using a particular data source.
     *
     * Note: at present, dictionary data is used for Chinese and Japanese as well as for
     * Burmese, Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * NOTE(review): the linked Rust constructor is the fallible `try_new_dictionary`; how
     * a failure surfaces here (presumably a thrown error carrying a `DataError`) is not
     * visible in this declaration; confirm against the implementation.
     *
     * @param provider - Data source to use instead of compiled data.
     * @param locale - Content locale for the segmenter.
     * @returns The newly constructed segmenter.
     */
    static createDictionaryWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;

    /**
     * Segments a string into words.
     *
     * Ill-formed input is handled as though each error had been replaced with a
     * REPLACEMENT CHARACTER, following the WHATWG Encoding Standard.
     *
     * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
     *
     * @param input - The string to segment.
     * @returns A `WordBreakIteratorUtf16` over `input`.
     */
    segment(input: string): WordBreakIteratorUtf16;
}