tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

WordSegmenter.d.ts (5996B)


      1 // generated by diplomat-tool
      2 import type { DataError } from "./DataError"
      3 import type { DataProvider } from "./DataProvider"
      4 import type { Locale } from "./Locale"
      5 import type { WordBreakIteratorUtf16 } from "./WordBreakIteratorUtf16"
      6 import type { pointer, codepoint } from "./diplomat-runtime.d.ts";
      7 
      8 
      9 /**
     10 * An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
     11 *
     12 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html) for more information.
     13 */
     14 
     15 
     16 export class WordSegmenter {
     17    get ffiValue(): pointer; // Getter only (no setter declared); typed as the diplomat-runtime `pointer`.
     18 
     19 
     20    /**
     21     * Construct a [`WordSegmenter`] that automatically selects the best available LSTM
     22     * or dictionary payload data, using compiled data. This does not assume any content locale.
     23     *
     24     * Note: currently, it uses dictionary data for Chinese and Japanese, and LSTM data for Burmese,
     25     * Khmer, Lao, and Thai.
     26     *
     27     * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
     28     */
     29    static createAuto(): WordSegmenter;
     30 
     31    /**
     32     * Construct a [`WordSegmenter`] that automatically selects the best available LSTM
     33     * or dictionary payload data, using compiled data and the content locale passed as `locale`.
     34     *
     35     * Note: currently, it uses dictionary data for Chinese and Japanese, and LSTM data for Burmese,
     36     * Khmer, Lao, and Thai.
     37     *
     38     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     39     */
     40    static createAutoWithContentLocale(locale: Locale): WordSegmenter;
     41 
     42    /**
     43     * Construct a [`WordSegmenter`] that automatically selects the best available LSTM
     44     * or dictionary payload data, using the data source passed as `provider` and the content locale passed as `locale`.
     45     *
     46     * Note: currently, it uses dictionary data for Chinese and Japanese, and LSTM data for Burmese,
     47     * Khmer, Lao, and Thai.
     48     *
     49     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     50     */
     51    static createAutoWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;
     52 
     53    /**
     54     * Construct a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     55     * Thai, using compiled data. This does not assume any content locale.
     56     *
     57     * Note: currently, it uses dictionary data for Chinese and Japanese, and LSTM data for Burmese,
     58     * Khmer, Lao, and Thai.
     59     *
     60     * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
     61     */
     62    static createLstm(): WordSegmenter;
     63 
     64    /**
     65     * Construct a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     66     * Thai, using compiled data and the content locale passed as `locale`.
     67     *
     68     * Note: currently, it uses dictionary data for Chinese and Japanese, and LSTM data for Burmese,
     69     * Khmer, Lao, and Thai.
     70     *
     71     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     72     */
     73    static createLstmWithContentLocale(locale: Locale): WordSegmenter;
     74 
     75    /**
     76     * Construct a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     77     * Thai, using the data source passed as `provider` and the content locale passed as `locale`.
     78     *
     79     * Note: currently, it uses dictionary data for Chinese and Japanese, and LSTM data for Burmese,
     80     * Khmer, Lao, and Thai.
     81     *
     82     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     83     */
     84    static createLstmWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;
     85 
     86    /**
     87     * Construct a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     88     * Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale.
     89     *
     90     * Note: currently, it uses dictionary data for Chinese and Japanese as well as for Burmese,
     91     * Khmer, Lao, and Thai.
     92     *
     93     * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
     94     */
     95    static createDictionary(): WordSegmenter;
     96 
     97    /**
     98     * Construct a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     99     * Burmese, Khmer, Lao, and Thai, using compiled data and the content locale passed as `locale`.
    100     *
    101     * Note: currently, it uses dictionary data for Chinese and Japanese as well as for Burmese,
    102     * Khmer, Lao, and Thai.
    103     *
    104     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
    105     */
    106    static createDictionaryWithContentLocale(locale: Locale): WordSegmenter;
    107 
    108    /**
    109     * Construct a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
    110     * Burmese, Khmer, Lao, and Thai, using the data source passed as `provider` and the content locale passed as `locale`.
    111     *
    112     * Note: currently, it uses dictionary data for Chinese and Japanese as well as for Burmese,
    113     * Khmer, Lao, and Thai.
    114     *
    115     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
    116     */
    117    static createDictionaryWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;
    118 
    119    /**
    120     * Segments the string `input` and returns a [`WordBreakIteratorUtf16`].
    121     *
    122     * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
    123     * to the WHATWG Encoding Standard.
    124     *
    125     * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
    126     */
    127    segment(input: string): WordBreakIteratorUtf16;
    128 }