tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

WordSegmenter.mjs (12535B)


      1 // generated by diplomat-tool
      2 import { DataError } from "./DataError.mjs"
      3 import { DataProvider } from "./DataProvider.mjs"
      4 import { Locale } from "./Locale.mjs"
      5 import { WordBreakIteratorUtf16 } from "./WordBreakIteratorUtf16.mjs"
      6 import wasm from "./diplomat-wasm.mjs";
      7 import * as diplomatRuntime from "./diplomat-runtime.mjs";
      8 
      9 
     10 /**
     11 * An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
     12 *
     13 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html) for more information.
     14 */
     15 const WordSegmenter_box_destroy_registry = new FinalizationRegistry((ptr) => {
     16    wasm.icu4x_WordSegmenter_destroy_mv1(ptr);
     17 });
     18 
     19 export class WordSegmenter {
     20    // Internal ptr reference:
     21    #ptr = null;
     22 
     23    // Lifetimes are only to keep dependencies alive.
     24    // Since JS won't garbage collect until there are no incoming edges.
     25    #selfEdge = [];
     26 
     27    #internalConstructor(symbol, ptr, selfEdge) {
     28        if (symbol !== diplomatRuntime.internalConstructor) {
     29            console.error("WordSegmenter is an Opaque type. You cannot call its constructor.");
     30            return;
     31        }
     32        this.#ptr = ptr;
     33        this.#selfEdge = selfEdge;
     34 
     35        // Are we being borrowed? If not, we can register.
     36        if (this.#selfEdge.length === 0) {
     37            WordSegmenter_box_destroy_registry.register(this, this.#ptr);
     38        }
     39 
     40        return this;
     41    }
     42    get ffiValue() {
     43        return this.#ptr;
     44    }
     45 
     46 
     47    /**
     48     * Construct an [`WordSegmenter`] with automatically selecting the best available LSTM
     49     * or dictionary payload data, using compiled data. This does not assume any content locale.
     50     *
     51     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     52     * Khmer, Lao, and Thai.
     53     *
     54     * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
     55     */
     56    static createAuto() {
     57 
     58        const result = wasm.icu4x_WordSegmenter_create_auto_mv1();
     59 
     60        try {
     61            return new WordSegmenter(diplomatRuntime.internalConstructor, result, []);
     62        }
     63 
     64        finally {
     65        }
     66    }
     67 
     68    /**
     69     * Construct an [`WordSegmenter`] with automatically selecting the best available LSTM
     70     * or dictionary payload data, using compiled data.
     71     *
     72     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     73     * Khmer, Lao, and Thai.
     74     *
     75     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     76     */
     77    static createAutoWithContentLocale(locale) {
     78        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);
     79 
     80 
     81        const result = wasm.icu4x_WordSegmenter_create_auto_with_content_locale_mv1(diplomatReceive.buffer, locale.ffiValue);
     82 
     83        try {
     84            if (!diplomatReceive.resultFlag) {
     85                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
     86                throw new globalThis.Error('DataError: ' + cause.value, { cause });
     87            }
     88            return new WordSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
     89        }
     90 
     91        finally {
     92            diplomatReceive.free();
     93        }
     94    }
     95 
     96    /**
     97     * Construct an [`WordSegmenter`] with automatically selecting the best available LSTM
     98     * or dictionary payload data, using a particular data source.
     99     *
    100     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
    101     * Khmer, Lao, and Thai.
    102     *
    103     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
    104     */
    105    static createAutoWithContentLocaleAndProvider(provider, locale) {
    106        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);
    107 
    108 
    109        const result = wasm.icu4x_WordSegmenter_create_auto_with_content_locale_and_provider_mv1(diplomatReceive.buffer, provider.ffiValue, locale.ffiValue);
    110 
    111        try {
    112            if (!diplomatReceive.resultFlag) {
    113                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
    114                throw new globalThis.Error('DataError: ' + cause.value, { cause });
    115            }
    116            return new WordSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
    117        }
    118 
    119        finally {
    120            diplomatReceive.free();
    121        }
    122    }
    123 
    124    /**
    125     * Construct an [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
    126     * Thai, using compiled data.  This does not assume any content locale.
    127     *
    128     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
    129     * Khmer, Lao, and Thai.
    130     *
    131     * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
    132     */
    133    static createLstm() {
    134 
    135        const result = wasm.icu4x_WordSegmenter_create_lstm_mv1();
    136 
    137        try {
    138            return new WordSegmenter(diplomatRuntime.internalConstructor, result, []);
    139        }
    140 
    141        finally {
    142        }
    143    }
    144 
    145    /**
    146     * Construct an [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
    147     * Thai, using compiled data.
    148     *
    149     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
    150     * Khmer, Lao, and Thai.
    151     *
    152     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
    153     */
    154    static createLstmWithContentLocale(locale) {
    155        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);
    156 
    157 
    158        const result = wasm.icu4x_WordSegmenter_create_lstm_with_content_locale_mv1(diplomatReceive.buffer, locale.ffiValue);
    159 
    160        try {
    161            if (!diplomatReceive.resultFlag) {
    162                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
    163                throw new globalThis.Error('DataError: ' + cause.value, { cause });
    164            }
    165            return new WordSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
    166        }
    167 
    168        finally {
    169            diplomatReceive.free();
    170        }
    171    }
    172 
    173    /**
    174     * Construct an [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
    175     * Thai, using a particular data source.
    176     *
    177     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
    178     * Khmer, Lao, and Thai.
    179     *
    180     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
    181     */
    182    static createLstmWithContentLocaleAndProvider(provider, locale) {
    183        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);
    184 
    185 
    186        const result = wasm.icu4x_WordSegmenter_create_lstm_with_content_locale_and_provider_mv1(diplomatReceive.buffer, provider.ffiValue, locale.ffiValue);
    187 
    188        try {
    189            if (!diplomatReceive.resultFlag) {
    190                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
    191                throw new globalThis.Error('DataError: ' + cause.value, { cause });
    192            }
    193            return new WordSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
    194        }
    195 
    196        finally {
    197            diplomatReceive.free();
    198        }
    199    }
    200 
    201    /**
    202     * Construct an [`WordSegmenter`] with with dictionary payload data for Chinese, Japanese,
    203     * Burmese, Khmer, Lao, and Thai, using compiled data.  This does not assume any content locale.
    204     *
    205     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
    206     * Khmer, Lao, and Thai.
    207     *
    208     * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
    209     */
    210    static createDictionary() {
    211 
    212        const result = wasm.icu4x_WordSegmenter_create_dictionary_mv1();
    213 
    214        try {
    215            return new WordSegmenter(diplomatRuntime.internalConstructor, result, []);
    216        }
    217 
    218        finally {
    219        }
    220    }
    221 
    222    /**
    223     * Construct an [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
    224     * Burmese, Khmer, Lao, and Thai, using compiled data.
    225     *
    226     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
    227     * Khmer, Lao, and Thai.
    228     *
    229     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
    230     */
    231    static createDictionaryWithContentLocale(locale) {
    232        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);
    233 
    234 
    235        const result = wasm.icu4x_WordSegmenter_create_dictionary_with_content_locale_mv1(diplomatReceive.buffer, locale.ffiValue);
    236 
    237        try {
    238            if (!diplomatReceive.resultFlag) {
    239                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
    240                throw new globalThis.Error('DataError: ' + cause.value, { cause });
    241            }
    242            return new WordSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
    243        }
    244 
    245        finally {
    246            diplomatReceive.free();
    247        }
    248    }
    249 
    250    /**
    251     * Construct an [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
    252     * Burmese, Khmer, Lao, and Thai, using a particular data source.
    253     *
    254     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
    255     * Khmer, Lao, and Thai.
    256     *
    257     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
    258     */
    259    static createDictionaryWithContentLocaleAndProvider(provider, locale) {
    260        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);
    261 
    262 
    263        const result = wasm.icu4x_WordSegmenter_create_dictionary_with_content_locale_and_provider_mv1(diplomatReceive.buffer, provider.ffiValue, locale.ffiValue);
    264 
    265        try {
    266            if (!diplomatReceive.resultFlag) {
    267                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
    268                throw new globalThis.Error('DataError: ' + cause.value, { cause });
    269            }
    270            return new WordSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
    271        }
    272 
    273        finally {
    274            diplomatReceive.free();
    275        }
    276    }
    277 
    278    /**
    279     * Segments a string.
    280     *
    281     * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
    282     * to the WHATWG Encoding Standard.
    283     *
    284     * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
    285     */
    286    segment(input) {
    287        let functionGarbageCollectorGrip = new diplomatRuntime.GarbageCollectorGrip();
    288        const inputSlice = diplomatRuntime.DiplomatBuf.str16(wasm, input);
    289        // This lifetime edge depends on lifetimes 'a
    290        let aEdges = [this, inputSlice];
    291 
    292 
    293        const result = wasm.icu4x_WordSegmenter_segment_utf16_mv1(this.ffiValue, ...inputSlice.splat());
    294 
    295        try {
    296            return new WordBreakIteratorUtf16(diplomatRuntime.internalConstructor, result, [], aEdges);
    297        }
    298 
    299        finally {
    300            functionGarbageCollectorGrip.releaseToGarbageCollector();
    301 
    302        }
    303    }
    304 
    305    constructor(symbol, ptr, selfEdge) {
    306        return this.#internalConstructor(...arguments)
    307    }
    308 }