tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SentenceSegmenter.mjs (5357B)


      1 // generated by diplomat-tool
      2 import { DataError } from "./DataError.mjs"
      3 import { DataProvider } from "./DataProvider.mjs"
      4 import { Locale } from "./Locale.mjs"
      5 import { SentenceBreakIteratorUtf16 } from "./SentenceBreakIteratorUtf16.mjs"
      6 import wasm from "./diplomat-wasm.mjs";
      7 import * as diplomatRuntime from "./diplomat-runtime.mjs";
      8 
      9 
     10 /**
     11 * An ICU4X sentence-break segmenter, capable of finding sentence breakpoints in strings.
     12 *
     13 * See the [Rust documentation for `SentenceSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.SentenceSegmenter.html) for more information.
     14 */
     // Registry whose cleanup callback hands the held pointer to the wasm-side
     // destructor once an object registered with it has been garbage-collected.
     const SentenceSegmenter_box_destroy_registry = new FinalizationRegistry(function (heldPtr) {
         wasm.icu4x_SentenceSegmenter_destroy_mv1(heldPtr);
     });
     18 
     /**
      * An ICU4X sentence-break segmenter, capable of finding sentence breakpoints in strings.
      *
      * Instances wrap a pointer handed back by the wasm module. Build one with
      * `new SentenceSegmenter()` or with one of the static `createWith…` factories.
      *
      * See the [Rust documentation for `SentenceSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.SentenceSegmenter.html) for more information.
      */
     export class SentenceSegmenter {
         // Pointer to the wasm-side object; stays null if construction was refused.
         #ptr = null;

         // Lifetime edges: objects stored here only so they stay reachable
         // for as long as this wrapper is alive.
         #selfEdge = [];

         /**
          * Shared initializer behind every construction path.
          *
          * @param {symbol} symbol - must be `diplomatRuntime.internalConstructor`.
          * @param {*} ptr - pointer value to wrap.
          * @param {Array} selfEdge - lifetime edges; an empty array marks the wrapper as not borrowed.
          * @returns {SentenceSegmenter|undefined} `this`, or `undefined` when the symbol is wrong.
          */
         #internalConstructor(symbol, ptr, selfEdge) {
             if (symbol !== diplomatRuntime.internalConstructor) {
                 console.error("SentenceSegmenter is an Opaque type. You cannot call its constructor.");
                 return;
             }
             this.#ptr = ptr;
             this.#selfEdge = selfEdge;

             // Are we being borrowed? If not, register the pointer for destruction on GC.
             if (this.#selfEdge.length === 0) {
                 SentenceSegmenter_box_destroy_registry.register(this, this.#ptr);
             }

             return this;
         }

         /** The wrapped pointer, as passed to wasm functions. */
         get ffiValue() {
             return this.#ptr;
         }

         /**
          * Construct a [`SentenceSegmenter`] using compiled data. This does not assume any content locale.
          *
          * See the [Rust documentation for `new`](https://docs.rs/icu/latest/icu/segmenter/struct.SentenceSegmenter.html#method.new) for more information.
          *
          * @returns {SentenceSegmenter} a fresh wrapper around the newly created pointer.
          */
         #defaultConstructor() {
             const result = wasm.icu4x_SentenceSegmenter_create_mv1();
             return new SentenceSegmenter(diplomatRuntime.internalConstructor, result, []);
         }

         /**
          * Construct a [`SentenceSegmenter`] for content known to be of a given locale, using compiled data.
          *
          * @param {Locale} locale - its `ffiValue` is forwarded to wasm.
          * @returns {SentenceSegmenter}
          * @throws {Error} with message `DataError: …` and a `DataError` as `cause`
          *     when the receive buffer's result flag is not set.
          */
         static createWithContentLocale(locale) {
             // Size/alignment arguments are kept exactly as emitted by the generator.
             const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);

             try {
                 // Inside the `try` so the buffer is freed even if reading
                 // `locale.ffiValue` or the wasm call itself throws.
                 wasm.icu4x_SentenceSegmenter_create_with_content_locale_mv1(diplomatReceive.buffer, locale.ffiValue);

                 if (!diplomatReceive.resultFlag) {
                     const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
                     throw new globalThis.Error('DataError: ' + cause.value, { cause });
                 }
                 return new SentenceSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
             } finally {
                 diplomatReceive.free();
             }
         }

         /**
          * Construct a [`SentenceSegmenter`] for content known to be of a given locale, using a particular data source.
          *
          * @param {DataProvider} provider - its `ffiValue` is forwarded to wasm.
          * @param {Locale} locale - its `ffiValue` is forwarded to wasm.
          * @returns {SentenceSegmenter}
          * @throws {Error} with message `DataError: …` and a `DataError` as `cause`
          *     when the receive buffer's result flag is not set.
          */
         static createWithContentLocaleAndProvider(provider, locale) {
             // Size/alignment arguments are kept exactly as emitted by the generator.
             const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);

             try {
                 // Inside the `try` so the buffer is freed even if an argument
                 // access or the wasm call itself throws.
                 wasm.icu4x_SentenceSegmenter_create_with_content_locale_and_provider_mv1(diplomatReceive.buffer, provider.ffiValue, locale.ffiValue);

                 if (!diplomatReceive.resultFlag) {
                     const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
                     throw new globalThis.Error('DataError: ' + cause.value, { cause });
                 }
                 return new SentenceSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
             } finally {
                 diplomatReceive.free();
             }
         }

         /**
          * Segments a string.
          *
          * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
          * to the WHATWG Encoding Standard.
          *
          * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.SentenceSegmenterBorrowed.html#method.segment_utf16) for more information.
          *
          * @param {string} input - text to segment.
          * @returns {SentenceBreakIteratorUtf16} iterator that holds this segmenter and the
          *     input buffer as lifetime edges.
          */
         segment(input) {
             const functionGarbageCollectorGrip = new diplomatRuntime.GarbageCollectorGrip();

             try {
                 const inputSlice = diplomatRuntime.DiplomatBuf.str16(wasm, input);
                 // This lifetime edge depends on lifetimes 'a
                 const aEdges = [this, inputSlice];

                 const result = wasm.icu4x_SentenceSegmenter_segment_utf16_mv1(this.ffiValue, ...inputSlice.splat());
                 return new SentenceBreakIteratorUtf16(diplomatRuntime.internalConstructor, result, [], aEdges);
             } finally {
                 // Runs on every exit path, including a throwing wasm call.
                 functionGarbageCollectorGrip.releaseToGarbageCollector();
             }
         }

         /**
          * Dispatches on the first argument:
          * - `diplomatRuntime.exposeConstructor`: the remaining arguments go to the internal initializer;
          * - `diplomatRuntime.internalConstructor`: all arguments go to the internal initializer;
          * - anything else: builds a segmenter from compiled data, ignoring the arguments.
          */
         constructor(...args) {
             const [marker, ...rest] = args;
             if (marker === diplomatRuntime.exposeConstructor) {
                 return this.#internalConstructor(...rest);
             }
             if (marker === diplomatRuntime.internalConstructor) {
                 return this.#internalConstructor(...args);
             }
             return this.#defaultConstructor();
         }
     }