SentenceSegmenter.mjs (5357B)
// generated by diplomat-tool
import { DataError } from "./DataError.mjs"
import { DataProvider } from "./DataProvider.mjs"
import { Locale } from "./Locale.mjs"
import { SentenceBreakIteratorUtf16 } from "./SentenceBreakIteratorUtf16.mjs"
import wasm from "./diplomat-wasm.mjs";
import * as diplomatRuntime from "./diplomat-runtime.mjs";


// Finalization hook for owning wrappers: the held value is the raw pointer that
// `#internalConstructor` registers, and the callback hands it to the wasm destructor.
const SentenceSegmenter_box_destroy_registry = new FinalizationRegistry((ptr) => {
    wasm.icu4x_SentenceSegmenter_destroy_mv1(ptr);
});

/**
 * An ICU4X sentence-break segmenter, capable of finding sentence breakpoints in strings.
 *
 * See the [Rust documentation for `SentenceSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.SentenceSegmenter.html) for more information.
 */
export class SentenceSegmenter {
    // Internal ptr reference:
    #ptr = null;

    // Lifetimes are only to keep dependencies alive.
    // Since JS won't garbage collect until there are no incoming edges.
    #selfEdge = [];

    /**
     * Shared initialisation used by every construction path.
     *
     * @param {symbol} symbol - Must be `diplomatRuntime.internalConstructor`; anything else is
     *     rejected with a console error and the instance is left uninitialised.
     * @param {number} ptr - Pointer value stored in `#ptr` and exposed through `ffiValue`.
     * @param {Array} selfEdge - Objects this wrapper must keep alive; an empty array means
     *     the wrapper is not borrowed and is registered for finalization.
     * @returns {SentenceSegmenter|undefined} `this` on success, `undefined` when rejected.
     */
    #internalConstructor(symbol, ptr, selfEdge) {
        if (symbol !== diplomatRuntime.internalConstructor) {
            console.error("SentenceSegmenter is an Opaque type. You cannot call its constructor.");
            return;
        }
        this.#ptr = ptr;
        this.#selfEdge = selfEdge;

        // Are we being borrowed? If not, we can register.
        if (this.#selfEdge.length === 0) {
            SentenceSegmenter_box_destroy_registry.register(this, this.#ptr);
        }

        return this;
    }

    /**
     * The raw pointer passed to wasm functions that operate on this segmenter.
     *
     * @returns {number|null} The stored pointer, or `null` if the instance was never initialised.
     */
    get ffiValue() {
        return this.#ptr;
    }


    /**
     * Construct a [`SentenceSegmenter`] using compiled data. This does not assume any content locale.
     *
     * See the [Rust documentation for `new`](https://docs.rs/icu/latest/icu/segmenter/struct.SentenceSegmenter.html#method.new) for more information.
     *
     * @returns {SentenceSegmenter} A new owning wrapper around the created segmenter.
     */
    #defaultConstructor() {
        const ptr = wasm.icu4x_SentenceSegmenter_create_mv1();
        return new SentenceSegmenter(diplomatRuntime.internalConstructor, ptr, []);
    }

    /**
     * Construct a [`SentenceSegmenter`] for content known to be of a given locale, using compiled data.
     *
     * @param {Locale} locale - Content locale; its `ffiValue` is passed to wasm.
     * @returns {SentenceSegmenter} A new owning wrapper around the created segmenter.
     * @throws {Error} With a `DataError` as `cause` when the result flag reports failure.
     */
    static createWithContentLocale(locale) {
        // NOTE(review): the (5, 4, true) arguments are presumably size, alignment and
        // "is a result" for the receive buffer - confirm against diplomat-runtime.
        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);

        try {
            // The call sits inside the `try` so the receive buffer is freed even when
            // reading `locale.ffiValue` or the wasm call itself throws.
            wasm.icu4x_SentenceSegmenter_create_with_content_locale_mv1(diplomatReceive.buffer, locale.ffiValue);

            if (!diplomatReceive.resultFlag) {
                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
                throw new globalThis.Error('DataError: ' + cause.value, { cause });
            }
            return new SentenceSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
        } finally {
            diplomatReceive.free();
        }
    }

    /**
     * Construct a [`SentenceSegmenter`] for content known to be of a given locale, using a particular data source.
     *
     * @param {DataProvider} provider - Data source; its `ffiValue` is passed to wasm.
     * @param {Locale} locale - Content locale; its `ffiValue` is passed to wasm.
     * @returns {SentenceSegmenter} A new owning wrapper around the created segmenter.
     * @throws {Error} With a `DataError` as `cause` when the result flag reports failure.
     */
    static createWithContentLocaleAndProvider(provider, locale) {
        // NOTE(review): the (5, 4, true) arguments are presumably size, alignment and
        // "is a result" for the receive buffer - confirm against diplomat-runtime.
        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);

        try {
            // The call sits inside the `try` so the receive buffer is freed even when
            // reading an argument's `ffiValue` or the wasm call itself throws.
            wasm.icu4x_SentenceSegmenter_create_with_content_locale_and_provider_mv1(diplomatReceive.buffer, provider.ffiValue, locale.ffiValue);

            if (!diplomatReceive.resultFlag) {
                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
                throw new globalThis.Error('DataError: ' + cause.value, { cause });
            }
            return new SentenceSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
        } finally {
            diplomatReceive.free();
        }
    }

    /**
     * Segments a string.
     *
     * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
     * to the WHATWG Encoding Standard.
     *
     * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.SentenceSegmenterBorrowed.html#method.segment_utf16) for more information.
     *
     * @param {string} input - Text to segment; it is converted with `DiplomatBuf.str16`.
     * @returns {SentenceBreakIteratorUtf16} Iterator that holds this segmenter and the
     *     converted input buffer as lifetime edges.
     */
    segment(input) {
        const functionGarbageCollectorGrip = new diplomatRuntime.GarbageCollectorGrip();
        // NOTE(review): nothing is ever handed to the grip above, so releasing it below
        // looks like a no-op - confirm whether `inputSlice` was meant to be registered with it.
        const inputSlice = diplomatRuntime.DiplomatBuf.str16(wasm, input);
        // This lifetime edge depends on lifetimes 'a
        const aEdges = [this, inputSlice];

        const result = wasm.icu4x_SentenceSegmenter_segment_utf16_mv1(this.ffiValue, ...inputSlice.splat());

        try {
            return new SentenceBreakIteratorUtf16(diplomatRuntime.internalConstructor, result, [], aEdges);
        } finally {
            functionGarbageCollectorGrip.releaseToGarbageCollector();
        }
    }

    /**
     * Public entry point. With no special leading argument this builds a segmenter from
     * compiled data via `#defaultConstructor`. The two runtime symbols route to
     * `#internalConstructor` instead:
     *   - `diplomatRuntime.exposeConstructor`: forwarded with that first argument dropped;
     *   - `diplomatRuntime.internalConstructor`: forwarded unchanged.
     */
    constructor(...args) {
        if (args[0] === diplomatRuntime.exposeConstructor) {
            return this.#internalConstructor(...args.slice(1));
        } else if (args[0] === diplomatRuntime.internalConstructor) {
            return this.#internalConstructor(...args);
        } else {
            return this.#defaultConstructor(...args);
        }
    }
}