WordSegmenter.mjs (12535B)
// generated by diplomat-tool
import { DataError } from "./DataError.mjs"
import { DataProvider } from "./DataProvider.mjs"
import { Locale } from "./Locale.mjs"
import { WordBreakIteratorUtf16 } from "./WordBreakIteratorUtf16.mjs"
import wasm from "./diplomat-wasm.mjs";
import * as diplomatRuntime from "./diplomat-runtime.mjs";


// Calls the WASM destructor for a segmenter pointer once the owning JS
// wrapper has been garbage collected.
const WordSegmenter_box_destroy_registry = new FinalizationRegistry((ptr) => {
    wasm.icu4x_WordSegmenter_destroy_mv1(ptr);
});

/**
 * An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
 *
 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html) for more information.
 */
export class WordSegmenter {
    // Internal ptr reference:
    #ptr = null;

    // Lifetimes are only to keep dependencies alive.
    // Since JS won't garbage collect until there are no incoming edges.
    #selfEdge = [];

    /**
     * Shared initializer behind the public constructor.
     *
     * @param {symbol} symbol - Must be `diplomatRuntime.internalConstructor`; any other
     *   value logs an error and leaves the instance uninitialized.
     * @param {number} ptr - Pointer to the underlying WASM object.
     * @param {Array} selfEdge - Objects this instance borrows from; empty when it owns `ptr`.
     * @returns {WordSegmenter|undefined} `this` on success, `undefined` when `symbol` is wrong.
     */
    #internalConstructor(symbol, ptr, selfEdge) {
        if (symbol !== diplomatRuntime.internalConstructor) {
            console.error("WordSegmenter is an Opaque type. You cannot call its constructor.");
            return;
        }
        this.#ptr = ptr;
        this.#selfEdge = selfEdge;

        // Are we being borrowed? If not, we can register.
        if (this.#selfEdge.length === 0) {
            WordSegmenter_box_destroy_registry.register(this, this.#ptr);
        }

        return this;
    }

    /**
     * Runs a fallible WASM constructor and wraps its outcome.
     *
     * The receive buffer is always freed, including when `invoke` itself throws
     * (for example because an argument's `ffiValue` could not be read).
     *
     * @param {(receiveBuffer: number) => void} invoke - Calls the WASM constructor,
     *   passing it the receive buffer it must write its result into.
     * @returns {WordSegmenter} The newly constructed, owning segmenter.
     * @throws {Error} With a `DataError` as `cause` when the result flag is not set.
     */
    static #createFromFallibleCall(invoke) {
        // NOTE(review): the (5, 4, true) arguments are reproduced from the generated
        // code; presumably size, alignment and "is a result" — confirm in diplomat-runtime.
        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);

        try {
            invoke(diplomatReceive.buffer);

            if (!diplomatReceive.resultFlag) {
                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
                throw new globalThis.Error('DataError: ' + cause.value, { cause });
            }
            return new WordSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
        } finally {
            diplomatReceive.free();
        }
    }

    /**
     * The raw pointer handed to WASM functions.
     *
     * @returns {number|null} The stored pointer, or `null` if the instance was never initialized.
     */
    get ffiValue() {
        return this.#ptr;
    }

    /**
     * Construct a [`WordSegmenter`] with automatically selecting the best available LSTM
     * or dictionary payload data, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
     *
     * @returns {WordSegmenter}
     */
    static createAuto() {
        const result = wasm.icu4x_WordSegmenter_create_auto_mv1();
        return new WordSegmenter(diplomatRuntime.internalConstructor, result, []);
    }

    /**
     * Construct a [`WordSegmenter`] with automatically selecting the best available LSTM
     * or dictionary payload data, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param {Locale} locale - Content locale.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` as `cause` on failure.
     */
    static createAutoWithContentLocale(locale) {
        return WordSegmenter.#createFromFallibleCall((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_auto_with_content_locale_mv1(receiveBuffer, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with automatically selecting the best available LSTM
     * or dictionary payload data, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param {DataProvider} provider - Data source to load from.
     * @param {Locale} locale - Content locale.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` as `cause` on failure.
     */
    static createAutoWithContentLocaleAndProvider(provider, locale) {
        return WordSegmenter.#createFromFallibleCall((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_auto_with_content_locale_and_provider_mv1(receiveBuffer, provider.ffiValue, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
     *
     * @returns {WordSegmenter}
     */
    static createLstm() {
        const result = wasm.icu4x_WordSegmenter_create_lstm_mv1();
        return new WordSegmenter(diplomatRuntime.internalConstructor, result, []);
    }

    /**
     * Construct a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param {Locale} locale - Content locale.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` as `cause` on failure.
     */
    static createLstmWithContentLocale(locale) {
        return WordSegmenter.#createFromFallibleCall((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_lstm_with_content_locale_mv1(receiveBuffer, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param {DataProvider} provider - Data source to load from.
     * @param {Locale} locale - Content locale.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` as `cause` on failure.
     */
    static createLstmWithContentLocaleAndProvider(provider, locale) {
        return WordSegmenter.#createFromFallibleCall((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_lstm_with_content_locale_and_provider_mv1(receiveBuffer, provider.ffiValue, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
     *
     * @returns {WordSegmenter}
     */
    static createDictionary() {
        const result = wasm.icu4x_WordSegmenter_create_dictionary_mv1();
        return new WordSegmenter(diplomatRuntime.internalConstructor, result, []);
    }

    /**
     * Construct a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param {Locale} locale - Content locale.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` as `cause` on failure.
     */
    static createDictionaryWithContentLocale(locale) {
        return WordSegmenter.#createFromFallibleCall((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_dictionary_with_content_locale_mv1(receiveBuffer, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param {DataProvider} provider - Data source to load from.
     * @param {Locale} locale - Content locale.
     * @returns {WordSegmenter}
     * @throws {Error} With a `DataError` as `cause` on failure.
     */
    static createDictionaryWithContentLocaleAndProvider(provider, locale) {
        return WordSegmenter.#createFromFallibleCall((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_dictionary_with_content_locale_and_provider_mv1(receiveBuffer, provider.ffiValue, locale.ffiValue);
        });
    }

    /**
     * Segments a string.
     *
     * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
     * to the WHATWG Encoding Standard.
     *
     * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
     *
     * @param {string} input - Text to segment.
     * @returns {WordBreakIteratorUtf16} Iterator that keeps this segmenter and the
     *   input buffer alive through its lifetime edges.
     */
    segment(input) {
        const functionGarbageCollectorGrip = new diplomatRuntime.GarbageCollectorGrip();

        try {
            const inputSlice = diplomatRuntime.DiplomatBuf.str16(wasm, input);
            // Register the buffer with the grip; without this the release in
            // `finally` has nothing to act on and the buffer is never reclaimed.
            // NOTE(review): relies on GarbageCollectorGrip.alloc(buf) tracking the
            // buffer for releaseToGarbageCollector() — confirm against diplomat-runtime.
            functionGarbageCollectorGrip.alloc(inputSlice);

            // This lifetime edge depends on lifetimes 'a
            const aEdges = [this, inputSlice];

            const result = wasm.icu4x_WordSegmenter_segment_utf16_mv1(this.ffiValue, ...inputSlice.splat());

            return new WordBreakIteratorUtf16(diplomatRuntime.internalConstructor, result, [], aEdges);
        } finally {
            functionGarbageCollectorGrip.releaseToGarbageCollector();
        }
    }

    /**
     * Not for direct use: `WordSegmenter` is an opaque type. Use one of the static
     * `create*` factories instead.
     *
     * @param {symbol} symbol - Must be `diplomatRuntime.internalConstructor`.
     * @param {number} ptr - Pointer to the underlying WASM object.
     * @param {Array} selfEdge - Objects this instance borrows from.
     */
    constructor(symbol, ptr, selfEdge) {
        return this.#internalConstructor(symbol, ptr, selfEdge);
    }
}