Segmenter.h (11224B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef builtin_intl_Segmenter_h 8 #define builtin_intl_Segmenter_h 9 10 #include <stdint.h> 11 #include <type_traits> 12 13 #include "builtin/SelfHostingDefines.h" 14 #include "js/Class.h" 15 #include "js/Value.h" 16 #include "vm/NativeObject.h" 17 18 struct JS_PUBLIC_API JSContext; 19 class JSString; 20 21 namespace JS { 22 class GCContext; 23 } 24 25 namespace js { 26 27 enum class SegmenterGranularity : int8_t { Grapheme, Word, Sentence }; 28 29 class SegmenterObject : public NativeObject { 30 public: 31 static const JSClass class_; 32 static const JSClass& protoClass_; 33 34 static constexpr uint32_t INTERNALS_SLOT = 0; 35 static constexpr uint32_t LOCALE_SLOT = 1; 36 static constexpr uint32_t GRANULARITY_SLOT = 2; 37 static constexpr uint32_t SEGMENTER_SLOT = 3; 38 static constexpr uint32_t SLOT_COUNT = 4; 39 40 static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT, 41 "INTERNALS_SLOT must match self-hosting define for internals " 42 "object slot"); 43 44 JSString* getLocale() const { 45 const auto& slot = getFixedSlot(LOCALE_SLOT); 46 if (slot.isUndefined()) { 47 return nullptr; 48 } 49 return slot.toString(); 50 } 51 52 void setLocale(JSString* locale) { 53 setFixedSlot(LOCALE_SLOT, StringValue(locale)); 54 } 55 56 SegmenterGranularity getGranularity() const { 57 const auto& slot = getFixedSlot(GRANULARITY_SLOT); 58 if (slot.isUndefined()) { 59 return SegmenterGranularity::Grapheme; 60 } 61 return static_cast<SegmenterGranularity>(slot.toInt32()); 62 } 63 64 void setGranularity(SegmenterGranularity granularity) { 65 setFixedSlot(GRANULARITY_SLOT, 66 Int32Value(static_cast<int32_t>(granularity))); 67 } 68 69 void* getSegmenter() const { 70 const auto& slot = getFixedSlot(SEGMENTER_SLOT); 71 if (slot.isUndefined()) { 72 return nullptr; 73 } 74 return slot.toPrivate(); 75 } 76 77 void setSegmenter(void* brk) { 78 setFixedSlot(SEGMENTER_SLOT, PrivateValue(brk)); 79 } 80 81 private: 82 static const ClassSpec classSpec_; 83 static const JSClassOps classOps_; 84 85 static void finalize(JS::GCContext* gcx, JSObject* obj); 86 }; 87 88 class SegmentsStringChars final { 89 uintptr_t tagged_ = 0; 90 91 enum Tag { 92 Latin1 = 0, 93 TwoByte = 1, 94 95 TagMask = TwoByte, 96 }; 97 98 static uintptr_t toTagged(const void* chars, Tag tag) { 99 MOZ_ASSERT(chars != nullptr, "can't tag nullptr"); 100 101 auto ptr = reinterpret_cast<uintptr_t>(chars); 102 MOZ_ASSERT((ptr & TagMask) == 0, "pointer already tagged"); 103 104 return ptr | tag; 105 } 106 107 Tag tag() const { return static_cast<Tag>(tagged_ & TagMask); } 108 109 uintptr_t untagged() const { return tagged_ & ~TagMask; } 110 111 explicit SegmentsStringChars(const void* taggedChars) 112 : tagged_(reinterpret_cast<uintptr_t>(taggedChars)) {} 113 114 public: 115 SegmentsStringChars() = default; 116 117 explicit SegmentsStringChars(const JS::Latin1Char* chars) 118 : tagged_(toTagged(chars, Latin1)) {} 119 120 explicit SegmentsStringChars(const char16_t* chars) 121 : tagged_(toTagged(chars, TwoByte)) {} 122 123 static auto fromTagged(const void* taggedChars) { 124 return SegmentsStringChars{taggedChars}; 125 } 126 127 explicit operator bool() const { return tagged_ != 0; } 128 129 template <typename CharT> 130 bool has() const { 131 if constexpr (std::is_same_v<CharT, JS::Latin1Char>) { 132 return tag() == Latin1; 133 } else { 134 static_assert(std::is_same_v<CharT, char16_t>); 135 return tag() == TwoByte; 136 } 137 } 138 139 template <typename CharT> 140 CharT* data() const { 141 MOZ_ASSERT(has<CharT>()); 142 return reinterpret_cast<CharT*>(untagged()); 143 } 144 145 uintptr_t tagged() const { return tagged_; } 146 }; 147 148 class SegmentsObject : public NativeObject { 149 public: 150 static const JSClass class_; 151 152 static constexpr uint32_t SEGMENTER_SLOT = 0; 153 static constexpr uint32_t STRING_SLOT = 1; 154 static constexpr uint32_t STRING_CHARS_SLOT = 2; 155 static constexpr uint32_t INDEX_SLOT = 3; 156 static constexpr uint32_t GRANULARITY_SLOT = 4; 157 static constexpr uint32_t BREAK_ITERATOR_SLOT = 5; 158 static constexpr uint32_t SLOT_COUNT = 6; 159 160 static_assert(STRING_SLOT == INTL_SEGMENTS_STRING_SLOT, 161 "STRING_SLOT must match self-hosting define for string slot"); 162 163 SegmenterObject* getSegmenter() const { 164 const auto& slot = getFixedSlot(SEGMENTER_SLOT); 165 if (slot.isUndefined()) { 166 return nullptr; 167 } 168 return &slot.toObject().as<SegmenterObject>(); 169 } 170 171 void setSegmenter(SegmenterObject* segmenter) { 172 setFixedSlot(SEGMENTER_SLOT, ObjectValue(*segmenter)); 173 } 174 175 JSString* getString() const { 176 const auto& slot = getFixedSlot(STRING_SLOT); 177 if (slot.isUndefined()) { 178 return nullptr; 179 } 180 return slot.toString(); 181 } 182 183 void setString(JSString* str) { setFixedSlot(STRING_SLOT, StringValue(str)); } 184 185 bool hasStringChars() const { 186 return !getFixedSlot(STRING_CHARS_SLOT).isUndefined(); 187 } 188 189 SegmentsStringChars getStringChars() const { 190 const auto& slot = getFixedSlot(STRING_CHARS_SLOT); 191 if (slot.isUndefined()) { 192 return SegmentsStringChars{}; 193 } 194 return SegmentsStringChars::fromTagged(slot.toPrivate()); 195 } 196 197 void setStringChars(SegmentsStringChars chars) { 198 setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars.tagged())); 199 } 200 201 bool hasLatin1StringChars() const { 202 MOZ_ASSERT(hasStringChars()); 203 return getStringChars().has<JS::Latin1Char>(); 204 } 205 206 int32_t getIndex() const { 207 const auto& slot = getFixedSlot(INDEX_SLOT); 208 if (slot.isUndefined()) { 209 return 0; 210 } 211 return slot.toInt32(); 212 } 213 214 void setIndex(int32_t index) { setFixedSlot(INDEX_SLOT, Int32Value(index)); } 215 216 SegmenterGranularity getGranularity() const { 217 const auto& slot = getFixedSlot(GRANULARITY_SLOT); 218 if (slot.isUndefined()) { 219 return SegmenterGranularity::Grapheme; 220 } 221 return static_cast<SegmenterGranularity>(slot.toInt32()); 222 } 223 224 void setGranularity(SegmenterGranularity granularity) { 225 setFixedSlot(GRANULARITY_SLOT, 226 Int32Value(static_cast<int32_t>(granularity))); 227 } 228 229 void* getBreakIterator() const { 230 const auto& slot = getFixedSlot(BREAK_ITERATOR_SLOT); 231 if (slot.isUndefined()) { 232 return nullptr; 233 } 234 return slot.toPrivate(); 235 } 236 237 void setBreakIterator(void* brk) { 238 setFixedSlot(BREAK_ITERATOR_SLOT, PrivateValue(brk)); 239 } 240 241 private: 242 static const JSClassOps classOps_; 243 244 static void finalize(JS::GCContext* gcx, JSObject* obj); 245 }; 246 247 class SegmentIteratorObject : public NativeObject { 248 public: 249 static const JSClass class_; 250 251 static constexpr uint32_t SEGMENTER_SLOT = 0; 252 static constexpr uint32_t STRING_SLOT = 1; 253 static constexpr uint32_t STRING_CHARS_SLOT = 2; 254 static constexpr uint32_t INDEX_SLOT = 3; 255 static constexpr uint32_t GRANULARITY_SLOT = 4; 256 static constexpr uint32_t BREAK_ITERATOR_SLOT = 5; 257 static constexpr uint32_t SLOT_COUNT = 6; 258 259 static_assert(STRING_SLOT == INTL_SEGMENT_ITERATOR_STRING_SLOT, 260 "STRING_SLOT must match self-hosting define for string slot"); 261 262 static_assert(INDEX_SLOT == INTL_SEGMENT_ITERATOR_INDEX_SLOT, 263 "INDEX_SLOT must match self-hosting define for index slot"); 264 265 SegmenterObject* getSegmenter() const { 266 const auto& slot = getFixedSlot(SEGMENTER_SLOT); 267 if (slot.isUndefined()) { 268 return nullptr; 269 } 270 return &slot.toObject().as<SegmenterObject>(); 271 } 272 273 void setSegmenter(SegmenterObject* segmenter) { 274 setFixedSlot(SEGMENTER_SLOT, ObjectOrNullValue(segmenter)); 275 } 276 277 JSString* getString() const { 278 const auto& slot = getFixedSlot(STRING_SLOT); 279 if (slot.isUndefined()) { 280 return nullptr; 281 } 282 return slot.toString(); 283 } 284 285 void setString(JSString* str) { setFixedSlot(STRING_SLOT, StringValue(str)); } 286 287 bool hasStringChars() const { 288 return !getFixedSlot(STRING_CHARS_SLOT).isUndefined(); 289 } 290 291 SegmentsStringChars getStringChars() const { 292 const auto& slot = getFixedSlot(STRING_CHARS_SLOT); 293 if (slot.isUndefined()) { 294 return SegmentsStringChars{}; 295 } 296 return SegmentsStringChars::fromTagged(slot.toPrivate()); 297 } 298 299 void setStringChars(SegmentsStringChars chars) { 300 setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars.tagged())); 301 } 302 303 bool hasLatin1StringChars() const { 304 MOZ_ASSERT(hasStringChars()); 305 return getStringChars().has<JS::Latin1Char>(); 306 } 307 308 int32_t getIndex() const { 309 const auto& slot = getFixedSlot(INDEX_SLOT); 310 if (slot.isUndefined()) { 311 return 0; 312 } 313 return slot.toInt32(); 314 } 315 316 void setIndex(int32_t index) { setFixedSlot(INDEX_SLOT, Int32Value(index)); } 317 318 SegmenterGranularity getGranularity() const { 319 const auto& slot = getFixedSlot(GRANULARITY_SLOT); 320 if (slot.isUndefined()) { 321 return SegmenterGranularity::Grapheme; 322 } 323 return static_cast<SegmenterGranularity>(slot.toInt32()); 324 } 325 326 void setGranularity(SegmenterGranularity granularity) { 327 setFixedSlot(GRANULARITY_SLOT, 328 Int32Value(static_cast<int32_t>(granularity))); 329 } 330 331 void* getBreakIterator() const { 332 const auto& slot = getFixedSlot(BREAK_ITERATOR_SLOT); 333 if (slot.isUndefined()) { 334 return nullptr; 335 } 336 return slot.toPrivate(); 337 } 338 339 void setBreakIterator(void* brk) { 340 setFixedSlot(BREAK_ITERATOR_SLOT, PrivateValue(brk)); 341 } 342 343 private: 344 static const JSClassOps classOps_; 345 346 static void finalize(JS::GCContext* gcx, JSObject* obj); 347 }; 348 349 /** 350 * Create a new Segments object. 351 * 352 * Usage: segment = intl_CreateSegmentsObject(segmenter, string) 353 */ 354 [[nodiscard]] extern bool intl_CreateSegmentsObject(JSContext* cx, 355 unsigned argc, Value* vp); 356 357 /** 358 * Create a new Segment Iterator object. 359 * 360 * Usage: iterator = intl_CreateSegmentIterator(segments) 361 */ 362 [[nodiscard]] extern bool intl_CreateSegmentIterator(JSContext* cx, 363 unsigned argc, Value* vp); 364 365 /** 366 * Find the next and the preceding segment boundaries for the given index. The 367 * index must be a valid string index within the segmenter string. 368 * 369 * Return a three-element array object `[startIndex, endIndex, wordLike]`, where 370 * `wordLike` is either a boolean or undefined for non-word segmenters. 371 * 372 * Usage: boundaries = intl_FindSegmentBoundaries(segments, index) 373 */ 374 [[nodiscard]] extern bool intl_FindSegmentBoundaries(JSContext* cx, 375 unsigned argc, Value* vp); 376 377 /** 378 * Find the next segment boundaries starting from the current iterator index. 379 * The iterator mustn't have been completed. 380 * 381 * Return a three-element array object `[startIndex, endIndex, wordLike]`, where 382 * `wordLike` is either a boolean or undefined for non-word segmenters. 383 * 384 * Usage: boundaries = intl_FindNextSegmentBoundaries(iterator) 385 */ 386 [[nodiscard]] extern bool intl_FindNextSegmentBoundaries(JSContext* cx, 387 unsigned argc, 388 Value* vp); 389 390 } // namespace js 391 392 #endif /* builtin_intl_Segmenter_h */