tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Segmenter.h (11224B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef builtin_intl_Segmenter_h
      8 #define builtin_intl_Segmenter_h
      9 
     10 #include <stdint.h>
     11 #include <type_traits>
     12 
     13 #include "builtin/SelfHostingDefines.h"
     14 #include "js/Class.h"
     15 #include "js/Value.h"
     16 #include "vm/NativeObject.h"
     17 
     18 struct JS_PUBLIC_API JSContext;
     19 class JSString;
     20 
     21 namespace JS {
     22 class GCContext;
     23 }
     24 
     25 namespace js {
     26 
     27 enum class SegmenterGranularity : int8_t { Grapheme, Word, Sentence };
     28 
     29 class SegmenterObject : public NativeObject {
     30 public:
     31  static const JSClass class_;
     32  static const JSClass& protoClass_;
     33 
     34  static constexpr uint32_t INTERNALS_SLOT = 0;
     35  static constexpr uint32_t LOCALE_SLOT = 1;
     36  static constexpr uint32_t GRANULARITY_SLOT = 2;
     37  static constexpr uint32_t SEGMENTER_SLOT = 3;
     38  static constexpr uint32_t SLOT_COUNT = 4;
     39 
     40  static_assert(INTERNALS_SLOT == INTL_INTERNALS_OBJECT_SLOT,
     41                "INTERNALS_SLOT must match self-hosting define for internals "
     42                "object slot");
     43 
     44  JSString* getLocale() const {
     45    const auto& slot = getFixedSlot(LOCALE_SLOT);
     46    if (slot.isUndefined()) {
     47      return nullptr;
     48    }
     49    return slot.toString();
     50  }
     51 
     52  void setLocale(JSString* locale) {
     53    setFixedSlot(LOCALE_SLOT, StringValue(locale));
     54  }
     55 
     56  SegmenterGranularity getGranularity() const {
     57    const auto& slot = getFixedSlot(GRANULARITY_SLOT);
     58    if (slot.isUndefined()) {
     59      return SegmenterGranularity::Grapheme;
     60    }
     61    return static_cast<SegmenterGranularity>(slot.toInt32());
     62  }
     63 
     64  void setGranularity(SegmenterGranularity granularity) {
     65    setFixedSlot(GRANULARITY_SLOT,
     66                 Int32Value(static_cast<int32_t>(granularity)));
     67  }
     68 
     69  void* getSegmenter() const {
     70    const auto& slot = getFixedSlot(SEGMENTER_SLOT);
     71    if (slot.isUndefined()) {
     72      return nullptr;
     73    }
     74    return slot.toPrivate();
     75  }
     76 
     77  void setSegmenter(void* brk) {
     78    setFixedSlot(SEGMENTER_SLOT, PrivateValue(brk));
     79  }
     80 
     81 private:
     82  static const ClassSpec classSpec_;
     83  static const JSClassOps classOps_;
     84 
     85  static void finalize(JS::GCContext* gcx, JSObject* obj);
     86 };
     87 
     88 class SegmentsStringChars final {
     89  uintptr_t tagged_ = 0;
     90 
     91  enum Tag {
     92    Latin1 = 0,
     93    TwoByte = 1,
     94 
     95    TagMask = TwoByte,
     96  };
     97 
     98  static uintptr_t toTagged(const void* chars, Tag tag) {
     99    MOZ_ASSERT(chars != nullptr, "can't tag nullptr");
    100 
    101    auto ptr = reinterpret_cast<uintptr_t>(chars);
    102    MOZ_ASSERT((ptr & TagMask) == 0, "pointer already tagged");
    103 
    104    return ptr | tag;
    105  }
    106 
    107  Tag tag() const { return static_cast<Tag>(tagged_ & TagMask); }
    108 
    109  uintptr_t untagged() const { return tagged_ & ~TagMask; }
    110 
    111  explicit SegmentsStringChars(const void* taggedChars)
    112      : tagged_(reinterpret_cast<uintptr_t>(taggedChars)) {}
    113 
    114 public:
    115  SegmentsStringChars() = default;
    116 
    117  explicit SegmentsStringChars(const JS::Latin1Char* chars)
    118      : tagged_(toTagged(chars, Latin1)) {}
    119 
    120  explicit SegmentsStringChars(const char16_t* chars)
    121      : tagged_(toTagged(chars, TwoByte)) {}
    122 
    123  static auto fromTagged(const void* taggedChars) {
    124    return SegmentsStringChars{taggedChars};
    125  }
    126 
    127  explicit operator bool() const { return tagged_ != 0; }
    128 
    129  template <typename CharT>
    130  bool has() const {
    131    if constexpr (std::is_same_v<CharT, JS::Latin1Char>) {
    132      return tag() == Latin1;
    133    } else {
    134      static_assert(std::is_same_v<CharT, char16_t>);
    135      return tag() == TwoByte;
    136    }
    137  }
    138 
    139  template <typename CharT>
    140  CharT* data() const {
    141    MOZ_ASSERT(has<CharT>());
    142    return reinterpret_cast<CharT*>(untagged());
    143  }
    144 
    145  uintptr_t tagged() const { return tagged_; }
    146 };
    147 
    148 class SegmentsObject : public NativeObject {
    149 public:
    150  static const JSClass class_;
    151 
    152  static constexpr uint32_t SEGMENTER_SLOT = 0;
    153  static constexpr uint32_t STRING_SLOT = 1;
    154  static constexpr uint32_t STRING_CHARS_SLOT = 2;
    155  static constexpr uint32_t INDEX_SLOT = 3;
    156  static constexpr uint32_t GRANULARITY_SLOT = 4;
    157  static constexpr uint32_t BREAK_ITERATOR_SLOT = 5;
    158  static constexpr uint32_t SLOT_COUNT = 6;
    159 
    160  static_assert(STRING_SLOT == INTL_SEGMENTS_STRING_SLOT,
    161                "STRING_SLOT must match self-hosting define for string slot");
    162 
    163  SegmenterObject* getSegmenter() const {
    164    const auto& slot = getFixedSlot(SEGMENTER_SLOT);
    165    if (slot.isUndefined()) {
    166      return nullptr;
    167    }
    168    return &slot.toObject().as<SegmenterObject>();
    169  }
    170 
    171  void setSegmenter(SegmenterObject* segmenter) {
    172    setFixedSlot(SEGMENTER_SLOT, ObjectValue(*segmenter));
    173  }
    174 
    175  JSString* getString() const {
    176    const auto& slot = getFixedSlot(STRING_SLOT);
    177    if (slot.isUndefined()) {
    178      return nullptr;
    179    }
    180    return slot.toString();
    181  }
    182 
    183  void setString(JSString* str) { setFixedSlot(STRING_SLOT, StringValue(str)); }
    184 
    185  bool hasStringChars() const {
    186    return !getFixedSlot(STRING_CHARS_SLOT).isUndefined();
    187  }
    188 
    189  SegmentsStringChars getStringChars() const {
    190    const auto& slot = getFixedSlot(STRING_CHARS_SLOT);
    191    if (slot.isUndefined()) {
    192      return SegmentsStringChars{};
    193    }
    194    return SegmentsStringChars::fromTagged(slot.toPrivate());
    195  }
    196 
    197  void setStringChars(SegmentsStringChars chars) {
    198    setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars.tagged()));
    199  }
    200 
    201  bool hasLatin1StringChars() const {
    202    MOZ_ASSERT(hasStringChars());
    203    return getStringChars().has<JS::Latin1Char>();
    204  }
    205 
    206  int32_t getIndex() const {
    207    const auto& slot = getFixedSlot(INDEX_SLOT);
    208    if (slot.isUndefined()) {
    209      return 0;
    210    }
    211    return slot.toInt32();
    212  }
    213 
    214  void setIndex(int32_t index) { setFixedSlot(INDEX_SLOT, Int32Value(index)); }
    215 
    216  SegmenterGranularity getGranularity() const {
    217    const auto& slot = getFixedSlot(GRANULARITY_SLOT);
    218    if (slot.isUndefined()) {
    219      return SegmenterGranularity::Grapheme;
    220    }
    221    return static_cast<SegmenterGranularity>(slot.toInt32());
    222  }
    223 
    224  void setGranularity(SegmenterGranularity granularity) {
    225    setFixedSlot(GRANULARITY_SLOT,
    226                 Int32Value(static_cast<int32_t>(granularity)));
    227  }
    228 
    229  void* getBreakIterator() const {
    230    const auto& slot = getFixedSlot(BREAK_ITERATOR_SLOT);
    231    if (slot.isUndefined()) {
    232      return nullptr;
    233    }
    234    return slot.toPrivate();
    235  }
    236 
    237  void setBreakIterator(void* brk) {
    238    setFixedSlot(BREAK_ITERATOR_SLOT, PrivateValue(brk));
    239  }
    240 
    241 private:
    242  static const JSClassOps classOps_;
    243 
    244  static void finalize(JS::GCContext* gcx, JSObject* obj);
    245 };
    246 
    247 class SegmentIteratorObject : public NativeObject {
    248 public:
    249  static const JSClass class_;
    250 
    251  static constexpr uint32_t SEGMENTER_SLOT = 0;
    252  static constexpr uint32_t STRING_SLOT = 1;
    253  static constexpr uint32_t STRING_CHARS_SLOT = 2;
    254  static constexpr uint32_t INDEX_SLOT = 3;
    255  static constexpr uint32_t GRANULARITY_SLOT = 4;
    256  static constexpr uint32_t BREAK_ITERATOR_SLOT = 5;
    257  static constexpr uint32_t SLOT_COUNT = 6;
    258 
    259  static_assert(STRING_SLOT == INTL_SEGMENT_ITERATOR_STRING_SLOT,
    260                "STRING_SLOT must match self-hosting define for string slot");
    261 
    262  static_assert(INDEX_SLOT == INTL_SEGMENT_ITERATOR_INDEX_SLOT,
    263                "INDEX_SLOT must match self-hosting define for index slot");
    264 
    265  SegmenterObject* getSegmenter() const {
    266    const auto& slot = getFixedSlot(SEGMENTER_SLOT);
    267    if (slot.isUndefined()) {
    268      return nullptr;
    269    }
    270    return &slot.toObject().as<SegmenterObject>();
    271  }
    272 
    273  void setSegmenter(SegmenterObject* segmenter) {
    274    setFixedSlot(SEGMENTER_SLOT, ObjectOrNullValue(segmenter));
    275  }
    276 
    277  JSString* getString() const {
    278    const auto& slot = getFixedSlot(STRING_SLOT);
    279    if (slot.isUndefined()) {
    280      return nullptr;
    281    }
    282    return slot.toString();
    283  }
    284 
    285  void setString(JSString* str) { setFixedSlot(STRING_SLOT, StringValue(str)); }
    286 
    287  bool hasStringChars() const {
    288    return !getFixedSlot(STRING_CHARS_SLOT).isUndefined();
    289  }
    290 
    291  SegmentsStringChars getStringChars() const {
    292    const auto& slot = getFixedSlot(STRING_CHARS_SLOT);
    293    if (slot.isUndefined()) {
    294      return SegmentsStringChars{};
    295    }
    296    return SegmentsStringChars::fromTagged(slot.toPrivate());
    297  }
    298 
    299  void setStringChars(SegmentsStringChars chars) {
    300    setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars.tagged()));
    301  }
    302 
    303  bool hasLatin1StringChars() const {
    304    MOZ_ASSERT(hasStringChars());
    305    return getStringChars().has<JS::Latin1Char>();
    306  }
    307 
    308  int32_t getIndex() const {
    309    const auto& slot = getFixedSlot(INDEX_SLOT);
    310    if (slot.isUndefined()) {
    311      return 0;
    312    }
    313    return slot.toInt32();
    314  }
    315 
    316  void setIndex(int32_t index) { setFixedSlot(INDEX_SLOT, Int32Value(index)); }
    317 
    318  SegmenterGranularity getGranularity() const {
    319    const auto& slot = getFixedSlot(GRANULARITY_SLOT);
    320    if (slot.isUndefined()) {
    321      return SegmenterGranularity::Grapheme;
    322    }
    323    return static_cast<SegmenterGranularity>(slot.toInt32());
    324  }
    325 
    326  void setGranularity(SegmenterGranularity granularity) {
    327    setFixedSlot(GRANULARITY_SLOT,
    328                 Int32Value(static_cast<int32_t>(granularity)));
    329  }
    330 
    331  void* getBreakIterator() const {
    332    const auto& slot = getFixedSlot(BREAK_ITERATOR_SLOT);
    333    if (slot.isUndefined()) {
    334      return nullptr;
    335    }
    336    return slot.toPrivate();
    337  }
    338 
    339  void setBreakIterator(void* brk) {
    340    setFixedSlot(BREAK_ITERATOR_SLOT, PrivateValue(brk));
    341  }
    342 
    343 private:
    344  static const JSClassOps classOps_;
    345 
    346  static void finalize(JS::GCContext* gcx, JSObject* obj);
    347 };
    348 
    349 /**
    350 * Create a new Segments object.
    351 *
    352 * Usage: segment = intl_CreateSegmentsObject(segmenter, string)
    353 */
    354 [[nodiscard]] extern bool intl_CreateSegmentsObject(JSContext* cx,
    355                                                    unsigned argc, Value* vp);
    356 
    357 /**
    358 * Create a new Segment Iterator object.
    359 *
    360 * Usage: iterator = intl_CreateSegmentIterator(segments)
    361 */
    362 [[nodiscard]] extern bool intl_CreateSegmentIterator(JSContext* cx,
    363                                                     unsigned argc, Value* vp);
    364 
    365 /**
    366 * Find the next and the preceding segment boundaries for the given index. The
    367 * index must be a valid string index within the segmenter string.
    368 *
    369 * Return a three-element array object `[startIndex, endIndex, wordLike]`, where
    370 * `wordLike` is either a boolean or undefined for non-word segmenters.
    371 *
    372 * Usage: boundaries = intl_FindSegmentBoundaries(segments, index)
    373 */
    374 [[nodiscard]] extern bool intl_FindSegmentBoundaries(JSContext* cx,
    375                                                     unsigned argc, Value* vp);
    376 
    377 /**
    378 * Find the next segment boundaries starting from the current iterator index.
    379 * The iterator mustn't have been completed.
    380 *
    381 * Return a three-element array object `[startIndex, endIndex, wordLike]`, where
    382 * `wordLike` is either a boolean or undefined for non-word segmenters.
    383 *
    384 * Usage: boundaries = intl_FindNextSegmentBoundaries(iterator)
    385 */
    386 [[nodiscard]] extern bool intl_FindNextSegmentBoundaries(JSContext* cx,
    387                                                         unsigned argc,
    388                                                         Value* vp);
    389 
    390 }  // namespace js
    391 
    392 #endif /* builtin_intl_Segmenter_h */