tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Segmenter.js (9574B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /**
      6 * Intl.Segmenter internal properties.
      7 */
      8 function segmenterLocaleData() {
      9  // Segmenter doesn't support any extension keys.
     10  return {};
     11 }
     12 var segmenterInternalProperties = {
     13  localeData: segmenterLocaleData,
     14  relevantExtensionKeys: [],
     15 };
     16 
     17 /**
     18 * Intl.Segmenter ( [ locales [ , options ] ] )
     19 *
     20 * Compute an internal properties object from |lazySegmenterData|.
     21 */
     22 function resolveSegmenterInternals(lazySegmenterData) {
     23  assert(IsObject(lazySegmenterData), "lazy data not an object?");
     24 
     25  var internalProps = std_Object_create(null);
     26 
     27  var Segmenter = segmenterInternalProperties;
     28 
     29  // Compute effective locale.
     30 
     31  // Step 9.
     32  var localeData = Segmenter.localeData;
     33 
     34  // Step 10.
     35  var r = ResolveLocale(
     36    "Segmenter",
     37    lazySegmenterData.requestedLocales,
     38    lazySegmenterData.opt,
     39    Segmenter.relevantExtensionKeys,
     40    localeData
     41  );
     42 
     43  // Step 11.
     44  internalProps.locale = r.locale;
     45 
     46  // Step 13.
     47  internalProps.granularity = lazySegmenterData.granularity;
     48 
     49  // The caller is responsible for associating |internalProps| with the right
     50  // object using |setInternalProperties|.
     51  return internalProps;
     52 }
     53 
     54 /**
     55 * Returns an object containing the Segmenter internal properties of |obj|.
     56 */
     57 function getSegmenterInternals(obj) {
     58  assert(IsObject(obj), "getSegmenterInternals called with non-object");
     59  assert(
     60    intl_GuardToSegmenter(obj) !== null,
     61    "getSegmenterInternals called with non-Segmenter"
     62  );
     63 
     64  var internals = getIntlObjectInternals(obj);
     65  assert(
     66    internals.type === "Segmenter",
     67    "bad type escaped getIntlObjectInternals"
     68  );
     69 
     70  // If internal properties have already been computed, use them.
     71  var internalProps = maybeInternalProperties(internals);
     72  if (internalProps) {
     73    return internalProps;
     74  }
     75 
     76  // Otherwise it's time to fully create them.
     77  internalProps = resolveSegmenterInternals(internals.lazyData);
     78  setInternalProperties(internals, internalProps);
     79  return internalProps;
     80 }
     81 
     82 /**
     83 * Intl.Segmenter ( [ locales [ , options ] ] )
     84 *
     85 * Initializes an object as a Segmenter.
     86 *
     87 * This method is complicated a moderate bit by its implementing initialization
     88 * as a *lazy* concept.  Everything that must happen now, does -- but we defer
     89 * all the work we can until the object is actually used as a Segmenter.
     90 * This later work occurs in |resolveSegmenterInternals|; steps not noted here
     91 * occur there.
     92 */
     93 function InitializeSegmenter(segmenter, locales, options) {
     94  assert(IsObject(segmenter), "InitializeSegmenter called with non-object");
     95  assert(
     96    intl_GuardToSegmenter(segmenter) !== null,
     97    "InitializeSegmenter called with non-Segmenter"
     98  );
     99 
    100  // Lazy Segmenter data has the following structure:
    101  //
    102  //   {
    103  //     requestedLocales: List of locales,
    104  //
    105  //     opt: // opt object computed in InitializeSegmenter
    106  //       {
    107  //         localeMatcher: "lookup" / "best fit",
    108  //       }
    109  //
    110  //     granularity: "grapheme" / "word" / "sentence",
    111  //   }
    112  //
    113  // Note that lazy data is only installed as a final step of initialization,
    114  // so every Segmenter lazy data object has *all* these properties, never a
    115  // subset of them.
    116  var lazySegmenterData = std_Object_create(null);
    117 
    118  // Step 4.
    119  var requestedLocales = CanonicalizeLocaleList(locales);
    120  lazySegmenterData.requestedLocales = requestedLocales;
    121 
    122  // Step 5.
    123  if (options === undefined) {
    124    options = std_Object_create(null);
    125  } else if (!IsObject(options)) {
    126    ThrowTypeError(
    127      JSMSG_OBJECT_REQUIRED,
    128      options === null ? "null" : typeof options
    129    );
    130  }
    131 
    132  // Step 6.
    133  var opt = NEW_RECORD();
    134  lazySegmenterData.opt = opt;
    135 
    136  // Steps 7-8.
    137  var matcher = GetOption(
    138    options,
    139    "localeMatcher",
    140    "string",
    141    ["lookup", "best fit"],
    142    "best fit"
    143  );
    144  opt.localeMatcher = matcher;
    145 
    146  // Steps 12-13.
    147  var granularity = GetOption(
    148    options,
    149    "granularity",
    150    "string",
    151    ["grapheme", "word", "sentence"],
    152    "grapheme"
    153  );
    154  lazySegmenterData.granularity = granularity;
    155 
    156  // We've done everything that must be done now: mark the lazy data as fully
    157  // computed and install it.
    158  initializeIntlObject(segmenter, "Segmenter", lazySegmenterData);
    159 }
    160 
    161 /**
    162 * Intl.Segmenter.prototype.segment ( string )
    163 *
    164 * Create a new Segments object.
    165 */
    166 function Intl_Segmenter_segment(value) {
    167  // Step 1.
    168  var segmenter = this;
    169 
    170  // Step 2.
    171  if (
    172    !IsObject(segmenter) ||
    173    (segmenter = intl_GuardToSegmenter(segmenter)) === null
    174  ) {
    175    return callFunction(
    176      intl_CallSegmenterMethodIfWrapped,
    177      this,
    178      value,
    179      "Intl_Segmenter_segment"
    180    );
    181  }
    182 
    183  // Ensure the Segmenter internals are resolved.
    184  getSegmenterInternals(segmenter);
    185 
    186  // Step 3.
    187  var string = ToString(value);
    188 
    189  // Step 4.
    190  return intl_CreateSegmentsObject(segmenter, string);
    191 }
    192 
    193 /**
    194 * Intl.Segmenter.prototype.resolvedOptions ()
    195 *
    196 * Returns the resolved options for a Segmenter object.
    197 */
    198 function Intl_Segmenter_resolvedOptions() {
    199  // Step 1.
    200  var segmenter = this;
    201 
    202  // Step 2.
    203  if (
    204    !IsObject(segmenter) ||
    205    (segmenter = intl_GuardToSegmenter(segmenter)) === null
    206  ) {
    207    return callFunction(
    208      intl_CallSegmenterMethodIfWrapped,
    209      this,
    210      "Intl_Segmenter_resolvedOptions"
    211    );
    212  }
    213 
    214  var internals = getSegmenterInternals(segmenter);
    215 
    216  // Steps 3-4.
    217  var options = {
    218    locale: internals.locale,
    219    granularity: internals.granularity,
    220  };
    221 
    222  // Step 5.
    223  return options;
    224 }
    225 
    226 /**
    227 * CreateSegmentDataObject ( segmenter, string, startIndex, endIndex )
    228 */
    229 function CreateSegmentDataObject(string, boundaries) {
    230  assert(typeof string === "string", "CreateSegmentDataObject");
    231  assert(
    232    IsPackedArray(boundaries) && boundaries.length === 3,
    233    "CreateSegmentDataObject"
    234  );
    235 
    236  var startIndex = boundaries[0];
    237  assert(
    238    typeof startIndex === "number" && (startIndex | 0) === startIndex,
    239    "startIndex is an int32-value"
    240  );
    241 
    242  var endIndex = boundaries[1];
    243  assert(
    244    typeof endIndex === "number" && (endIndex | 0) === endIndex,
    245    "endIndex is an int32-value"
    246  );
    247 
    248  // In our implementation |granularity| is encoded in |isWordLike|.
    249  var isWordLike = boundaries[2];
    250  assert(
    251    typeof isWordLike === "boolean" || isWordLike === undefined,
    252    "isWordLike is either a boolean or undefined"
    253  );
    254 
    255  // Step 1 (Not applicable).
    256 
    257  // Step 2.
    258  assert(startIndex >= 0, "startIndex is a positive number");
    259 
    260  // Step 3.
    261  assert(
    262    endIndex <= string.length,
    263    "endIndex is less-than-equals the string length"
    264  );
    265 
    266  // Step 4.
    267  assert(startIndex < endIndex, "startIndex is strictly less than endIndex");
    268 
    269  // Step 6.
    270  var segment = Substring(string, startIndex, endIndex - startIndex);
    271 
    272  // Steps 5, 7-12.
    273  if (isWordLike === undefined) {
    274    return {
    275      segment,
    276      index: startIndex,
    277      input: string,
    278    };
    279  }
    280 
    281  return {
    282    segment,
    283    index: startIndex,
    284    input: string,
    285    isWordLike,
    286  };
    287 }
    288 
    289 /**
    290 * %Segments.prototype%.containing ( index )
    291 *
    292 * Return a Segment Data object describing the segment at the given index. If
    293 * the index exceeds the string bounds, undefined is returned.
    294 */
    295 function Intl_Segments_containing(index) {
    296  // Step 1.
    297  var segments = this;
    298 
    299  // Step 2.
    300  if (
    301    !IsObject(segments) ||
    302    (segments = intl_GuardToSegments(segments)) === null
    303  ) {
    304    return callFunction(
    305      intl_CallSegmentsMethodIfWrapped,
    306      this,
    307      index,
    308      "Intl_Segments_containing"
    309    );
    310  }
    311 
    312  // Step 3 (not applicable).
    313 
    314  // Step 4.
    315  var string = UnsafeGetStringFromReservedSlot(
    316    segments,
    317    INTL_SEGMENTS_STRING_SLOT
    318  );
    319 
    320  // Step 5.
    321  var len = string.length;
    322 
    323  // Step 6.
    324  var n = ToInteger(index);
    325 
    326  // Step 7.
    327  if (n < 0 || n >= len) {
    328    return undefined;
    329  }
    330 
    331  // Steps 8-9.
    332  var boundaries = intl_FindSegmentBoundaries(segments, n | 0);
    333 
    334  // Step 10.
    335  return CreateSegmentDataObject(string, boundaries);
    336 }
    337 
    338 /**
    339 * %Segments.prototype% [ @@iterator ] ()
    340 *
    341 * Create a new Segment Iterator object.
    342 */
    343 function Intl_Segments_iterator() {
    344  // Step 1.
    345  var segments = this;
    346 
    347  // Step 2.
    348  if (
    349    !IsObject(segments) ||
    350    (segments = intl_GuardToSegments(segments)) === null
    351  ) {
    352    return callFunction(
    353      intl_CallSegmentsMethodIfWrapped,
    354      this,
    355      "Intl_Segments_iterator"
    356    );
    357  }
    358 
    359  // Steps 3-5.
    360  return intl_CreateSegmentIterator(segments);
    361 }
    362 
    363 /**
    364 * %SegmentIterator.prototype%.next ()
    365 *
    366 * Advance the Segment iterator to the next segment within the string.
    367 */
    368 function Intl_SegmentIterator_next() {
    369  // Step 1.
    370  var iterator = this;
    371 
    372  // Step 2.
    373  if (
    374    !IsObject(iterator) ||
    375    (iterator = intl_GuardToSegmentIterator(iterator)) === null)
    376  {
    377    return callFunction(
    378      intl_CallSegmentIteratorMethodIfWrapped,
    379      this,
    380      "Intl_SegmentIterator_next"
    381    );
    382  }
    383 
    384  // Step 3 (Not applicable).
    385 
    386  // Step 4.
    387  var string = UnsafeGetStringFromReservedSlot(
    388    iterator,
    389    INTL_SEGMENT_ITERATOR_STRING_SLOT
    390  );
    391 
    392  // Step 5.
    393  var index = UnsafeGetInt32FromReservedSlot(
    394    iterator,
    395    INTL_SEGMENT_ITERATOR_INDEX_SLOT
    396  );
    397 
    398  var result = { value: undefined, done: false };
    399 
    400  // Step 7.
    401  if (index === string.length) {
    402    result.done = true;
    403    return result;
    404  }
    405 
    406  // Steps 6, 8.
    407  var boundaries = intl_FindNextSegmentBoundaries(iterator);
    408 
    409  // Step 9.
    410  result.value = CreateSegmentDataObject(string, boundaries);
    411 
    412  // Step 10.
    413  return result;
    414 }