tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hb-set-digest.hh (5454B)


      1 /*
      2 * Copyright © 2012  Google, Inc.
      3 *
      4 *  This is part of HarfBuzz, a text shaping library.
      5 *
      6 * Permission is hereby granted, without written agreement and without
      7 * license or royalty fees, to use, copy, modify, and distribute this
      8 * software and its documentation for any purpose, provided that the
      9 * above copyright notice and the following two paragraphs appear in
     10 * all copies of this software.
     11 *
     12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16 * DAMAGE.
     17 *
     18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23 *
     24 * Google Author(s): Behdad Esfahbod
     25 */
     26 
     27 #ifndef HB_SET_DIGEST_HH
     28 #define HB_SET_DIGEST_HH
     29 
     30 #include "hb.hh"
     31 #include "hb-machinery.hh"
     32 
     33 /*
     34 * The set-digests implement "filters" that support "approximate
     35 * member query".  Conceptually these are like Bloom Filter and
     36 * Quotient Filter, however, much smaller, faster, and designed
     37 * to fit the requirements of our uses for glyph coverage queries.
     38 *
     39 * Our filters are highly accurate if the lookup covers fairly local
     40 * set of glyphs, but fully flooded and ineffective if coverage is
     41 * all over the place.
     42 *
     43 * The way these are used is that the filter is first populated by
     44 * a lookup's or subtable's Coverage table(s), and then when we
     45 * want to apply the lookup or subtable to a glyph, before trying
     46 * to apply, we ask the filter if the glyph may be covered. If it's
     47 * not, we return early.  We can also match a digest against another
     48 * digest.
     49 *
     50 * We use these filters at three levels:
     51 *   - If the digest for all the glyphs in the buffer as a whole
     52 *     does not match the digest for the lookup, skip the lookup.
     53 *   - For each glyph, if it doesn't match the lookup digest,
     54 *     skip it.
     55 *   - For each glyph, if it doesn't match the subtable digest,
     56 *     skip it.
     57 *
     58 * The filter we use is a combination of three bits-pattern
     59 * filters. A bits-pattern filter checks a number of bits (5 or 6)
     60 * of the input number (glyph-id in most cases) and checks whether
     61 * its pattern is amongst the patterns of any of the accepted values.
     62 * The accepted patterns are represented as a "long" integer. Each
     63 * check is done using four bitwise operations only.
     64 */
     65 
     66 static constexpr unsigned hb_set_digest_shifts[] = {4, 0, 6};
     67 
     68 struct hb_set_digest_t
     69 {
     70  // No science in these. Intuition and testing only.
     71  using mask_t = uint64_t;
     72 
     73  static constexpr unsigned n = ARRAY_LENGTH_CONST (hb_set_digest_shifts);
     74  static constexpr unsigned mask_bytes = sizeof (mask_t);
     75  static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
     76  static constexpr hb_codepoint_t mb1 = mask_bits - 1;
     77  static constexpr mask_t one = 1;
     78  static constexpr mask_t all = (mask_t) -1;
     79 
     80  void init ()
     81  { for (unsigned i = 0; i < n; i++) masks[i] = 0; }
     82 
     83  void clear () { init (); }
     84 
     85  static hb_set_digest_t full ()
     86  {
     87    hb_set_digest_t d;
     88    for (unsigned i = 0; i < n; i++) d.masks[i] = all;
     89    return d;
     90  }
     91 
     92  void union_ (const hb_set_digest_t &o)
     93  { for (unsigned i = 0; i < n; i++) masks[i] |= o.masks[i]; }
     94 
     95  bool add_range (hb_codepoint_t a, hb_codepoint_t b)
     96  {
     97    bool ret;
     98 
     99    ret = false;
    100    for (unsigned i = 0; i < n; i++)
    101      if (masks[i] != all)
    102 ret = true;
    103    if (!ret) return false;
    104 
    105    ret = false;
    106    for (unsigned i = 0; i < n; i++)
    107    {
    108      mask_t shift = hb_set_digest_shifts[i];
    109      if ((b >> shift) - (a >> shift) >= mb1)
    110 masks[i] = all;
    111      else
    112      {
    113 mask_t ma = one << ((a >> shift) & mb1);
    114 mask_t mb = one << ((b >> shift) & mb1);
    115 masks[i] |= mb + (mb - ma) - (mb < ma);
    116 ret = true;
    117      }
    118    }
    119    return ret;
    120  }
    121 
    122  template <typename T>
    123  void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
    124  {
    125    for (unsigned int i = 0; i < count; i++)
    126    {
    127      add (*array);
    128      array = &StructAtOffsetUnaligned<T> ((const void *) array, stride);
    129    }
    130  }
    131  template <typename T>
    132  void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
    133  template <typename T>
    134  bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
    135  {
    136    add_array (array, count, stride);
    137    return true;
    138  }
    139  template <typename T>
    140  bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
    141 
    142  bool operator [] (hb_codepoint_t g) const
    143  { return may_have (g); }
    144 
    145 
    146  void add (hb_codepoint_t g)
    147  {
    148    for (unsigned i = 0; i < n; i++)
    149      masks[i] |= one << ((g >> hb_set_digest_shifts[i]) & mb1);
    150  }
    151 
    152  HB_ALWAYS_INLINE
    153  bool may_have (hb_codepoint_t g) const
    154  {
    155    for (unsigned i = 0; i < n; i++)
    156      if (!(masks[i] & (one << ((g >> hb_set_digest_shifts[i]) & mb1))))
    157 return false;
    158    return true;
    159  }
    160 
    161  bool may_intersect (const hb_set_digest_t &o) const
    162  {
    163    for (unsigned i = 0; i < n; i++)
    164      if (!(masks[i] & o.masks[i]))
    165 return false;
    166    return true;
    167  }
    168 
    169  private:
    170 
    171  mask_t masks[n] = {};
    172 };
    173 
    174 
    175 #endif /* HB_SET_DIGEST_HH */