hb-set-digest.hh (5454B)
1 /* 2 * Copyright © 2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_DIGEST_HH 28 #define HB_SET_DIGEST_HH 29 30 #include "hb.hh" 31 #include "hb-machinery.hh" 32 33 /* 34 * The set-digests implement "filters" that support "approximate 35 * member query". Conceptually these are like Bloom Filter and 36 * Quotient Filter, however, much smaller, faster, and designed 37 * to fit the requirements of our uses for glyph coverage queries. 38 * 39 * Our filters are highly accurate if the lookup covers fairly local 40 * set of glyphs, but fully flooded and ineffective if coverage is 41 * all over the place. 42 * 43 * The way these are used is that the filter is first populated by 44 * a lookup's or subtable's Coverage table(s), and then when we 45 * want to apply the lookup or subtable to a glyph, before trying 46 * to apply, we ask the filter if the glyph may be covered. If it's 47 * not, we return early. We can also match a digest against another 48 * digest. 49 * 50 * We use these filters at three levels: 51 * - If the digest for all the glyphs in the buffer as a whole 52 * does not match the digest for the lookup, skip the lookup. 53 * - For each glyph, if it doesn't match the lookup digest, 54 * skip it. 55 * - For each glyph, if it doesn't match the subtable digest, 56 * skip it. 57 * 58 * The filter we use is a combination of three bits-pattern 59 * filters. A bits-pattern filter checks a number of bits (5 or 6) 60 * of the input number (glyph-id in most cases) and checks whether 61 * its pattern is amongst the patterns of any of the accepted values. 62 * The accepted patterns are represented as a "long" integer. Each 63 * check is done using four bitwise operations only. 64 */ 65 66 static constexpr unsigned hb_set_digest_shifts[] = {4, 0, 6}; 67 68 struct hb_set_digest_t 69 { 70 // No science in these. Intuition and testing only. 71 using mask_t = uint64_t; 72 73 static constexpr unsigned n = ARRAY_LENGTH_CONST (hb_set_digest_shifts); 74 static constexpr unsigned mask_bytes = sizeof (mask_t); 75 static constexpr unsigned mask_bits = sizeof (mask_t) * 8; 76 static constexpr hb_codepoint_t mb1 = mask_bits - 1; 77 static constexpr mask_t one = 1; 78 static constexpr mask_t all = (mask_t) -1; 79 80 void init () 81 { for (unsigned i = 0; i < n; i++) masks[i] = 0; } 82 83 void clear () { init (); } 84 85 static hb_set_digest_t full () 86 { 87 hb_set_digest_t d; 88 for (unsigned i = 0; i < n; i++) d.masks[i] = all; 89 return d; 90 } 91 92 void union_ (const hb_set_digest_t &o) 93 { for (unsigned i = 0; i < n; i++) masks[i] |= o.masks[i]; } 94 95 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 96 { 97 bool ret; 98 99 ret = false; 100 for (unsigned i = 0; i < n; i++) 101 if (masks[i] != all) 102 ret = true; 103 if (!ret) return false; 104 105 ret = false; 106 for (unsigned i = 0; i < n; i++) 107 { 108 mask_t shift = hb_set_digest_shifts[i]; 109 if ((b >> shift) - (a >> shift) >= mb1) 110 masks[i] = all; 111 else 112 { 113 mask_t ma = one << ((a >> shift) & mb1); 114 mask_t mb = one << ((b >> shift) & mb1); 115 masks[i] |= mb + (mb - ma) - (mb < ma); 116 ret = true; 117 } 118 } 119 return ret; 120 } 121 122 template <typename T> 123 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 124 { 125 for (unsigned int i = 0; i < count; i++) 126 { 127 add (*array); 128 array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); 129 } 130 } 131 template <typename T> 132 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } 133 template <typename T> 134 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 135 { 136 add_array (array, count, stride); 137 return true; 138 } 139 template <typename T> 140 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } 141 142 bool operator [] (hb_codepoint_t g) const 143 { return may_have (g); } 144 145 146 void add (hb_codepoint_t g) 147 { 148 for (unsigned i = 0; i < n; i++) 149 masks[i] |= one << ((g >> hb_set_digest_shifts[i]) & mb1); 150 } 151 152 HB_ALWAYS_INLINE 153 bool may_have (hb_codepoint_t g) const 154 { 155 for (unsigned i = 0; i < n; i++) 156 if (!(masks[i] & (one << ((g >> hb_set_digest_shifts[i]) & mb1)))) 157 return false; 158 return true; 159 } 160 161 bool may_intersect (const hb_set_digest_t &o) const 162 { 163 for (unsigned i = 0; i < n; i++) 164 if (!(masks[i] & o.masks[i])) 165 return false; 166 return true; 167 } 168 169 private: 170 171 mask_t masks[n] = {}; 172 }; 173 174 175 #endif /* HB_SET_DIGEST_HH */