hb-bit-page.hh (12054B)
1 /* 2 * Copyright © 2012,2017 Google, Inc. 3 * Copyright © 2021 Behdad Esfahbod 4 * 5 * This is part of HarfBuzz, a text shaping library. 6 * 7 * Permission is hereby granted, without written agreement and without 8 * license or royalty fees, to use, copy, modify, and distribute this 9 * software and its documentation for any purpose, provided that the 10 * above copyright notice and the following two paragraphs appear in 11 * all copies of this software. 12 * 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 17 * DAMAGE. 18 * 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 24 * 25 * Google Author(s): Behdad Esfahbod 26 */ 27 28 #ifndef HB_BIT_PAGE_HH 29 #define HB_BIT_PAGE_HH 30 31 #include "hb.hh" 32 33 34 /* Compiler-assisted vectorization. */ 35 36 /* Type behaving similar to vectorized vars defined using __attribute__((vector_size(...))), 37 * basically a fixed-size bitset. We can't use the compiler type because hb_vector_t cannot 38 * guarantee alignment requirements. */ 39 template <typename elt_t, unsigned int byte_size> 40 struct hb_vector_size_t 41 { 42 elt_t& operator [] (unsigned int i) { return v[i]; } 43 const elt_t& operator [] (unsigned int i) const { return v[i]; } 44 45 void init0 () 46 { 47 for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++) 48 v[i] = 0; 49 } 50 void init1 () 51 { 52 for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++) 53 v[i] = (elt_t) -1; 54 } 55 56 template <typename Op> 57 hb_vector_size_t process (const Op& op) const 58 { 59 hb_vector_size_t r; 60 for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++) 61 r.v[i] = op (v[i]); 62 return r; 63 } 64 template <typename Op> 65 hb_vector_size_t process (const Op& op, const hb_vector_size_t &o) const 66 { 67 hb_vector_size_t r; 68 for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++) 69 r.v[i] = op (v[i], o.v[i]); 70 return r; 71 } 72 hb_vector_size_t operator | (const hb_vector_size_t &o) const 73 { return process (hb_bitwise_or, o); } 74 hb_vector_size_t operator & (const hb_vector_size_t &o) const 75 { return process (hb_bitwise_and, o); } 76 hb_vector_size_t operator ^ (const hb_vector_size_t &o) const 77 { return process (hb_bitwise_xor, o); } 78 hb_vector_size_t operator ~ () const 79 { return process (hb_bitwise_neg); } 80 81 operator bool () const 82 { 83 for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++) 84 if (v[i]) 85 return true; 86 return false; 87 } 88 operator unsigned int () const 89 { 90 unsigned int r = 0; 91 for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++) 92 r += hb_popcount (v[i]); 93 return r; 94 } 95 bool operator == (const hb_vector_size_t &o) const 96 { 97 for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++) 98 if (v[i] != o.v[i]) 99 return false; 100 return true; 101 } 102 103 hb_array_t<const elt_t> iter () const 104 { return hb_array (v); } 105 106 private: 107 static_assert (0 == byte_size % sizeof (elt_t), ""); 108 elt_t v[byte_size / sizeof (elt_t)]; 109 }; 110 111 112 struct hb_bit_page_t 113 { 114 hb_bit_page_t () { init0 (); } 115 116 void init0 () { v.init0 (); population = 0; } 117 void init1 () { v.init1 (); population = PAGE_BITS; } 118 119 void dirty () { population = UINT_MAX; } 120 121 static inline constexpr unsigned len () 122 { return ARRAY_LENGTH_CONST (v); } 123 124 operator bool () const { return !is_empty (); } 125 bool is_empty () const 126 { 127 if (has_population ()) return !population; 128 bool empty = !v; 129 if (empty) population = 0; 130 return empty; 131 } 132 uint32_t hash () const 133 { 134 return hb_bytes_t ((const char *) &v, sizeof (v)).hash (); 135 } 136 137 void add (hb_codepoint_t g) { elt (g) |= mask (g); dirty (); } 138 void del (hb_codepoint_t g) { elt (g) &= ~mask (g); dirty (); } 139 void set (hb_codepoint_t g, bool value) { if (value) add (g); else del (g); } 140 bool get (hb_codepoint_t g) const { return elt (g) & mask (g); } 141 bool may_have (hb_codepoint_t g) const { return get (g); } 142 143 bool operator [] (hb_codepoint_t g) const { return get (g); } 144 bool operator () (hb_codepoint_t g) const { return get (g); } 145 bool has (hb_codepoint_t g) const { return get (g); } 146 147 void add_range (hb_codepoint_t a, hb_codepoint_t b) 148 { 149 elt_t *la = &elt (a); 150 elt_t *lb = &elt (b); 151 if (la == lb) 152 *la |= (mask (b) << 1) - mask(a); 153 else 154 { 155 *la |= ~(mask (a) - 1llu); 156 la++; 157 158 hb_memset (la, 0xff, (char *) lb - (char *) la); 159 160 *lb |= ((mask (b) << 1) - 1llu); 161 } 162 dirty (); 163 } 164 void del_range (hb_codepoint_t a, hb_codepoint_t b) 165 { 166 elt_t *la = &elt (a); 167 elt_t *lb = &elt (b); 168 if (la == lb) 169 *la &= ~((mask (b) << 1llu) - mask(a)); 170 else 171 { 172 *la &= mask (a) - 1; 173 la++; 174 175 hb_memset (la, 0, (char *) lb - (char *) la); 176 177 *lb &= ~((mask (b) << 1) - 1llu); 178 } 179 dirty (); 180 } 181 void set_range (hb_codepoint_t a, hb_codepoint_t b, bool v) 182 { if (v) add_range (a, b); else del_range (a, b); } 183 184 185 // Writes out page values to the array p. Returns the number of values 186 // written. At most size codepoints will be written. 187 unsigned int write (uint32_t base, 188 unsigned int start_value, 189 hb_codepoint_t *p, 190 unsigned int size) const 191 { 192 unsigned int start_v = start_value / ELT_BITS; 193 unsigned int start_bit = start_value & ELT_MASK; 194 unsigned int count = 0; 195 for (unsigned i = start_v; i < len () && count < size; i++) 196 { 197 elt_t bits = v[i]; 198 uint32_t v_base = base | (i * ELT_BITS); 199 for (unsigned int j = start_bit; j < ELT_BITS && count < size; j++) 200 { 201 if ((elt_t(1) << j) & bits) { 202 *p++ = v_base | j; 203 count++; 204 } 205 } 206 start_bit = 0; 207 } 208 return count; 209 } 210 211 // Writes out the values NOT in this page to the array p. Returns the 212 // number of values written. At most size codepoints will be written. 213 // Returns the number of codepoints written. next_value holds the next value 214 // that should be written (if not present in this page). This is used to fill 215 // any missing value gaps between this page and the previous page, if any. 216 // next_value is updated to one more than the last value present in this page. 217 unsigned int write_inverted (uint32_t base, 218 unsigned int start_value, 219 hb_codepoint_t *p, 220 unsigned int size, 221 hb_codepoint_t *next_value) const 222 { 223 unsigned int start_v = start_value / ELT_BITS; 224 unsigned int start_bit = start_value & ELT_MASK; 225 unsigned int count = 0; 226 for (unsigned i = start_v; i < len () && count < size; i++) 227 { 228 elt_t bits = v[i]; 229 uint32_t v_offset = i * ELT_BITS; 230 for (unsigned int j = start_bit; j < ELT_BITS && count < size; j++) 231 { 232 if ((elt_t(1) << j) & bits) 233 { 234 hb_codepoint_t value = base | v_offset | j; 235 // Emit all the missing values from next_value up to value - 1. 236 for (hb_codepoint_t k = *next_value; k < value && count < size; k++) 237 { 238 *p++ = k; 239 count++; 240 } 241 // Skip over this value; 242 *next_value = value + 1; 243 } 244 } 245 start_bit = 0; 246 } 247 return count; 248 } 249 250 bool operator == (const hb_bit_page_t &other) const { return is_equal (other); } 251 bool is_equal (const hb_bit_page_t &other) const { return v == other.v; } 252 bool intersects (const hb_bit_page_t &other) const 253 { 254 for (unsigned i = 0; i < len (); i++) 255 if (v[i] & other.v[i]) 256 return true; 257 return false; 258 } 259 bool may_intersect (const hb_bit_page_t &other) const 260 { return intersects (other); } 261 262 bool operator <= (const hb_bit_page_t &larger_page) const { return is_subset (larger_page); } 263 bool is_subset (const hb_bit_page_t &larger_page) const 264 { 265 if (has_population () && larger_page.has_population () && 266 population > larger_page.population) 267 return false; 268 269 for (unsigned i = 0; i < len (); i++) 270 if (~larger_page.v[i] & v[i]) 271 return false; 272 return true; 273 } 274 275 bool has_population () const { return population != UINT_MAX; } 276 unsigned get_population () const 277 { 278 if (has_population ()) return population; 279 return population = v; 280 } 281 282 bool next (hb_codepoint_t *codepoint) const 283 { 284 unsigned int m = (*codepoint + 1) & MASK; 285 if (!m) 286 { 287 *codepoint = INVALID; 288 return false; 289 } 290 unsigned int i = m / ELT_BITS; 291 unsigned int j = m & ELT_MASK; 292 293 const elt_t vv = v[i] & ~((elt_t (1) << j) - 1); 294 for (const elt_t *p = &vv; i < len (); p = ((const elt_t *) &v[0]) + (++i)) 295 if (*p) 296 { 297 *codepoint = i * ELT_BITS + elt_get_min (*p); 298 return true; 299 } 300 301 *codepoint = INVALID; 302 return false; 303 } 304 bool previous (hb_codepoint_t *codepoint) const 305 { 306 unsigned int m = (*codepoint - 1) & MASK; 307 if (m == MASK) 308 { 309 *codepoint = INVALID; 310 return false; 311 } 312 unsigned int i = m / ELT_BITS; 313 unsigned int j = m & ELT_MASK; 314 315 /* Fancy mask to avoid shifting by elt_t bitsize, which is undefined. */ 316 const elt_t mask = j < 8 * sizeof (elt_t) - 1 ? 317 ((elt_t (1) << (j + 1)) - 1) : 318 (elt_t) -1; 319 const elt_t vv = v[i] & mask; 320 const elt_t *p = &vv; 321 while (true) 322 { 323 if (*p) 324 { 325 *codepoint = i * ELT_BITS + elt_get_max (*p); 326 return true; 327 } 328 if ((int) i <= 0) break; 329 p = &v[--i]; 330 } 331 332 *codepoint = INVALID; 333 return false; 334 } 335 hb_codepoint_t get_min () const 336 { 337 for (unsigned int i = 0; i < len (); i++) 338 if (v[i]) 339 return i * ELT_BITS + elt_get_min (v[i]); 340 return INVALID; 341 } 342 hb_codepoint_t get_max () const 343 { 344 for (int i = len () - 1; i >= 0; i--) 345 if (v[i]) 346 return i * ELT_BITS + elt_get_max (v[i]); 347 return 0; 348 } 349 350 /* 351 * Iterator implementation. 352 */ 353 struct iter_t : hb_iter_with_fallback_t<iter_t, hb_codepoint_t> 354 { 355 static constexpr bool is_sorted_iterator = true; 356 iter_t (const hb_bit_page_t &s_ = Null (hb_bit_page_t), bool init = true) : s (&s_), v (INVALID) 357 { 358 if (init) 359 v = s->get_min (); 360 } 361 362 typedef hb_codepoint_t __item_t__; 363 hb_codepoint_t __item__ () const { return v; } 364 bool __more__ () const { return v != INVALID; } 365 void __next__ () { 366 s->next (&v); 367 } 368 void __prev__ () { s->previous (&v); } 369 iter_t end () const { return iter_t (*s, false); } 370 bool operator != (const iter_t& o) const 371 { return v != o.v; } 372 373 protected: 374 const hb_bit_page_t *s; 375 hb_codepoint_t v; 376 }; 377 iter_t iter () const { return iter_t (*this); } 378 operator iter_t () const { return iter (); } 379 380 static constexpr hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; 381 382 typedef unsigned long long elt_t; 383 static constexpr unsigned PAGE_BITS_LOG_2 = 9; // 512 bits 384 static constexpr unsigned PAGE_BITS = 1 << PAGE_BITS_LOG_2; 385 static_assert (1 << PAGE_BITS_LOG_2 == PAGE_BITS, ""); 386 static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, ""); 387 static constexpr unsigned PAGE_BITMASK = PAGE_BITS - 1; 388 389 static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); } 390 static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; } 391 392 typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t; 393 394 static constexpr unsigned ELT_BITS = sizeof (elt_t) * 8; 395 static constexpr unsigned ELT_MASK = ELT_BITS - 1; 396 397 static constexpr unsigned BITS = sizeof (vector_t) * 8; 398 static constexpr unsigned MASK = BITS - 1; 399 static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, ""); 400 401 elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; } 402 const elt_t& elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; } 403 static constexpr elt_t mask (hb_codepoint_t g) { return elt_t (1) << (g & ELT_MASK); } 404 405 mutable unsigned population; 406 vector_t v; 407 }; 408 409 410 #endif /* HB_BIT_PAGE_HH */