hb-ot-cmap-table.hh (66406B)
1 /* 2 * Copyright © 2014 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_OT_CMAP_TABLE_HH 28 #define HB_OT_CMAP_TABLE_HH 29 30 #include "hb-ot-os2-table.hh" 31 #include "hb-ot-shaper-arabic-pua.hh" 32 #include "hb-open-type.hh" 33 #include "hb-set.hh" 34 #include "hb-cache.hh" 35 36 /* 37 * cmap -- Character to Glyph Index Mapping 38 * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap 39 */ 40 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p') 41 42 namespace OT { 43 44 static inline uint8_t unicode_to_macroman (hb_codepoint_t u) 45 { 46 static const struct unicode_to_macroman_t 47 { 48 uint16_t unicode; 49 uint8_t macroman; 50 } 51 mapping[] = 52 { 53 { 0x00A0, 0xCA }, 54 { 0x00A1, 0xC1 }, 55 { 0x00A2, 0xA2 }, 56 { 0x00A3, 0xA3 }, 57 { 0x00A5, 0xB4 }, 58 { 0x00A7, 0xA4 }, 59 { 0x00A8, 0xAC }, 60 { 0x00A9, 0xA9 }, 61 { 0x00AA, 0xBB }, 62 { 0x00AB, 0xC7 }, 63 { 0x00AC, 0xC2 }, 64 { 0x00AE, 0xA8 }, 65 { 0x00AF, 0xF8 }, 66 { 0x00B0, 0xA1 }, 67 { 0x00B1, 0xB1 }, 68 { 0x00B4, 0xAB }, 69 { 0x00B5, 0xB5 }, 70 { 0x00B6, 0xA6 }, 71 { 0x00B7, 0xE1 }, 72 { 0x00B8, 0xFC }, 73 { 0x00BA, 0xBC }, 74 { 0x00BB, 0xC8 }, 75 { 0x00BF, 0xC0 }, 76 { 0x00C0, 0xCB }, 77 { 0x00C1, 0xE7 }, 78 { 0x00C2, 0xE5 }, 79 { 0x00C3, 0xCC }, 80 { 0x00C4, 0x80 }, 81 { 0x00C5, 0x81 }, 82 { 0x00C6, 0xAE }, 83 { 0x00C7, 0x82 }, 84 { 0x00C8, 0xE9 }, 85 { 0x00C9, 0x83 }, 86 { 0x00CA, 0xE6 }, 87 { 0x00CB, 0xE8 }, 88 { 0x00CC, 0xED }, 89 { 0x00CD, 0xEA }, 90 { 0x00CE, 0xEB }, 91 { 0x00CF, 0xEC }, 92 { 0x00D1, 0x84 }, 93 { 0x00D2, 0xF1 }, 94 { 0x00D3, 0xEE }, 95 { 0x00D4, 0xEF }, 96 { 0x00D5, 0xCD }, 97 { 0x00D6, 0x85 }, 98 { 0x00D8, 0xAF }, 99 { 0x00D9, 0xF4 }, 100 { 0x00DA, 0xF2 }, 101 { 0x00DB, 0xF3 }, 102 { 0x00DC, 0x86 }, 103 { 0x00DF, 0xA7 }, 104 { 0x00E0, 0x88 }, 105 { 0x00E1, 0x87 }, 106 { 0x00E2, 0x89 }, 107 { 0x00E3, 0x8B }, 108 { 0x00E4, 0x8A }, 109 { 0x00E5, 0x8C }, 110 { 0x00E6, 0xBE }, 111 { 0x00E7, 0x8D }, 112 { 0x00E8, 0x8F }, 113 { 0x00E9, 0x8E }, 114 { 0x00EA, 0x90 }, 115 { 0x00EB, 0x91 }, 116 { 0x00EC, 0x93 }, 117 { 0x00ED, 0x92 }, 118 { 0x00EE, 0x94 }, 119 { 0x00EF, 0x95 }, 120 { 0x00F1, 0x96 }, 121 { 0x00F2, 0x98 }, 122 { 0x00F3, 0x97 }, 123 { 0x00F4, 0x99 }, 124 { 0x00F5, 0x9B }, 125 { 0x00F6, 0x9A }, 126 { 0x00F7, 0xD6 }, 127 { 0x00F8, 0xBF }, 128 { 0x00F9, 0x9D }, 129 { 0x00FA, 0x9C }, 130 { 0x00FB, 0x9E }, 131 { 0x00FC, 0x9F }, 132 { 0x00FF, 0xD8 }, 133 { 0x0131, 0xF5 }, 134 { 0x0152, 0xCE }, 135 { 0x0153, 0xCF }, 136 { 0x0178, 0xD9 }, 137 { 0x0192, 0xC4 }, 138 { 0x02C6, 0xF6 }, 139 { 0x02C7, 0xFF }, 140 { 0x02D8, 0xF9 }, 141 { 0x02D9, 0xFA }, 142 { 0x02DA, 0xFB }, 143 { 0x02DB, 0xFE }, 144 { 0x02DC, 0xF7 }, 145 { 0x02DD, 0xFD }, 146 { 0x03A9, 0xBD }, 147 { 0x03C0, 0xB9 }, 148 { 0x2013, 0xD0 }, 149 { 0x2014, 0xD1 }, 150 { 0x2018, 0xD4 }, 151 { 0x2019, 0xD5 }, 152 { 0x201A, 0xE2 }, 153 { 0x201C, 0xD2 }, 154 { 0x201D, 0xD3 }, 155 { 0x201E, 0xE3 }, 156 { 0x2020, 0xA0 }, 157 { 0x2021, 0xE0 }, 158 { 0x2022, 0xA5 }, 159 { 0x2026, 0xC9 }, 160 { 0x2030, 0xE4 }, 161 { 0x2039, 0xDC }, 162 { 0x203A, 0xDD }, 163 { 0x2044, 0xDA }, 164 { 0x20AC, 0xDB }, 165 { 0x2122, 0xAA }, 166 { 0x2202, 0xB6 }, 167 { 0x2206, 0xC6 }, 168 { 0x220F, 0xB8 }, 169 { 0x2211, 0xB7 }, 170 { 0x221A, 0xC3 }, 171 { 0x221E, 0xB0 }, 172 { 0x222B, 0xBA }, 173 { 0x2248, 0xC5 }, 174 { 0x2260, 0xAD }, 175 { 0x2264, 0xB2 }, 176 { 0x2265, 0xB3 }, 177 { 0x25CA, 0xD7 }, 178 { 0xF8FF, 0xF0 }, 179 { 0xFB01, 0xDE }, 180 { 0xFB02, 0xDF }, 181 }; 182 auto *c = hb_bsearch (u, mapping, ARRAY_LENGTH (mapping), sizeof (mapping[0]), 183 _hb_cmp_operator<uint16_t, uint16_t>); 184 return c ? c->macroman : 0; 185 } 186 187 struct CmapSubtableFormat0 188 { 189 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 190 { 191 hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0; 192 if (unlikely (!gid)) 193 return false; 194 *glyph = gid; 195 return true; 196 } 197 198 unsigned get_language () const 199 { 200 return language; 201 } 202 203 void collect_unicodes (hb_set_t *out) const 204 { 205 for (unsigned int i = 0; i < 256; i++) 206 if (glyphIdArray[i]) 207 out->add (i); 208 } 209 210 void collect_mapping (hb_set_t *unicodes, /* OUT */ 211 hb_map_t *mapping /* OUT */) const 212 { 213 for (unsigned i = 0; i < 256; i++) 214 if (glyphIdArray[i]) 215 { 216 hb_codepoint_t glyph = glyphIdArray[i]; 217 unicodes->add (i); 218 mapping->set (i, glyph); 219 } 220 } 221 222 bool sanitize (hb_sanitize_context_t *c) const 223 { 224 TRACE_SANITIZE (this); 225 return_trace (c->check_struct (this)); 226 } 227 228 protected: 229 HBUINT16 format; /* Format number is set to 0. */ 230 HBUINT16 length; /* Byte length of this subtable. */ 231 HBUINT16 language; /* Ignore. */ 232 HBUINT8 glyphIdArray[256];/* An array that maps character 233 * code to glyph index values. */ 234 public: 235 DEFINE_SIZE_STATIC (6 + 256); 236 }; 237 238 struct CmapSubtableFormat4 239 { 240 241 242 template<typename Iterator, 243 typename Writer, 244 hb_requires (hb_is_iterator (Iterator))> 245 void to_ranges (Iterator it, Writer& range_writer) 246 { 247 hb_codepoint_t start_cp = 0, prev_run_start_cp = 0, run_start_cp = 0, end_cp = 0, last_gid = 0; 248 int run_length = 0 , delta = 0, prev_delta = 0; 249 250 enum { 251 FIRST_SUB_RANGE, 252 FOLLOWING_SUB_RANGE, 253 } mode; 254 255 while (it) { 256 // Start a new range 257 { 258 const auto& pair = *it; 259 start_cp = pair.first; 260 prev_run_start_cp = start_cp; 261 run_start_cp = start_cp; 262 end_cp = start_cp; 263 last_gid = pair.second; 264 run_length = 1; 265 prev_delta = 0; 266 } 267 268 delta = last_gid - start_cp; 269 mode = FIRST_SUB_RANGE; 270 it++; 271 272 while (it) { 273 // Process range 274 const auto& pair = *it; 275 hb_codepoint_t next_cp = pair.first; 276 hb_codepoint_t next_gid = pair.second; 277 if (next_cp != end_cp + 1) { 278 // Current range is over, stop processing. 279 break; 280 } 281 282 if (next_gid == last_gid + 1) { 283 // The current run continues. 284 end_cp = next_cp; 285 run_length++; 286 last_gid = next_gid; 287 it++; 288 continue; 289 } 290 291 // A new run is starting, decide if we want to commit the current run. 292 int split_cost = (mode == FIRST_SUB_RANGE) ? 8 : 16; 293 int run_cost = run_length * 2; 294 if (run_cost >= split_cost) { 295 commit_current_range(start_cp, 296 prev_run_start_cp, 297 run_start_cp, 298 end_cp, 299 delta, 300 prev_delta, 301 split_cost, 302 range_writer); 303 start_cp = next_cp; 304 } 305 306 // Start the new run 307 mode = FOLLOWING_SUB_RANGE; 308 prev_run_start_cp = run_start_cp; 309 run_start_cp = next_cp; 310 end_cp = next_cp; 311 prev_delta = delta; 312 delta = next_gid - run_start_cp; 313 run_length = 1; 314 last_gid = next_gid; 315 it++; 316 } 317 318 // Finalize range 319 commit_current_range (start_cp, 320 prev_run_start_cp, 321 run_start_cp, 322 end_cp, 323 delta, 324 prev_delta, 325 8, 326 range_writer); 327 } 328 329 if (likely (end_cp != 0xFFFF)) { 330 range_writer (0xFFFF, 0xFFFF, 1); 331 } 332 } 333 334 /* 335 * Writes the current range as either one or two ranges depending on what is most efficient. 336 */ 337 template<typename Writer> 338 void commit_current_range (hb_codepoint_t start, 339 hb_codepoint_t prev_run_start, 340 hb_codepoint_t run_start, 341 hb_codepoint_t end, 342 int run_delta, 343 int previous_run_delta, 344 int split_cost, 345 Writer& range_writer) { 346 bool should_split = false; 347 if (start < run_start && run_start < end) { 348 int run_cost = (end - run_start + 1) * 2; 349 if (run_cost >= split_cost) { 350 should_split = true; 351 } 352 } 353 354 // TODO(grieger): handle case where delta is legitimately 0, mark range offset array instead? 355 if (should_split) { 356 if (start == prev_run_start) 357 range_writer (start, run_start - 1, previous_run_delta); 358 else 359 range_writer (start, run_start - 1, 0); 360 range_writer (run_start, end, run_delta); 361 return; 362 } 363 364 365 if (start == run_start) { 366 // Range is only a run 367 range_writer (start, end, run_delta); 368 return; 369 } 370 371 // Write only a single non-run range. 372 range_writer (start, end, 0); 373 } 374 375 template<typename Iterator, 376 hb_requires (hb_is_iterator (Iterator))> 377 unsigned serialize_find_segcount (Iterator it) { 378 struct Counter { 379 unsigned segcount = 0; 380 381 void operator() (hb_codepoint_t start, 382 hb_codepoint_t end, 383 int delta) { 384 segcount++; 385 } 386 } counter; 387 388 to_ranges (+it, counter); 389 return counter.segcount; 390 } 391 392 393 template<typename Iterator, 394 hb_requires (hb_is_iterator (Iterator))> 395 bool serialize_start_end_delta_arrays (hb_serialize_context_t *c, 396 Iterator it, 397 int segcount) 398 { 399 struct Writer { 400 hb_serialize_context_t *serializer_; 401 HBUINT16* end_code_; 402 HBUINT16* start_code_; 403 HBINT16* id_delta_; 404 int index_; 405 406 Writer(hb_serialize_context_t *serializer) 407 : serializer_(serializer), 408 end_code_(nullptr), 409 start_code_(nullptr), 410 id_delta_(nullptr), 411 index_ (0) {} 412 void operator() (hb_codepoint_t start, 413 hb_codepoint_t end, 414 int delta) { 415 start_code_[index_] = start; 416 end_code_[index_] = end; 417 id_delta_[index_] = delta; 418 index_++; 419 } 420 } writer(c); 421 422 writer.end_code_ = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount, false); 423 (void) c->allocate_size<HBUINT16> (2); // padding 424 writer.start_code_ = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount, false); 425 writer.id_delta_ = c->allocate_size<HBINT16> (HBINT16::static_size * segcount, false); 426 427 if (unlikely (!writer.end_code_ || !writer.start_code_ || !writer.id_delta_)) return false; 428 429 to_ranges (+it, writer); 430 return true; 431 } 432 433 template<typename Iterator, 434 hb_requires (hb_is_iterator (Iterator))> 435 HBUINT16* serialize_rangeoffset_glyid (hb_serialize_context_t *c, 436 Iterator it, 437 HBUINT16 *endCode, 438 HBUINT16 *startCode, 439 HBINT16 *idDelta, 440 unsigned segcount) 441 { 442 hb_map_t cp_to_gid { it }; 443 444 HBUINT16 *idRangeOffset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount); 445 if (unlikely (!c->check_success (idRangeOffset))) return nullptr; 446 if (unlikely ((char *)idRangeOffset - (char *)idDelta != (int) segcount * (int) HBINT16::static_size)) return nullptr; 447 448 for (unsigned i : + hb_range (segcount) 449 | hb_filter ([&] (const unsigned _) { return idDelta[_] == 0; })) 450 { 451 idRangeOffset[i] = 2 * (c->start_embed<HBUINT16> () - idRangeOffset - i); 452 for (hb_codepoint_t cp = startCode[i]; cp <= endCode[i]; cp++) 453 { 454 HBUINT16 gid; 455 gid = cp_to_gid[cp]; 456 c->copy<HBUINT16> (gid); 457 } 458 } 459 460 return idRangeOffset; 461 } 462 463 template<typename Iterator, 464 hb_requires (hb_is_iterator (Iterator))> 465 void serialize (hb_serialize_context_t *c, 466 Iterator it) 467 { 468 auto format4_iter = 469 + it 470 | hb_filter ([&] (const hb_codepoint_pair_t _) 471 { return _.first <= 0xFFFF; }) 472 ; 473 474 if (!format4_iter) return; 475 476 unsigned table_initpos = c->length (); 477 if (unlikely (!c->extend_min (this))) return; 478 this->format = 4; 479 480 hb_vector_t<hb_codepoint_pair_t> cp_to_gid { 481 format4_iter 482 }; 483 484 //serialize endCode[], startCode[], idDelta[] 485 HBUINT16* endCode = c->start_embed<HBUINT16> (); 486 unsigned segcount = serialize_find_segcount (cp_to_gid.iter()); 487 if (unlikely (!serialize_start_end_delta_arrays (c, cp_to_gid.iter(), segcount))) 488 return; 489 490 HBUINT16 *startCode = endCode + segcount + 1; 491 HBINT16 *idDelta = ((HBINT16*)startCode) + segcount; 492 493 HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, 494 cp_to_gid.iter (), 495 endCode, 496 startCode, 497 idDelta, 498 segcount); 499 if (unlikely (!c->check_success (idRangeOffset))) return; 500 501 this->length = c->length () - table_initpos; 502 if ((long long) this->length != (long long) c->length () - table_initpos) 503 { 504 c->err (HB_SERIALIZE_ERROR_INT_OVERFLOW); 505 return; 506 } 507 508 this->segCountX2 = segcount * 2; 509 this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1; 510 this->searchRange = 2 * (1u << this->entrySelector); 511 this->rangeShift = segcount * 2 > this->searchRange 512 ? 2 * segcount - this->searchRange 513 : 0; 514 } 515 516 unsigned get_language () const 517 { 518 return language; 519 } 520 521 struct accelerator_t 522 { 523 accelerator_t () {} 524 accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); } 525 526 void init (const CmapSubtableFormat4 *subtable) 527 { 528 segCount = subtable->segCountX2 / 2; 529 endCount = subtable->values.arrayZ; 530 startCount = endCount + segCount + 1; 531 idDelta = startCount + segCount; 532 idRangeOffset = idDelta + segCount; 533 glyphIdArray = idRangeOffset + segCount; 534 glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; 535 } 536 537 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 538 { 539 struct CustomRange 540 { 541 int cmp (hb_codepoint_t k, 542 unsigned distance) const 543 { 544 if (k > last) return +1; 545 if (k < (&last)[distance]/*first*/) return -1; 546 return 0; 547 } 548 HBUINT16 last; 549 }; 550 551 const HBUINT16 *found = hb_bsearch (codepoint, 552 this->endCount, 553 this->segCount, 554 sizeof (CustomRange), 555 _hb_cmp_method<hb_codepoint_t, CustomRange, unsigned>, 556 this->segCount + 1); 557 if (unlikely (!found)) 558 return false; 559 unsigned int i = found - endCount; 560 561 hb_codepoint_t gid; 562 unsigned int rangeOffset = this->idRangeOffset[i]; 563 if (rangeOffset == 0) 564 gid = codepoint + this->idDelta[i]; 565 else 566 { 567 /* Somebody has been smoking... */ 568 unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; 569 if (unlikely (index >= this->glyphIdArrayLength)) 570 return false; 571 gid = this->glyphIdArray[index]; 572 if (unlikely (!gid)) 573 return false; 574 gid += this->idDelta[i]; 575 } 576 gid &= 0xFFFFu; 577 if (unlikely (!gid)) 578 return false; 579 *glyph = gid; 580 return true; 581 } 582 583 HB_INTERNAL static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) 584 { return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); } 585 586 void collect_unicodes (hb_set_t *out) const 587 { 588 unsigned int count = this->segCount; 589 if (count && this->startCount[count - 1] == 0xFFFFu) 590 count--; /* Skip sentinel segment. */ 591 for (unsigned int i = 0; i < count; i++) 592 { 593 hb_codepoint_t start = this->startCount[i]; 594 hb_codepoint_t end = this->endCount[i]; 595 unsigned int rangeOffset = this->idRangeOffset[i]; 596 out->add_range(start, end); 597 if (rangeOffset == 0) 598 { 599 for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) 600 { 601 hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu; 602 if (unlikely (!gid)) 603 out->del(codepoint); 604 } 605 } 606 else 607 { 608 for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) 609 { 610 unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; 611 if (unlikely (index >= this->glyphIdArrayLength)) 612 { 613 out->del_range (codepoint, end); 614 break; 615 } 616 hb_codepoint_t gid = this->glyphIdArray[index]; 617 if (unlikely (!gid)) 618 out->del(codepoint); 619 } 620 } 621 } 622 } 623 624 void collect_mapping (hb_set_t *unicodes, /* OUT */ 625 hb_map_t *mapping /* OUT */) const 626 { 627 // TODO(grieger): optimize similar to collect_unicodes 628 // (ie. use add_range()) 629 unsigned count = this->segCount; 630 if (count && this->startCount[count - 1] == 0xFFFFu) 631 count--; /* Skip sentinel segment. */ 632 for (unsigned i = 0; i < count; i++) 633 { 634 hb_codepoint_t start = this->startCount[i]; 635 hb_codepoint_t end = this->endCount[i]; 636 unsigned rangeOffset = this->idRangeOffset[i]; 637 if (rangeOffset == 0) 638 { 639 for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) 640 { 641 hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu; 642 if (unlikely (!gid)) 643 continue; 644 unicodes->add (codepoint); 645 mapping->set (codepoint, gid); 646 } 647 } 648 else 649 { 650 for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) 651 { 652 unsigned index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; 653 if (unlikely (index >= this->glyphIdArrayLength)) 654 break; 655 hb_codepoint_t gid = this->glyphIdArray[index]; 656 if (unlikely (!gid)) 657 continue; 658 unicodes->add (codepoint); 659 mapping->set (codepoint, gid); 660 } 661 } 662 } 663 } 664 665 const HBUINT16 *endCount; 666 const HBUINT16 *startCount; 667 const HBUINT16 *idDelta; 668 const HBUINT16 *idRangeOffset; 669 const HBUINT16 *glyphIdArray; 670 unsigned int segCount; 671 unsigned int glyphIdArrayLength; 672 }; 673 674 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 675 { 676 accelerator_t accel (this); 677 return accel.get_glyph_func (&accel, codepoint, glyph); 678 } 679 void collect_unicodes (hb_set_t *out) const 680 { 681 accelerator_t accel (this); 682 accel.collect_unicodes (out); 683 } 684 685 void collect_mapping (hb_set_t *unicodes, /* OUT */ 686 hb_map_t *mapping /* OUT */) const 687 { 688 accelerator_t accel (this); 689 accel.collect_mapping (unicodes, mapping); 690 } 691 692 bool sanitize (hb_sanitize_context_t *c) const 693 { 694 TRACE_SANITIZE (this); 695 if (unlikely (!c->check_struct (this))) 696 return_trace (false); 697 hb_barrier (); 698 699 if (unlikely (!c->check_range (this, length))) 700 return_trace (false); 701 702 return_trace (16 + 4 * (unsigned int) segCountX2 <= length); 703 } 704 705 706 707 protected: 708 HBUINT16 format; /* Format number is set to 4. */ 709 HBUINT16 length; /* This is the length in bytes of the 710 * subtable. */ 711 HBUINT16 language; /* Ignore. */ 712 HBUINT16 segCountX2; /* 2 x segCount. */ 713 HBUINT16 searchRange; /* 2 * (2**floor(log2(segCount))) */ 714 HBUINT16 entrySelector; /* log2(searchRange/2) */ 715 HBUINT16 rangeShift; /* 2 x segCount - searchRange */ 716 717 UnsizedArrayOf<HBUINT16> 718 values; 719 #if 0 720 HBUINT16 endCount[segCount]; /* End characterCode for each segment, 721 * last=0xFFFFu. */ 722 HBUINT16 reservedPad; /* Set to 0. */ 723 HBUINT16 startCount[segCount]; /* Start character code for each segment. */ 724 HBINT16 idDelta[segCount]; /* Delta for all character codes in segment. */ 725 HBUINT16 idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */ 726 UnsizedArrayOf<HBUINT16> 727 glyphIdArray; /* Glyph index array (arbitrary length) */ 728 #endif 729 730 public: 731 DEFINE_SIZE_ARRAY (14, values); 732 }; 733 734 struct CmapSubtableLongGroup 735 { 736 friend struct CmapSubtableFormat12; 737 friend struct CmapSubtableFormat13; 738 template<typename U> 739 friend struct CmapSubtableLongSegmented; 740 friend struct cmap; 741 742 int cmp (hb_codepoint_t codepoint) const 743 { 744 if (codepoint < startCharCode) return -1; 745 if (codepoint > endCharCode) return +1; 746 return 0; 747 } 748 749 bool sanitize (hb_sanitize_context_t *c) const 750 { 751 TRACE_SANITIZE (this); 752 return_trace (c->check_struct (this)); 753 } 754 755 private: 756 HBUINT32 startCharCode; /* First character code in this group. */ 757 HBUINT32 endCharCode; /* Last character code in this group. */ 758 HBUINT32 glyphID; /* Glyph index; interpretation depends on 759 * subtable format. */ 760 public: 761 DEFINE_SIZE_STATIC (12); 762 }; 763 DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup); 764 765 template <typename UINT> 766 struct CmapSubtableTrimmed 767 { 768 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 769 { 770 /* Rely on our implicit array bound-checking. */ 771 hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode]; 772 if (unlikely (!gid)) 773 return false; 774 *glyph = gid; 775 return true; 776 } 777 778 unsigned get_language () const 779 { 780 return language; 781 } 782 783 void collect_unicodes (hb_set_t *out) const 784 { 785 hb_codepoint_t start = startCharCode; 786 unsigned int count = glyphIdArray.len; 787 for (unsigned int i = 0; i < count; i++) 788 if (glyphIdArray[i]) 789 out->add (start + i); 790 } 791 792 void collect_mapping (hb_set_t *unicodes, /* OUT */ 793 hb_map_t *mapping /* OUT */) const 794 { 795 hb_codepoint_t start_cp = startCharCode; 796 unsigned count = glyphIdArray.len; 797 for (unsigned i = 0; i < count; i++) 798 if (glyphIdArray[i]) 799 { 800 hb_codepoint_t unicode = start_cp + i; 801 hb_codepoint_t glyphid = glyphIdArray[i]; 802 unicodes->add (unicode); 803 mapping->set (unicode, glyphid); 804 } 805 } 806 807 bool sanitize (hb_sanitize_context_t *c) const 808 { 809 TRACE_SANITIZE (this); 810 return_trace (c->check_struct (this) && glyphIdArray.sanitize (c)); 811 } 812 813 protected: 814 UINT formatReserved; /* Subtable format and (maybe) padding. */ 815 UINT length; /* Byte length of this subtable. */ 816 UINT language; /* Ignore. */ 817 UINT startCharCode; /* First character code covered. */ 818 ArrayOf<HBGlyphID16, UINT> 819 glyphIdArray; /* Array of glyph index values for character 820 * codes in the range. */ 821 public: 822 DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray); 823 }; 824 825 struct CmapSubtableFormat6 : CmapSubtableTrimmed<HBUINT16> {}; 826 struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32> {}; 827 828 template <typename T> 829 struct CmapSubtableLongSegmented 830 { 831 friend struct cmap; 832 833 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 834 { 835 hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint); 836 if (unlikely (!gid)) 837 return false; 838 *glyph = gid; 839 return true; 840 } 841 842 unsigned get_language () const 843 { 844 return language; 845 } 846 847 void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const 848 { 849 for (unsigned int i = 0; i < this->groups.len; i++) 850 { 851 hb_codepoint_t start = this->groups[i].startCharCode; 852 hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode, 853 (hb_codepoint_t) HB_UNICODE_MAX); 854 hb_codepoint_t gid = this->groups[i].glyphID; 855 if (!gid) 856 { 857 /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */ 858 if (! T::group_get_glyph (this->groups[i], end)) continue; 859 start++; 860 gid++; 861 } 862 if (unlikely ((unsigned int) gid >= num_glyphs)) continue; 863 if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs)) 864 end = start + (hb_codepoint_t) num_glyphs - gid; 865 866 out->add_range (start, hb_min (end, 0x10FFFFu)); 867 } 868 } 869 870 void collect_mapping (hb_set_t *unicodes, /* OUT */ 871 hb_map_t *mapping, /* OUT */ 872 unsigned num_glyphs) const 873 { 874 hb_codepoint_t last_end = 0; 875 unsigned count = this->groups.len; 876 for (unsigned i = 0; i < count; i++) 877 { 878 hb_codepoint_t start = this->groups.arrayZ[i].startCharCode; 879 hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups.arrayZ[i].endCharCode, 880 (hb_codepoint_t) HB_UNICODE_MAX); 881 if (unlikely (start > end || start < last_end)) { 882 // Range is not in order and is invalid, skip it. 883 continue; 884 } 885 last_end = end; 886 887 888 hb_codepoint_t gid = this->groups.arrayZ[i].glyphID; 889 if (!gid) 890 { 891 if (T::formatNumber == 13) continue; 892 start++; 893 gid++; 894 } 895 if (unlikely ((unsigned int) gid >= num_glyphs)) continue; 896 if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs)) 897 end = start + (hb_codepoint_t) num_glyphs - gid; 898 899 mapping->alloc (mapping->get_population () + end - start + 1); 900 901 unicodes->add_range (start, end); 902 for (unsigned cp = start; cp <= end; cp++) 903 { 904 mapping->set (cp, gid); 905 gid += T::increment; 906 } 907 } 908 } 909 910 bool sanitize (hb_sanitize_context_t *c) const 911 { 912 TRACE_SANITIZE (this); 913 return_trace (c->check_struct (this) && groups.sanitize (c)); 914 } 915 916 protected: 917 HBUINT16 format; /* Subtable format; set to 12. */ 918 HBUINT16 reserved; /* Reserved; set to 0. */ 919 HBUINT32 length; /* Byte length of this subtable. */ 920 HBUINT32 language; /* Ignore. */ 921 SortedArray32Of<CmapSubtableLongGroup> 922 groups; /* Groupings. */ 923 public: 924 DEFINE_SIZE_ARRAY (16, groups); 925 }; 926 927 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12> 928 { 929 static constexpr int increment = 1; 930 static constexpr int formatNumber = 12; 931 932 static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, 933 hb_codepoint_t u) 934 { return likely (group.startCharCode <= group.endCharCode) ? 935 group.glyphID + (u - group.startCharCode) : 0; } 936 937 938 template<typename Iterator, 939 hb_requires (hb_is_iterator (Iterator))> 940 void serialize (hb_serialize_context_t *c, 941 Iterator it) 942 { 943 if (!it) return; 944 unsigned table_initpos = c->length (); 945 if (unlikely (!c->extend_min (this))) return; 946 947 hb_codepoint_t startCharCode = (hb_codepoint_t) -1, endCharCode = (hb_codepoint_t) -1; 948 hb_codepoint_t glyphID = 0; 949 950 for (const auto& _ : +it) 951 { 952 if (startCharCode == (hb_codepoint_t) -1) 953 { 954 startCharCode = _.first; 955 endCharCode = _.first; 956 glyphID = _.second; 957 } 958 else if (!_is_gid_consecutive (endCharCode, startCharCode, glyphID, _.first, _.second)) 959 { 960 CmapSubtableLongGroup grouprecord; 961 grouprecord.startCharCode = startCharCode; 962 grouprecord.endCharCode = endCharCode; 963 grouprecord.glyphID = glyphID; 964 c->copy<CmapSubtableLongGroup> (grouprecord); 965 966 startCharCode = _.first; 967 endCharCode = _.first; 968 glyphID = _.second; 969 } 970 else 971 endCharCode = _.first; 972 } 973 974 CmapSubtableLongGroup record; 975 record.startCharCode = startCharCode; 976 record.endCharCode = endCharCode; 977 record.glyphID = glyphID; 978 c->copy<CmapSubtableLongGroup> (record); 979 980 this->format = 12; 981 this->reserved = 0; 982 this->length = c->length () - table_initpos; 983 this->groups.len = (this->length - min_size) / CmapSubtableLongGroup::static_size; 984 } 985 986 static size_t get_sub_table_size (const hb_sorted_vector_t<CmapSubtableLongGroup> &groups_data) 987 { return 16 + 12 * groups_data.length; } 988 989 private: 990 static bool _is_gid_consecutive (hb_codepoint_t endCharCode, 991 hb_codepoint_t startCharCode, 992 hb_codepoint_t glyphID, 993 hb_codepoint_t cp, 994 hb_codepoint_t new_gid) 995 { 996 return (cp - 1 == endCharCode) && 997 new_gid == glyphID + (cp - startCharCode); 998 } 999 1000 }; 1001 1002 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13> 1003 { 1004 static constexpr int increment = 0; 1005 static constexpr int formatNumber = 13; 1006 1007 static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, 1008 hb_codepoint_t u HB_UNUSED) 1009 { return group.glyphID; } 1010 }; 1011 1012 typedef enum 1013 { 1014 GLYPH_VARIANT_NOT_FOUND = 0, 1015 GLYPH_VARIANT_FOUND = 1, 1016 GLYPH_VARIANT_USE_DEFAULT = 2 1017 } glyph_variant_t; 1018 1019 struct UnicodeValueRange 1020 { 1021 int cmp (const hb_codepoint_t &codepoint) const 1022 { 1023 if (codepoint < startUnicodeValue) return -1; 1024 if (codepoint > startUnicodeValue + additionalCount) return +1; 1025 return 0; 1026 } 1027 1028 bool sanitize (hb_sanitize_context_t *c) const 1029 { 1030 TRACE_SANITIZE (this); 1031 return_trace (c->check_struct (this)); 1032 } 1033 1034 HBUINT24 startUnicodeValue; /* First value in this range. */ 1035 HBUINT8 additionalCount; /* Number of additional values in this 1036 * range. */ 1037 public: 1038 DEFINE_SIZE_STATIC (4); 1039 }; 1040 1041 struct DefaultUVS : SortedArray32Of<UnicodeValueRange> 1042 { 1043 void collect_unicodes (hb_set_t *out) const 1044 { 1045 unsigned int count = len; 1046 for (unsigned int i = 0; i < count; i++) 1047 { 1048 hb_codepoint_t first = arrayZ[i].startUnicodeValue; 1049 hb_codepoint_t last = hb_min ((hb_codepoint_t) (first + arrayZ[i].additionalCount), 1050 (hb_codepoint_t) HB_UNICODE_MAX); 1051 out->add_range (first, last); 1052 } 1053 } 1054 1055 DefaultUVS* copy (hb_serialize_context_t *c, 1056 const hb_set_t *unicodes) const 1057 { 1058 auto *out = c->start_embed<DefaultUVS> (); 1059 auto snap = c->snapshot (); 1060 1061 HBUINT32 len; 1062 len = 0; 1063 if (unlikely (!c->copy<HBUINT32> (len))) return nullptr; 1064 unsigned init_len = c->length (); 1065 1066 if (this->len > unicodes->get_population () * hb_bit_storage ((unsigned) this->len)) 1067 { 1068 hb_codepoint_t start = HB_SET_VALUE_INVALID; 1069 hb_codepoint_t end = HB_SET_VALUE_INVALID; 1070 1071 for (auto u : *unicodes) 1072 { 1073 if (!as_array ().bsearch (u)) 1074 continue; 1075 if (start == HB_SET_VALUE_INVALID) 1076 { 1077 start = u; 1078 end = start - 1; 1079 } 1080 if (end + 1 != u || end - start == 255) 1081 { 1082 UnicodeValueRange rec; 1083 rec.startUnicodeValue = start; 1084 rec.additionalCount = end - start; 1085 c->copy<UnicodeValueRange> (rec); 1086 start = u; 1087 } 1088 end = u; 1089 } 1090 if (start != HB_SET_VALUE_INVALID) 1091 { 1092 UnicodeValueRange rec; 1093 rec.startUnicodeValue = start; 1094 rec.additionalCount = end - start; 1095 c->copy<UnicodeValueRange> (rec); 1096 } 1097 1098 } 1099 else 1100 { 1101 hb_codepoint_t lastCode = HB_SET_VALUE_INVALID; 1102 int count = -1; 1103 1104 for (const UnicodeValueRange& _ : *this) 1105 { 1106 hb_codepoint_t curEntry = (hb_codepoint_t) (_.startUnicodeValue - 1); 1107 hb_codepoint_t end = curEntry + _.additionalCount + 2; 1108 1109 for (; unicodes->next (&curEntry) && curEntry < end;) 1110 { 1111 count += 1; 1112 if (lastCode == HB_SET_VALUE_INVALID) 1113 lastCode = curEntry; 1114 else if (lastCode + count != curEntry) 1115 { 1116 UnicodeValueRange rec; 1117 rec.startUnicodeValue = lastCode; 1118 rec.additionalCount = count - 1; 1119 c->copy<UnicodeValueRange> (rec); 1120 1121 lastCode = curEntry; 1122 count = 0; 1123 } 1124 } 1125 } 1126 1127 if (lastCode != HB_MAP_VALUE_INVALID) 1128 { 1129 UnicodeValueRange rec; 1130 rec.startUnicodeValue = lastCode; 1131 rec.additionalCount = count; 1132 c->copy<UnicodeValueRange> (rec); 1133 } 1134 } 1135 1136 if (c->length () - init_len == 0) 1137 { 1138 c->revert (snap); 1139 return nullptr; 1140 } 1141 else 1142 { 1143 if (unlikely (!c->check_assign (out->len, 1144 (c->length () - init_len) / UnicodeValueRange::static_size, 1145 HB_SERIALIZE_ERROR_INT_OVERFLOW))) return nullptr; 1146 return out; 1147 } 1148 } 1149 1150 public: 1151 DEFINE_SIZE_ARRAY (4, *this); 1152 }; 1153 1154 struct UVSMapping 1155 { 1156 int cmp (const hb_codepoint_t &codepoint) const 1157 { return unicodeValue.cmp (codepoint); } 1158 1159 bool sanitize (hb_sanitize_context_t *c) const 1160 { 1161 TRACE_SANITIZE (this); 1162 return_trace (c->check_struct (this)); 1163 } 1164 1165 HBUINT24 unicodeValue; /* Base Unicode value of the UVS */ 1166 HBGlyphID16 glyphID; /* Glyph ID of the UVS */ 1167 public: 1168 DEFINE_SIZE_STATIC (5); 1169 }; 1170 1171 struct NonDefaultUVS : SortedArray32Of<UVSMapping> 1172 { 1173 void collect_unicodes (hb_set_t *out) const 1174 { 1175 for (const auto& a : as_array ()) 1176 out->add (a.unicodeValue); 1177 } 1178 1179 void collect_mapping (hb_set_t *unicodes, /* OUT */ 1180 hb_map_t *mapping /* OUT */) const 1181 { 1182 for (const auto& a : as_array ()) 1183 { 1184 hb_codepoint_t unicode = a.unicodeValue; 1185 hb_codepoint_t glyphid = a.glyphID; 1186 unicodes->add (unicode); 1187 mapping->set (unicode, glyphid); 1188 } 1189 } 1190 1191 void closure_glyphs (const hb_set_t *unicodes, 1192 hb_set_t *glyphset) const 1193 { 1194 + as_array () 1195 | hb_filter (unicodes, &UVSMapping::unicodeValue) 1196 | hb_map (&UVSMapping::glyphID) 1197 | hb_sink (glyphset) 1198 ; 1199 } 1200 1201 NonDefaultUVS* copy (hb_serialize_context_t *c, 1202 const hb_set_t *unicodes, 1203 const hb_set_t *glyphs_requested, 1204 const hb_map_t *glyph_map) const 1205 { 1206 auto *out = c->start_embed<NonDefaultUVS> (); 1207 auto it = 1208 + as_array () 1209 | hb_filter ([&] (const UVSMapping& _) 1210 { 1211 return unicodes->has (_.unicodeValue) || glyphs_requested->has (_.glyphID); 1212 }) 1213 ; 1214 1215 if (!it) return nullptr; 1216 1217 HBUINT32 len; 1218 len = it.len (); 1219 if (unlikely (!c->copy<HBUINT32> (len))) return nullptr; 1220 1221 for (const UVSMapping& _ : it) 1222 { 1223 UVSMapping mapping; 1224 mapping.unicodeValue = _.unicodeValue; 1225 mapping.glyphID = glyph_map->get (_.glyphID); 1226 c->copy<UVSMapping> (mapping); 1227 } 1228 1229 return out; 1230 } 1231 1232 public: 1233 DEFINE_SIZE_ARRAY (4, *this); 1234 }; 1235 1236 struct VariationSelectorRecord 1237 { 1238 glyph_variant_t get_glyph (hb_codepoint_t codepoint, 1239 hb_codepoint_t *glyph, 1240 const void *base) const 1241 { 1242 if ((base+defaultUVS).bfind (codepoint)) 1243 return GLYPH_VARIANT_USE_DEFAULT; 1244 const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint); 1245 if (nonDefault.glyphID) 1246 { 1247 *glyph = nonDefault.glyphID; 1248 return GLYPH_VARIANT_FOUND; 1249 } 1250 return GLYPH_VARIANT_NOT_FOUND; 1251 } 1252 1253 VariationSelectorRecord(const VariationSelectorRecord& other) 1254 { 1255 *this = other; 1256 } 1257 1258 void operator= (const VariationSelectorRecord& other) 1259 { 1260 varSelector = other.varSelector; 1261 HBUINT32 offset = other.defaultUVS; 1262 defaultUVS = offset; 1263 offset = other.nonDefaultUVS; 1264 nonDefaultUVS = offset; 1265 } 1266 1267 void collect_unicodes (hb_set_t *out, const void *base) const 1268 { 1269 (base+defaultUVS).collect_unicodes (out); 1270 (base+nonDefaultUVS).collect_unicodes (out); 1271 } 1272 1273 void collect_mapping (const void *base, 1274 hb_set_t *unicodes, /* OUT */ 1275 hb_map_t *mapping /* OUT */) const 1276 { 1277 (base+defaultUVS).collect_unicodes (unicodes); 1278 (base+nonDefaultUVS).collect_mapping (unicodes, mapping); 1279 } 1280 1281 int cmp (const hb_codepoint_t &variation_selector) const 1282 { return varSelector.cmp (variation_selector); } 1283 1284 bool sanitize (hb_sanitize_context_t *c, const void *base) const 1285 { 1286 TRACE_SANITIZE (this); 1287 return_trace (c->check_struct (this) && 1288 defaultUVS.sanitize (c, base) && 1289 nonDefaultUVS.sanitize (c, base)); 1290 } 1291 1292 hb_pair_t<unsigned, unsigned> 1293 copy (hb_serialize_context_t *c, 1294 const hb_set_t *unicodes, 1295 const hb_set_t *glyphs_requested, 1296 const hb_map_t *glyph_map, 1297 const void *base) const 1298 { 1299 auto snap = c->snapshot (); 1300 auto *out = c->embed<VariationSelectorRecord> (*this); 1301 if (unlikely (!out)) return hb_pair (0, 0); 1302 1303 out->defaultUVS = 0; 1304 out->nonDefaultUVS = 0; 1305 1306 unsigned non_default_uvs_objidx = 0; 1307 if (nonDefaultUVS != 0) 1308 { 1309 c->push (); 1310 if (c->copy (base+nonDefaultUVS, unicodes, glyphs_requested, glyph_map)) 1311 non_default_uvs_objidx = c->pop_pack (); 1312 else c->pop_discard (); 1313 } 1314 1315 unsigned default_uvs_objidx = 0; 1316 if (defaultUVS != 0) 1317 { 1318 c->push (); 1319 if (c->copy (base+defaultUVS, unicodes)) 1320 default_uvs_objidx = c->pop_pack (); 1321 else c->pop_discard (); 1322 } 1323 1324 1325 if (!default_uvs_objidx && !non_default_uvs_objidx) 1326 c->revert (snap); 1327 1328 return hb_pair (default_uvs_objidx, non_default_uvs_objidx); 1329 } 1330 1331 HBUINT24 varSelector; /* Variation selector. */ 1332 Offset32To<DefaultUVS> 1333 defaultUVS; /* Offset to Default UVS Table. May be 0. */ 1334 Offset32To<NonDefaultUVS> 1335 nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */ 1336 public: 1337 DEFINE_SIZE_STATIC (11); 1338 }; 1339 1340 struct CmapSubtableFormat14 1341 { 1342 glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint, 1343 hb_codepoint_t variation_selector, 1344 hb_codepoint_t *glyph) const 1345 { return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this); } 1346 1347 void collect_variation_selectors (hb_set_t *out) const 1348 { 1349 for (const auto& a : record.as_array ()) 1350 out->add (a.varSelector); 1351 } 1352 void collect_variation_unicodes (hb_codepoint_t variation_selector, 1353 hb_set_t *out) const 1354 { record.bsearch (variation_selector).collect_unicodes (out, this); } 1355 1356 void serialize (hb_serialize_context_t *c, 1357 const hb_set_t *unicodes, 1358 const hb_set_t *glyphs_requested, 1359 const hb_map_t *glyph_map, 1360 const void *base) 1361 { 1362 auto snap = c->snapshot (); 1363 unsigned table_initpos = c->length (); 1364 const char* init_tail = c->tail; 1365 1366 if (unlikely (!c->extend_min (this))) return; 1367 this->format = 14; 1368 1369 auto src_tbl = reinterpret_cast<const CmapSubtableFormat14*> (base); 1370 1371 /* 1372 * Some versions of OTS require that offsets are in order. Due to the use 1373 * of push()/pop_pack() serializing the variation records in order results 1374 * in the offsets being in reverse order (first record has the largest 1375 * offset). While this is perfectly valid, it will cause some versions of 1376 * OTS to consider this table bad. 1377 * 1378 * So to prevent this issue we serialize the variation records in reverse 1379 * order, so that the offsets are ordered from small to large. Since 1380 * variation records are supposed to be in increasing order of varSelector 1381 * we then have to reverse the order of the written variation selector 1382 * records after everything is finalized. 1383 */ 1384 hb_vector_t<hb_pair_t<unsigned, unsigned>> obj_indices; 1385 for (int i = src_tbl->record.len - 1; i >= 0; i--) 1386 { 1387 if (!unicodes->has(src_tbl->record[i].varSelector)) 1388 continue; 1389 1390 hb_pair_t<unsigned, unsigned> result = src_tbl->record[i].copy (c, unicodes, glyphs_requested, glyph_map, base); 1391 if (result.first || result.second) 1392 obj_indices.push (result); 1393 } 1394 1395 if (c->length () - table_initpos == CmapSubtableFormat14::min_size) 1396 { 1397 c->revert (snap); 1398 return; 1399 } 1400 1401 if (unlikely (!c->check_success (!obj_indices.in_error ()))) 1402 return; 1403 1404 int tail_len = init_tail - c->tail; 1405 c->check_assign (this->length, c->length () - table_initpos + tail_len, 1406 HB_SERIALIZE_ERROR_INT_OVERFLOW); 1407 c->check_assign (this->record.len, 1408 (c->length () - table_initpos - CmapSubtableFormat14::min_size) / 1409 VariationSelectorRecord::static_size, 1410 HB_SERIALIZE_ERROR_INT_OVERFLOW); 1411 1412 /* Correct the incorrect write order by reversing the order of the variation 1413 records array. */ 1414 _reverse_variation_records (); 1415 1416 /* Now that records are in the right order, we can set up the offsets. */ 1417 _add_links_to_variation_records (c, obj_indices); 1418 } 1419 1420 void _reverse_variation_records () 1421 { 1422 record.as_array ().reverse (); 1423 } 1424 1425 void _add_links_to_variation_records (hb_serialize_context_t *c, 1426 const hb_vector_t<hb_pair_t<unsigned, unsigned>>& obj_indices) 1427 { 1428 for (unsigned i = 0; i < obj_indices.length; i++) 1429 { 1430 /* 1431 * Since the record array has been reversed (see comments in copy()) 1432 * but obj_indices has not been, the indices at obj_indices[i] 1433 * are for the variation record at record[j]. 1434 */ 1435 int j = obj_indices.length - 1 - i; 1436 c->add_link (record[j].defaultUVS, obj_indices[i].first); 1437 c->add_link (record[j].nonDefaultUVS, obj_indices[i].second); 1438 } 1439 } 1440 1441 void closure_glyphs (const hb_set_t *unicodes, 1442 hb_set_t *glyphset) const 1443 { 1444 + hb_iter (record) 1445 | hb_filter (hb_bool, &VariationSelectorRecord::nonDefaultUVS) 1446 | hb_filter (unicodes, &VariationSelectorRecord::varSelector) 1447 | hb_map (&VariationSelectorRecord::nonDefaultUVS) 1448 | hb_map (hb_add (this)) 1449 | hb_apply ([=] (const NonDefaultUVS& _) { _.closure_glyphs (unicodes, glyphset); }) 1450 ; 1451 } 1452 1453 void collect_unicodes (hb_set_t *out) const 1454 { 1455 for (const VariationSelectorRecord& _ : record) 1456 _.collect_unicodes (out, this); 1457 } 1458 1459 void collect_mapping (hb_set_t *unicodes, /* OUT */ 1460 hb_map_t *mapping /* OUT */) const 1461 { 1462 for (const VariationSelectorRecord& _ : record) 1463 _.collect_mapping (this, unicodes, mapping); 1464 } 1465 1466 bool sanitize (hb_sanitize_context_t *c) const 1467 { 1468 TRACE_SANITIZE (this); 1469 return_trace (c->check_struct (this) && 1470 record.sanitize (c, this)); 1471 } 1472 1473 protected: 1474 HBUINT16 format; /* Format number is set to 14. */ 1475 HBUINT32 length; /* Byte length of this subtable. */ 1476 SortedArray32Of<VariationSelectorRecord> 1477 record; /* Variation selector records; sorted 1478 * in increasing order of `varSelector'. */ 1479 public: 1480 DEFINE_SIZE_ARRAY (10, record); 1481 }; 1482 1483 struct CmapSubtable 1484 { 1485 /* Note: We intentionally do NOT implement subtable formats 2 and 8. */ 1486 1487 bool get_glyph (hb_codepoint_t codepoint, 1488 hb_codepoint_t *glyph) const 1489 { 1490 switch (u.format.v) { 1491 case 0: hb_barrier (); return u.format0 .get_glyph (codepoint, glyph); 1492 case 4: hb_barrier (); return u.format4 .get_glyph (codepoint, glyph); 1493 case 6: hb_barrier (); return u.format6 .get_glyph (codepoint, glyph); 1494 case 10: hb_barrier (); return u.format10.get_glyph (codepoint, glyph); 1495 case 12: hb_barrier (); return u.format12.get_glyph (codepoint, glyph); 1496 case 13: hb_barrier (); return u.format13.get_glyph (codepoint, glyph); 1497 case 14: 1498 default: return false; 1499 } 1500 } 1501 void collect_unicodes (hb_set_t *out, unsigned int num_glyphs = UINT_MAX) const 1502 { 1503 switch (u.format.v) { 1504 case 0: hb_barrier (); u.format0 .collect_unicodes (out); return; 1505 case 4: hb_barrier (); u.format4 .collect_unicodes (out); return; 1506 case 6: hb_barrier (); u.format6 .collect_unicodes (out); return; 1507 case 10: hb_barrier (); u.format10.collect_unicodes (out); return; 1508 case 12: hb_barrier (); u.format12.collect_unicodes (out, num_glyphs); return; 1509 case 13: hb_barrier (); u.format13.collect_unicodes (out, num_glyphs); return; 1510 case 14: 1511 default: return; 1512 } 1513 } 1514 1515 void collect_mapping (hb_set_t *unicodes, /* OUT */ 1516 hb_map_t *mapping, /* OUT */ 1517 unsigned num_glyphs = UINT_MAX) const 1518 { 1519 switch (u.format.v) { 1520 case 0: hb_barrier (); u.format0 .collect_mapping (unicodes, mapping); return; 1521 case 4: hb_barrier (); u.format4 .collect_mapping (unicodes, mapping); return; 1522 case 6: hb_barrier (); u.format6 .collect_mapping (unicodes, mapping); return; 1523 case 10: hb_barrier (); u.format10.collect_mapping (unicodes, mapping); return; 1524 case 12: hb_barrier (); u.format12.collect_mapping (unicodes, mapping, num_glyphs); return; 1525 case 13: hb_barrier (); u.format13.collect_mapping (unicodes, mapping, num_glyphs); return; 1526 case 14: 1527 default: return; 1528 } 1529 } 1530 1531 unsigned get_language () const 1532 { 1533 switch (u.format.v) { 1534 case 0: hb_barrier (); return u.format0 .get_language (); 1535 case 4: hb_barrier (); return u.format4 .get_language (); 1536 case 6: hb_barrier (); return u.format6 .get_language (); 1537 case 10: hb_barrier (); return u.format10.get_language (); 1538 case 12: hb_barrier (); return u.format12.get_language (); 1539 case 13: hb_barrier (); return u.format13.get_language (); 1540 case 14: 1541 default: return 0; 1542 } 1543 } 1544 1545 template<typename Iterator, 1546 hb_requires (hb_is_iterator (Iterator))> 1547 void serialize (hb_serialize_context_t *c, 1548 Iterator it, 1549 unsigned format, 1550 const hb_subset_plan_t *plan, 1551 const void *base) 1552 { 1553 switch (format) { 1554 case 4: hb_barrier (); return u.format4.serialize (c, it); 1555 case 12: hb_barrier (); return u.format12.serialize (c, it); 1556 case 14: hb_barrier (); return u.format14.serialize (c, &plan->unicodes, &plan->glyphs_requested, plan->glyph_map, base); 1557 default: return; 1558 } 1559 } 1560 1561 bool sanitize (hb_sanitize_context_t *c) const 1562 { 1563 TRACE_SANITIZE (this); 1564 if (!u.format.v.sanitize (c)) return_trace (false); 1565 hb_barrier (); 1566 switch (u.format.v) { 1567 case 0: hb_barrier (); return_trace (u.format0 .sanitize (c)); 1568 case 4: hb_barrier (); return_trace (u.format4 .sanitize (c)); 1569 case 6: hb_barrier (); return_trace (u.format6 .sanitize (c)); 1570 case 10: hb_barrier (); return_trace (u.format10.sanitize (c)); 1571 case 12: hb_barrier (); return_trace (u.format12.sanitize (c)); 1572 case 13: hb_barrier (); return_trace (u.format13.sanitize (c)); 1573 case 14: hb_barrier (); return_trace (u.format14.sanitize (c)); 1574 default:return_trace (true); 1575 } 1576 } 1577 1578 public: 1579 union { 1580 struct { HBUINT16 v; } format; /* Format identifier */ 1581 CmapSubtableFormat0 format0; 1582 CmapSubtableFormat4 format4; 1583 CmapSubtableFormat6 format6; 1584 CmapSubtableFormat10 format10; 1585 CmapSubtableFormat12 format12; 1586 CmapSubtableFormat13 format13; 1587 CmapSubtableFormat14 format14; 1588 } u; 1589 public: 1590 DEFINE_SIZE_UNION (2, format.v); 1591 }; 1592 1593 1594 struct EncodingRecord 1595 { 1596 int cmp (const EncodingRecord &other) const 1597 { 1598 int ret; 1599 ret = platformID.cmp (other.platformID); 1600 if (ret) return ret; 1601 if (other.encodingID != 0xFFFF) 1602 { 1603 ret = encodingID.cmp (other.encodingID); 1604 if (ret) return ret; 1605 } 1606 return 0; 1607 } 1608 1609 bool sanitize (hb_sanitize_context_t *c, const void *base) const 1610 { 1611 TRACE_SANITIZE (this); 1612 return_trace (c->check_struct (this) && 1613 subtable.sanitize (c, base)); 1614 } 1615 1616 template<typename Iterator, 1617 hb_requires (hb_is_iterator (Iterator))> 1618 EncodingRecord* copy (hb_serialize_context_t *c, 1619 Iterator it, 1620 unsigned format, 1621 const void *base, 1622 const hb_subset_plan_t *plan, 1623 /* INOUT */ unsigned *objidx) const 1624 { 1625 TRACE_SERIALIZE (this); 1626 auto snap = c->snapshot (); 1627 auto *out = c->embed (this); 1628 if (unlikely (!out)) return_trace (nullptr); 1629 out->subtable = 0; 1630 1631 if (*objidx == 0) 1632 { 1633 CmapSubtable *cmapsubtable = c->push<CmapSubtable> (); 1634 unsigned origin_length = c->length (); 1635 cmapsubtable->serialize (c, it, format, plan, &(base+subtable)); 1636 if (c->length () - origin_length > 0 && !c->in_error()) *objidx = c->pop_pack (); 1637 else c->pop_discard (); 1638 } 1639 1640 if (*objidx == 0) 1641 { 1642 c->revert (snap); 1643 return_trace (nullptr); 1644 } 1645 1646 c->add_link (out->subtable, *objidx); 1647 return_trace (out); 1648 } 1649 1650 HBUINT16 platformID; /* Platform ID. */ 1651 HBUINT16 encodingID; /* Platform-specific encoding ID. */ 1652 Offset32To<CmapSubtable> 1653 subtable; /* Byte offset from beginning of table to the subtable for this encoding. */ 1654 public: 1655 DEFINE_SIZE_STATIC (8); 1656 }; 1657 1658 struct cmap; 1659 1660 struct SubtableUnicodesCache { 1661 1662 private: 1663 hb_blob_ptr_t<cmap> base_blob; 1664 const char* base; 1665 hb_hashmap_t<unsigned, hb::unique_ptr<hb_set_t>> cached_unicodes; 1666 1667 public: 1668 1669 static SubtableUnicodesCache* create (hb_blob_ptr_t<cmap> source_table) 1670 { 1671 SubtableUnicodesCache* cache = 1672 (SubtableUnicodesCache*) hb_malloc (sizeof(SubtableUnicodesCache)); 1673 new (cache) SubtableUnicodesCache (source_table); 1674 return cache; 1675 } 1676 1677 static void destroy (void* value) { 1678 if (!value) return; 1679 1680 SubtableUnicodesCache* cache = (SubtableUnicodesCache*) value; 1681 cache->~SubtableUnicodesCache (); 1682 hb_free (cache); 1683 } 1684 1685 SubtableUnicodesCache(const void* cmap_base) 1686 : base_blob(), 1687 base ((const char*) cmap_base), 1688 cached_unicodes () 1689 {} 1690 1691 SubtableUnicodesCache(hb_blob_ptr_t<cmap> base_blob_) 1692 : base_blob(base_blob_), 1693 base ((const char *) base_blob.get()), 1694 cached_unicodes () 1695 {} 1696 1697 ~SubtableUnicodesCache() 1698 { 1699 base_blob.destroy (); 1700 } 1701 1702 bool same_base(const void* other) const 1703 { 1704 return other == (const void*) base; 1705 } 1706 1707 const hb_set_t* set_for (const EncodingRecord* record, 1708 SubtableUnicodesCache& mutable_cache) const 1709 { 1710 if (cached_unicodes.has ((unsigned) ((const char *) record - base))) 1711 return cached_unicodes.get ((unsigned) ((const char *) record - base)); 1712 1713 return mutable_cache.set_for (record); 1714 } 1715 1716 const hb_set_t* set_for (const EncodingRecord* record) 1717 { 1718 if (!cached_unicodes.has ((unsigned) ((const char *) record - base))) 1719 { 1720 hb_set_t *s = hb_set_create (); 1721 if (unlikely (s->in_error ())) 1722 return hb_set_get_empty (); 1723 1724 (base+record->subtable).collect_unicodes (s); 1725 1726 if (unlikely (!cached_unicodes.set ((unsigned) ((const char *) record - base), hb::unique_ptr<hb_set_t> {s}))) 1727 return hb_set_get_empty (); 1728 1729 return s; 1730 } 1731 return cached_unicodes.get ((unsigned) ((const char *) record - base)); 1732 } 1733 1734 }; 1735 1736 static inline uint_fast16_t 1737 _hb_symbol_pua_map (unsigned codepoint) 1738 { 1739 if (codepoint <= 0x00FFu) 1740 { 1741 /* For symbol-encoded OpenType fonts, we duplicate the 1742 * U+F000..F0FF range at U+0000..U+00FF. That's what 1743 * Windows seems to do, and that's hinted about at: 1744 * https://docs.microsoft.com/en-us/typography/opentype/spec/recom 1745 * under "Non-Standard (Symbol) Fonts". */ 1746 return 0xF000u + codepoint; 1747 } 1748 return 0; 1749 } 1750 1751 struct cmap 1752 { 1753 static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap; 1754 1755 1756 static SubtableUnicodesCache* create_filled_cache(hb_blob_ptr_t<cmap> source_table) { 1757 const cmap* cmap = source_table.get(); 1758 auto it = 1759 + hb_iter (cmap->encodingRecord) 1760 | hb_filter ([&](const EncodingRecord& _) { 1761 return cmap::filter_encoding_records_for_subset (cmap, _); 1762 }) 1763 ; 1764 1765 SubtableUnicodesCache* cache = SubtableUnicodesCache::create(source_table); 1766 for (const EncodingRecord& _ : it) 1767 cache->set_for(&_); // populate the cache for this encoding record. 1768 1769 return cache; 1770 } 1771 1772 template<typename Iterator, typename EncodingRecIter, 1773 hb_requires (hb_is_iterator (EncodingRecIter))> 1774 bool serialize (hb_serialize_context_t *c, 1775 Iterator it, 1776 EncodingRecIter encodingrec_iter, 1777 const void *base, 1778 hb_subset_plan_t *plan, 1779 bool drop_format_4 = false) 1780 { 1781 if (unlikely (!c->extend_min ((*this)))) return false; 1782 this->version = 0; 1783 1784 unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0; 1785 auto snap = c->snapshot (); 1786 1787 SubtableUnicodesCache local_unicodes_cache (base); 1788 const SubtableUnicodesCache* unicodes_cache = &local_unicodes_cache; 1789 1790 if (plan->accelerator && 1791 plan->accelerator->cmap_cache && 1792 plan->accelerator->cmap_cache->same_base (base)) 1793 unicodes_cache = plan->accelerator->cmap_cache; 1794 1795 for (const EncodingRecord& _ : encodingrec_iter) 1796 { 1797 if (c->in_error ()) 1798 return false; 1799 1800 unsigned format = (base+_.subtable).u.format.v; 1801 if (format != 4 && format != 12 && format != 14) continue; 1802 1803 const hb_set_t* unicodes_set = unicodes_cache->set_for (&_, local_unicodes_cache); 1804 1805 if (!drop_format_4 && format == 4) 1806 { 1807 c->copy (_, + it | hb_filter (*unicodes_set, hb_first), 4u, base, plan, &format4objidx); 1808 if (c->in_error () && c->only_overflow ()) 1809 { 1810 // cmap4 overflowed, reset and retry serialization without format 4 subtables. 1811 c->revert (snap); 1812 return serialize (c, it, 1813 encodingrec_iter, 1814 base, 1815 plan, 1816 true); 1817 } 1818 } 1819 1820 else if (format == 12) 1821 { 1822 if (_can_drop (_, 1823 *unicodes_set, 1824 base, 1825 *unicodes_cache, 1826 local_unicodes_cache, 1827 + it | hb_map (hb_first), encodingrec_iter)) 1828 continue; 1829 c->copy (_, + it | hb_filter (*unicodes_set, hb_first), 12u, base, plan, &format12objidx); 1830 } 1831 else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx); 1832 } 1833 c->check_assign(this->encodingRecord.len, 1834 (c->length () - cmap::min_size)/EncodingRecord::static_size, 1835 HB_SERIALIZE_ERROR_INT_OVERFLOW); 1836 1837 // Fail if format 4 was dropped and there is no cmap12. 1838 return !drop_format_4 || format12objidx; 1839 } 1840 1841 template<typename Iterator, typename EncodingRecordIterator, 1842 hb_requires (hb_is_iterator (Iterator)), 1843 hb_requires (hb_is_iterator (EncodingRecordIterator))> 1844 bool _can_drop (const EncodingRecord& cmap12, 1845 const hb_set_t& cmap12_unicodes, 1846 const void* base, 1847 const SubtableUnicodesCache& unicodes_cache, 1848 SubtableUnicodesCache& local_unicodes_cache, 1849 Iterator subset_unicodes, 1850 EncodingRecordIterator encoding_records) 1851 { 1852 for (auto cp : + subset_unicodes | hb_filter (cmap12_unicodes)) 1853 { 1854 if (cp >= 0x10000) return false; 1855 } 1856 1857 unsigned target_platform; 1858 unsigned target_encoding; 1859 unsigned target_language = (base+cmap12.subtable).get_language (); 1860 1861 if (cmap12.platformID == 0 && cmap12.encodingID == 4) 1862 { 1863 target_platform = 0; 1864 target_encoding = 3; 1865 } else if (cmap12.platformID == 3 && cmap12.encodingID == 10) { 1866 target_platform = 3; 1867 target_encoding = 1; 1868 } else { 1869 return false; 1870 } 1871 1872 for (const auto& _ : encoding_records) 1873 { 1874 if (_.platformID != target_platform 1875 || _.encodingID != target_encoding 1876 || (base+_.subtable).get_language() != target_language) 1877 continue; 1878 1879 const hb_set_t* sibling_unicodes = unicodes_cache.set_for (&_, local_unicodes_cache); 1880 1881 auto cmap12 = + subset_unicodes | hb_filter (cmap12_unicodes); 1882 auto sibling = + subset_unicodes | hb_filter (*sibling_unicodes); 1883 for (; cmap12 && sibling; cmap12++, sibling++) 1884 { 1885 unsigned a = *cmap12; 1886 unsigned b = *sibling; 1887 if (a != b) return false; 1888 } 1889 1890 return !cmap12 && !sibling; 1891 } 1892 1893 return false; 1894 } 1895 1896 void closure_glyphs (const hb_set_t *unicodes, 1897 hb_set_t *glyphset) const 1898 { 1899 + hb_iter (encodingRecord) 1900 | hb_map (&EncodingRecord::subtable) 1901 | hb_map (hb_add (this)) 1902 | hb_filter ([&] (const CmapSubtable& _) { return _.u.format.v == 14; }) 1903 | hb_apply ([=] (const CmapSubtable& _) { _.u.format14.closure_glyphs (unicodes, glyphset); }) 1904 ; 1905 } 1906 1907 bool subset (hb_subset_context_t *c) const 1908 { 1909 TRACE_SUBSET (this); 1910 1911 cmap *cmap_prime = c->serializer->start_embed<cmap> (); 1912 1913 auto encodingrec_iter = 1914 + hb_iter (encodingRecord) 1915 | hb_filter ([&](const EncodingRecord& _) { 1916 return cmap::filter_encoding_records_for_subset (this, _); 1917 }) 1918 ; 1919 1920 if (unlikely (!encodingrec_iter.len ())) return_trace (false); 1921 1922 const EncodingRecord *unicode_bmp= nullptr, *unicode_ucs4 = nullptr, *ms_bmp = nullptr, *ms_ucs4 = nullptr; 1923 bool has_format12 = false; 1924 1925 for (const EncodingRecord& _ : encodingrec_iter) 1926 { 1927 unsigned format = (this + _.subtable).u.format.v; 1928 if (format == 12) has_format12 = true; 1929 1930 const EncodingRecord *table = std::addressof (_); 1931 if (_.platformID == 0 && _.encodingID == 3) unicode_bmp = table; 1932 else if (_.platformID == 0 && _.encodingID == 4) unicode_ucs4 = table; 1933 else if (_.platformID == 3 && _.encodingID == 1) ms_bmp = table; 1934 else if (_.platformID == 3 && _.encodingID == 10) ms_ucs4 = table; 1935 } 1936 1937 if (unlikely (!has_format12 && !unicode_bmp && !ms_bmp)) return_trace (false); 1938 if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false); 1939 1940 auto it = 1941 + c->plan->unicode_to_new_gid_list.iter () 1942 | hb_filter ([&] (const hb_codepoint_pair_t _) 1943 { return (_.second != HB_MAP_VALUE_INVALID); }) 1944 ; 1945 1946 return_trace (cmap_prime->serialize (c->serializer, 1947 it, 1948 encodingrec_iter, 1949 this, 1950 c->plan)); 1951 } 1952 1953 const CmapSubtable *find_best_subtable (bool *symbol = nullptr, 1954 bool *mac = nullptr, 1955 bool *macroman = nullptr) const 1956 { 1957 if (symbol) *symbol = false; 1958 if (mac) *mac = false; 1959 if (macroman) *macroman = false; 1960 1961 const CmapSubtable *subtable; 1962 1963 /* Symbol subtable. 1964 * Prefer symbol if available. 1965 * https://github.com/harfbuzz/harfbuzz/issues/1918 */ 1966 if ((subtable = this->find_subtable (3, 0))) 1967 { 1968 if (symbol) *symbol = true; 1969 return subtable; 1970 } 1971 1972 /* 32-bit subtables. */ 1973 if ((subtable = this->find_subtable (3, 10))) return subtable; 1974 if ((subtable = this->find_subtable (0, 6))) return subtable; 1975 if ((subtable = this->find_subtable (0, 4))) return subtable; 1976 1977 /* 16-bit subtables. */ 1978 if ((subtable = this->find_subtable (3, 1))) return subtable; 1979 if ((subtable = this->find_subtable (0, 3))) return subtable; 1980 if ((subtable = this->find_subtable (0, 2))) return subtable; 1981 if ((subtable = this->find_subtable (0, 1))) return subtable; 1982 if ((subtable = this->find_subtable (0, 0))) return subtable; 1983 1984 /* MacRoman subtable. */ 1985 if ((subtable = this->find_subtable (1, 0))) 1986 { 1987 if (mac) *mac = true; 1988 if (macroman) *macroman = true; 1989 return subtable; 1990 } 1991 /* Any other Mac subtable; we just map ASCII for these. */ 1992 if ((subtable = this->find_subtable (1, 0xFFFF))) 1993 { 1994 if (mac) *mac = true; 1995 return subtable; 1996 } 1997 1998 /* Meh. */ 1999 return &Null (CmapSubtable); 2000 } 2001 2002 struct accelerator_t 2003 { 2004 using cache_t = hb_cache_t<21, 19>; 2005 static_assert (sizeof (cache_t) == 1024, ""); 2006 2007 accelerator_t (hb_face_t *face) 2008 { 2009 this->table = hb_sanitize_context_t ().reference_table<cmap> (face); 2010 bool symbol, mac, macroman; 2011 this->subtable = table->find_best_subtable (&symbol, &mac, ¯oman); 2012 this->subtable_uvs = &Null (CmapSubtableFormat14); 2013 { 2014 const CmapSubtable *st = table->find_subtable (0, 5); 2015 if (st && st->u.format.v == 14) 2016 subtable_uvs = &st->u.format14; 2017 } 2018 2019 #ifndef HB_NO_OT_FONT_CMAP_CACHE 2020 cache = (cache_t *) hb_malloc (sizeof (cache_t)); 2021 if (cache) 2022 new (cache) cache_t (); 2023 else 2024 return; // Such that get_glyph_funcZ remains null. 2025 #endif 2026 2027 this->get_glyph_data = subtable; 2028 #ifndef HB_NO_CMAP_LEGACY_SUBTABLES 2029 if (unlikely (symbol)) 2030 { 2031 switch ((unsigned) face->table.OS2->get_font_page ()) { 2032 case OS2::font_page_t::FONT_PAGE_NONE: 2033 this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable, _hb_symbol_pua_map>; 2034 break; 2035 #ifndef HB_NO_OT_SHAPER_ARABIC_FALLBACK 2036 case OS2::font_page_t::FONT_PAGE_SIMP_ARABIC: 2037 this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable, _hb_arabic_pua_simp_map>; 2038 break; 2039 case OS2::font_page_t::FONT_PAGE_TRAD_ARABIC: 2040 this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable, _hb_arabic_pua_trad_map>; 2041 break; 2042 #endif 2043 default: 2044 this->get_glyph_funcZ = get_glyph_from<CmapSubtable>; 2045 break; 2046 } 2047 } 2048 else if (unlikely (macroman)) 2049 { 2050 this->get_glyph_funcZ = get_glyph_from_macroman<CmapSubtable>; 2051 } 2052 else if (unlikely (mac)) 2053 { 2054 this->get_glyph_funcZ = get_glyph_from_ascii<CmapSubtable>; 2055 } 2056 else 2057 #endif 2058 { 2059 switch (subtable->u.format.v) { 2060 /* Accelerate format 4 and format 12. */ 2061 default: 2062 this->get_glyph_funcZ = get_glyph_from<CmapSubtable>; 2063 break; 2064 case 12: 2065 this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>; 2066 break; 2067 case 4: 2068 { 2069 this->format4_accel.init (&subtable->u.format4); 2070 this->get_glyph_data = &this->format4_accel; 2071 this->get_glyph_funcZ = this->format4_accel.get_glyph_func; 2072 break; 2073 } 2074 } 2075 } 2076 } 2077 ~accelerator_t () 2078 { 2079 #ifndef HB_NO_OT_FONT_CMAP_CACHE 2080 hb_free (cache); 2081 #endif 2082 table.destroy (); 2083 } 2084 2085 inline bool _cached_get (hb_codepoint_t unicode, 2086 hb_codepoint_t *glyph) const 2087 { 2088 #ifndef HB_NO_OT_FONT_CMAP_CACHE 2089 // cache is always non-null if we have a get_glyph_funcZ 2090 unsigned v; 2091 if (cache->get (unicode, &v)) 2092 { 2093 *glyph = v; 2094 return true; 2095 } 2096 #endif 2097 bool ret = this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph); 2098 2099 #ifndef HB_NO_OT_FONT_CMAP_CACHE 2100 if (ret) 2101 cache->set (unicode, *glyph); 2102 #endif 2103 2104 return ret; 2105 } 2106 2107 bool get_nominal_glyph (hb_codepoint_t unicode, 2108 hb_codepoint_t *glyph) const 2109 { 2110 if (unlikely (!this->get_glyph_funcZ)) return false; 2111 return _cached_get (unicode, glyph); 2112 } 2113 2114 unsigned int get_nominal_glyphs (unsigned int count, 2115 const hb_codepoint_t *first_unicode, 2116 unsigned int unicode_stride, 2117 hb_codepoint_t *first_glyph, 2118 unsigned int glyph_stride) const 2119 { 2120 if (unlikely (!this->get_glyph_funcZ)) return 0; 2121 2122 unsigned int done; 2123 for (done = 0; 2124 done < count && _cached_get (*first_unicode, first_glyph); 2125 done++) 2126 { 2127 first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride); 2128 first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); 2129 } 2130 return done; 2131 } 2132 2133 bool get_variation_glyph (hb_codepoint_t unicode, 2134 hb_codepoint_t variation_selector, 2135 hb_codepoint_t *glyph) const 2136 { 2137 switch (this->subtable_uvs->get_glyph_variant (unicode, 2138 variation_selector, 2139 glyph)) 2140 { 2141 case GLYPH_VARIANT_NOT_FOUND: return false; 2142 case GLYPH_VARIANT_FOUND: return true; 2143 case GLYPH_VARIANT_USE_DEFAULT: break; 2144 } 2145 2146 return get_nominal_glyph (unicode, glyph); 2147 } 2148 2149 void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const 2150 { subtable->collect_unicodes (out, num_glyphs); } 2151 void collect_mapping (hb_set_t *unicodes, hb_map_t *mapping, 2152 unsigned num_glyphs = UINT_MAX) const 2153 { subtable->collect_mapping (unicodes, mapping, num_glyphs); } 2154 void collect_variation_selectors (hb_set_t *out) const 2155 { subtable_uvs->collect_variation_selectors (out); } 2156 void collect_variation_unicodes (hb_codepoint_t variation_selector, 2157 hb_set_t *out) const 2158 { subtable_uvs->collect_variation_unicodes (variation_selector, out); } 2159 2160 protected: 2161 typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, 2162 hb_codepoint_t codepoint, 2163 hb_codepoint_t *glyph); 2164 typedef uint_fast16_t (*hb_pua_remap_func_t) (unsigned); 2165 2166 template <typename Type> 2167 HB_INTERNAL static bool get_glyph_from (const void *obj, 2168 hb_codepoint_t codepoint, 2169 hb_codepoint_t *glyph) 2170 { 2171 const Type *typed_obj = (const Type *) obj; 2172 return typed_obj->get_glyph (codepoint, glyph); 2173 } 2174 2175 template <typename Type, hb_pua_remap_func_t remap> 2176 HB_INTERNAL static bool get_glyph_from_symbol (const void *obj, 2177 hb_codepoint_t codepoint, 2178 hb_codepoint_t *glyph) 2179 { 2180 const Type *typed_obj = (const Type *) obj; 2181 if (likely (typed_obj->get_glyph (codepoint, glyph))) 2182 return true; 2183 2184 if (hb_codepoint_t c = remap (codepoint)) 2185 return typed_obj->get_glyph (c, glyph); 2186 2187 return false; 2188 } 2189 2190 template <typename Type> 2191 HB_INTERNAL static bool get_glyph_from_ascii (const void *obj, 2192 hb_codepoint_t codepoint, 2193 hb_codepoint_t *glyph) 2194 { 2195 const Type *typed_obj = (const Type *) obj; 2196 return codepoint < 0x80 && typed_obj->get_glyph (codepoint, glyph); 2197 } 2198 2199 template <typename Type> 2200 HB_INTERNAL static bool get_glyph_from_macroman (const void *obj, 2201 hb_codepoint_t codepoint, 2202 hb_codepoint_t *glyph) 2203 { 2204 if (get_glyph_from_ascii<Type> (obj, codepoint, glyph)) 2205 return true; 2206 2207 const Type *typed_obj = (const Type *) obj; 2208 unsigned c = unicode_to_macroman (codepoint); 2209 return c && typed_obj->get_glyph (c, glyph); 2210 } 2211 2212 private: 2213 hb_nonnull_ptr_t<const CmapSubtable> subtable; 2214 hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs; 2215 2216 hb_cmap_get_glyph_func_t get_glyph_funcZ = nullptr; 2217 const void *get_glyph_data = nullptr; 2218 2219 CmapSubtableFormat4::accelerator_t format4_accel; 2220 2221 #ifndef HB_NO_OT_FONT_CMAP_CACHE 2222 cache_t *cache = nullptr; 2223 #endif 2224 2225 public: 2226 hb_blob_ptr_t<cmap> table; 2227 }; 2228 2229 protected: 2230 2231 const CmapSubtable *find_subtable (unsigned int platform_id, 2232 unsigned int encoding_id) const 2233 { 2234 EncodingRecord key; 2235 key.platformID = platform_id; 2236 key.encodingID = encoding_id; 2237 2238 const EncodingRecord &result = encodingRecord.bsearch (key); 2239 if (!result.subtable) 2240 return nullptr; 2241 2242 return &(this+result.subtable); 2243 } 2244 2245 public: 2246 2247 bool sanitize (hb_sanitize_context_t *c) const 2248 { 2249 TRACE_SANITIZE (this); 2250 return_trace (c->check_struct (this) && 2251 hb_barrier () && 2252 likely (version == 0) && 2253 encodingRecord.sanitize (c, this)); 2254 } 2255 2256 private: 2257 2258 static bool filter_encoding_records_for_subset(const cmap* cmap, 2259 const EncodingRecord& _) 2260 { 2261 return 2262 (_.platformID == 0 && _.encodingID == 3) || 2263 (_.platformID == 0 && _.encodingID == 4) || 2264 (_.platformID == 3 && _.encodingID == 1) || 2265 (_.platformID == 3 && _.encodingID == 10) || 2266 (cmap + _.subtable).u.format.v == 14; 2267 } 2268 2269 protected: 2270 HBUINT16 version; /* Table version number (0). */ 2271 SortedArray16Of<EncodingRecord> 2272 encodingRecord; /* Encoding tables. */ 2273 public: 2274 DEFINE_SIZE_ARRAY (4, encodingRecord); 2275 }; 2276 2277 struct cmap_accelerator_t : cmap::accelerator_t { 2278 cmap_accelerator_t (hb_face_t *face) : cmap::accelerator_t (face) {} 2279 }; 2280 2281 } /* namespace OT */ 2282 2283 2284 #endif /* HB_OT_CMAP_TABLE_HH */