cmap.cc (39062B)
1 // Copyright (c) 2009-2017 The OTS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "cmap.h" 6 7 #include <algorithm> 8 #include <set> 9 #include <utility> 10 #include <vector> 11 12 #include "maxp.h" 13 #include "os2.h" 14 15 // cmap - Character To Glyph Index Mapping Table 16 // http://www.microsoft.com/typography/otspec/cmap.htm 17 18 namespace { 19 20 struct CMAPSubtableHeader { 21 uint16_t platform; 22 uint16_t encoding; 23 uint32_t offset; 24 uint16_t format; 25 uint32_t length; 26 uint32_t language; 27 }; 28 29 struct Subtable314Range { 30 uint16_t start_range; 31 uint16_t end_range; 32 int16_t id_delta; 33 uint16_t id_range_offset; 34 uint32_t id_range_offset_offset; 35 }; 36 37 // Glyph array size for the Mac Roman (format 0) table. 38 const size_t kFormat0ArraySize = 256; 39 40 // The upper limit of the Unicode code point. 41 const uint32_t kUnicodeUpperLimit = 0x10FFFF; 42 43 // The maximum number of UVS records (See below). 44 const uint32_t kMaxCMAPSelectorRecords = 259; 45 // The range of UVSes are: 46 // 0x180B-0x180D (3 code points) 47 // 0xFE00-0xFE0F (16 code points) 48 // 0xE0100-0xE01EF (240 code points) 49 const uint32_t kMongolianVSStart = 0x180B; 50 const uint32_t kMongolianVSEnd = 0x180D; 51 const uint32_t kVSStart = 0xFE00; 52 const uint32_t kVSEnd = 0xFE0F; 53 const uint32_t kIVSStart = 0xE0100; 54 const uint32_t kIVSEnd = 0xE01EF; 55 const uint32_t kUVSUpperLimit = 0xFFFFFF; 56 57 } // namespace 58 59 namespace ots { 60 61 // Parses Format 4 tables 62 bool OpenTypeCMAP::ParseFormat4(int platform, int encoding, 63 const uint8_t *data, size_t length, uint16_t num_glyphs) { 64 ots::Buffer subtable(data, length); 65 66 // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the 67 // whole thing and recompacting it, we validate it and include it verbatim 68 // in the output. 69 70 OpenTypeOS2 *os2 = static_cast<OpenTypeOS2*>( 71 GetFont()->GetTypedTable(OTS_TAG_OS2)); 72 if (!os2) { 73 return Error("Required OS/2 table missing"); 74 } 75 76 if (!subtable.Skip(4)) { 77 return Error("Can't read 4 bytes at start of cmap format 4 subtable"); 78 } 79 uint16_t language = 0; 80 if (!subtable.ReadU16(&language)) { 81 return Error("Can't read language"); 82 } 83 if (language) { 84 // Platform ID 3 (windows) subtables should have language '0'. 85 return Error("Languages should be 0 (%d)", language); 86 } 87 88 uint16_t segcountx2, search_range, entry_selector, range_shift; 89 segcountx2 = search_range = entry_selector = range_shift = 0; 90 if (!subtable.ReadU16(&segcountx2) || 91 !subtable.ReadU16(&search_range) || 92 !subtable.ReadU16(&entry_selector) || 93 !subtable.ReadU16(&range_shift)) { 94 return Error("Failed to read subcmap structure"); 95 } 96 97 if (segcountx2 & 1 || search_range & 1) { 98 return Error("Bad subcmap structure"); 99 } 100 const uint16_t segcount = segcountx2 >> 1; 101 // There must be at least one segment according the spec. 102 if (segcount < 1) { 103 return Error("Segcount < 1 (%d)", segcount); 104 } 105 106 // log2segcount is the maximal x s.t. 2^x < segcount 107 unsigned log2segcount = 0; 108 while (1u << (log2segcount + 1) <= segcount) { 109 log2segcount++; 110 } 111 112 const uint16_t expected_search_range = 2 * 1u << log2segcount; 113 if (expected_search_range != search_range) { 114 return Error("expected search range != search range (%d != %d)", expected_search_range, search_range); 115 } 116 117 if (entry_selector != log2segcount) { 118 return Error("entry selector != log2(segement count) (%d != %d)", entry_selector, log2segcount); 119 } 120 121 const uint16_t expected_range_shift = segcountx2 - search_range; 122 if (range_shift != expected_range_shift) { 123 return Error("unexpected range shift (%d != %d)", range_shift, expected_range_shift); 124 } 125 126 std::vector<Subtable314Range> ranges(segcount); 127 128 for (unsigned i = 0; i < segcount; ++i) { 129 if (!subtable.ReadU16(&ranges[i].end_range)) { 130 return Error("Failed to read segment %d", i); 131 } 132 } 133 134 uint16_t padding; 135 if (!subtable.ReadU16(&padding)) { 136 return Error("Failed to read cmap subtable segment padding"); 137 } 138 if (padding) { 139 return Error("Non zero cmap subtable segment padding (%d)", padding); 140 } 141 142 for (unsigned i = 0; i < segcount; ++i) { 143 if (!subtable.ReadU16(&ranges[i].start_range)) { 144 return Error("Failed to read segment start range %d", i); 145 } 146 } 147 for (unsigned i = 0; i < segcount; ++i) { 148 if (!subtable.ReadS16(&ranges[i].id_delta)) { 149 return Error("Failed to read segment delta %d", i); 150 } 151 } 152 for (unsigned i = 0; i < segcount; ++i) { 153 ranges[i].id_range_offset_offset = subtable.offset(); 154 if (!subtable.ReadU16(&ranges[i].id_range_offset)) { 155 return Error("Failed to read segment range offset %d", i); 156 } 157 158 if (ranges[i].id_range_offset & 1) { 159 // Some font generators seem to put 65535 on id_range_offset 160 // for 0xFFFF-0xFFFF range. 161 // (e.g., many fonts in http://www.princexml.com/fonts/) 162 if (i == segcount - 1u) { 163 Warning("bad id_range_offset"); 164 ranges[i].id_range_offset = 0; 165 // The id_range_offset value in the transcoded font will not change 166 // since this table is not actually "transcoded" yet. 167 } else { 168 return Error("Bad segment offset (%d)", ranges[i].id_range_offset); 169 } 170 } 171 } 172 173 // ranges must be ascending order, based on the end_code. Ranges may not 174 // overlap. 175 for (unsigned i = 1; i < segcount; ++i) { 176 if ((i == segcount - 1u) && 177 (ranges[i - 1].start_range == 0xffff) && 178 (ranges[i - 1].end_range == 0xffff) && 179 (ranges[i].start_range == 0xffff) && 180 (ranges[i].end_range == 0xffff)) { 181 // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators. 182 // We'll accept them as an exception. 183 Warning("multiple 0xffff terminators found"); 184 continue; 185 } 186 187 // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have 188 // unsorted table... 189 if (ranges[i].end_range <= ranges[i - 1].end_range) { 190 return Error("Out of order end range (%d <= %d)", ranges[i].end_range, ranges[i-1].end_range); 191 } 192 if (ranges[i].start_range <= ranges[i - 1].end_range) { 193 return Error("out of order start range (%d <= %d)", ranges[i].start_range, ranges[i-1].end_range); 194 } 195 196 // On many fonts, the value of {first, last}_char_index are incorrect. 197 // Fix them. 198 if (os2->table.first_char_index != 0xFFFF && 199 ranges[i].start_range != 0xFFFF && 200 os2->table.first_char_index > ranges[i].start_range) { 201 os2->table.first_char_index = ranges[i].start_range; 202 } 203 if (os2->table.last_char_index != 0xFFFF && 204 ranges[i].end_range != 0xFFFF && 205 os2->table.last_char_index < ranges[i].end_range) { 206 os2->table.last_char_index = ranges[i].end_range; 207 } 208 } 209 210 // The last range must end at 0xffff 211 if (ranges[segcount - 1].start_range != 0xffff || ranges[segcount - 1].end_range != 0xffff) { 212 return Error("Final segment start and end must be 0xFFFF (0x%04X-0x%04X)", 213 ranges[segcount - 1].start_range, ranges[segcount - 1].end_range); 214 } 215 216 // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of 217 // each code-point defined in the table and make sure that they are all valid 218 // glyphs and that we don't access anything out-of-bounds. 219 for (unsigned i = 0; i < segcount; ++i) { 220 for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) { 221 const uint16_t code_point = static_cast<uint16_t>(cp); 222 if (ranges[i].id_range_offset == 0) { 223 // this is explictly allowed to overflow in the spec 224 const uint16_t glyph = code_point + ranges[i].id_delta; 225 if (glyph >= num_glyphs) { 226 return Error("Range glyph reference too high (%d > %d)", glyph, num_glyphs - 1); 227 } 228 } else { 229 const uint16_t range_delta = code_point - ranges[i].start_range; 230 // this might seem odd, but it's true. The offset is relative to the 231 // location of the offset value itself. 232 const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset + 233 ranges[i].id_range_offset + 234 range_delta * 2; 235 // We need to be able to access a 16-bit value from this offset 236 if (glyph_id_offset + 1 >= length) { 237 return Error("bad glyph id offset (%d > %ld)", glyph_id_offset, length); 238 } 239 uint16_t glyph; 240 std::memcpy(&glyph, data + glyph_id_offset, 2); 241 glyph = ots_ntohs(glyph); 242 if (glyph >= num_glyphs) { 243 return Error("Range glyph reference too high (%d > %d)", glyph, num_glyphs - 1); 244 } 245 } 246 } 247 } 248 249 // We accept the table. 250 // TODO(yusukes): transcode the subtable. 251 if (platform == 3 && encoding == 0) { 252 this->subtable_3_0_4_data = data; 253 this->subtable_3_0_4_length = length; 254 } else if (platform == 3 && encoding == 1) { 255 this->subtable_3_1_4_data = data; 256 this->subtable_3_1_4_length = length; 257 } else if (platform == 0 && encoding == 3) { 258 this->subtable_0_3_4_data = data; 259 this->subtable_0_3_4_length = length; 260 } else { 261 return Error("Unknown cmap subtable type (platform=%d, encoding=%d)", platform, encoding); 262 } 263 264 return true; 265 } 266 267 bool OpenTypeCMAP::Parse31012(const uint8_t *data, size_t length, 268 uint16_t num_glyphs) { 269 ots::Buffer subtable(data, length); 270 271 // Format 12 tables are simple. We parse these and fully serialise them 272 // later. 273 274 if (!subtable.Skip(8)) { 275 return Error("failed to skip the first 8 bytes of format 12 subtable"); 276 } 277 uint32_t language = 0; 278 if (!subtable.ReadU32(&language)) { 279 return Error("can't read format 12 subtable language"); 280 } 281 if (language) { 282 return Error("format 12 subtable language should be zero (%d)", language); 283 } 284 285 uint32_t num_groups = 0; 286 if (!subtable.ReadU32(&num_groups)) { 287 return Error("can't read number of format 12 subtable groups"); 288 } 289 if (num_groups == 0 || subtable.remaining() / 12 < num_groups) { 290 return Error("Bad format 12 subtable group count %d", num_groups); 291 } 292 293 std::vector<ots::OpenTypeCMAPSubtableRange> &groups 294 = this->subtable_3_10_12; 295 groups.resize(num_groups); 296 297 for (unsigned i = 0; i < num_groups; ++i) { 298 if (!subtable.ReadU32(&groups[i].start_range) || 299 !subtable.ReadU32(&groups[i].end_range) || 300 !subtable.ReadU32(&groups[i].start_glyph_id)) { 301 return Error("can't read format 12 subtable group"); 302 } 303 304 if (groups[i].start_range > kUnicodeUpperLimit || 305 groups[i].end_range > kUnicodeUpperLimit || 306 groups[i].start_glyph_id > 0xFFFF) { 307 return Error("bad format 12 subtable group (startCharCode=0x%4X, endCharCode=0x%4X, startGlyphID=%d)", 308 groups[i].start_range, groups[i].end_range, groups[i].start_glyph_id); 309 } 310 311 // We assert that the glyph value is within range. Because of the range 312 // limits, above, we don't need to worry about overflow. 313 if (groups[i].end_range < groups[i].start_range) { 314 return Error("format 12 subtable group endCharCode before startCharCode (0x%4X < 0x%4X)", 315 groups[i].end_range, groups[i].start_range); 316 } 317 if ((groups[i].end_range - groups[i].start_range) + 318 groups[i].start_glyph_id > num_glyphs) { 319 return Error("bad format 12 subtable group startGlyphID (%d)", groups[i].start_glyph_id); 320 } 321 } 322 323 // the groups must be sorted by start code and may not overlap 324 for (unsigned i = 1; i < num_groups; ++i) { 325 if (groups[i].start_range <= groups[i - 1].start_range) { 326 return Error("out of order format 12 subtable group (startCharCode=0x%4X <= startCharCode=0x%4X of previous group)", 327 groups[i].start_range, groups[i-1].start_range); 328 } 329 if (groups[i].start_range <= groups[i - 1].end_range) { 330 return Error("overlapping format 12 subtable groups (startCharCode=0x%4X <= endCharCode=0x%4X of previous group)", 331 groups[i].start_range, groups[i-1].end_range); 332 } 333 } 334 335 return true; 336 } 337 338 bool OpenTypeCMAP::Parse31013(const uint8_t *data, size_t length, 339 uint16_t num_glyphs) { 340 ots::Buffer subtable(data, length); 341 342 // Format 13 tables are simple. We parse these and fully serialise them 343 // later. 344 345 if (!subtable.Skip(8)) { 346 return Error("Bad cmap subtable length"); 347 } 348 uint32_t language = 0; 349 if (!subtable.ReadU32(&language)) { 350 return Error("Can't read cmap subtable language"); 351 } 352 if (language) { 353 return Error("Cmap subtable language should be zero but is %d", language); 354 } 355 356 uint32_t num_groups = 0; 357 if (!subtable.ReadU32(&num_groups)) { 358 return Error("Can't read number of groups in a cmap subtable"); 359 } 360 361 // We limit the number of groups in the same way as in 3.10.12 tables. See 362 // the comment there in 363 if (num_groups == 0 || subtable.remaining() / 12 < num_groups) { 364 return Error("Bad format 13 subtable group count %d", num_groups); 365 } 366 367 std::vector<ots::OpenTypeCMAPSubtableRange> &groups = this->subtable_3_10_13; 368 groups.resize(num_groups); 369 370 for (unsigned i = 0; i < num_groups; ++i) { 371 if (!subtable.ReadU32(&groups[i].start_range) || 372 !subtable.ReadU32(&groups[i].end_range) || 373 !subtable.ReadU32(&groups[i].start_glyph_id)) { 374 return Error("Can't read subrange structure in a cmap subtable"); 375 } 376 377 // We conservatively limit all of the values to protect some parsers from 378 // overflows 379 if (groups[i].start_range > kUnicodeUpperLimit || 380 groups[i].end_range > kUnicodeUpperLimit || 381 groups[i].start_glyph_id > 0xFFFF) { 382 return Error("Bad subrange with start_range=%d, end_range=%d, start_glyph_id=%d", groups[i].start_range, groups[i].end_range, groups[i].start_glyph_id); 383 } 384 385 if (groups[i].start_glyph_id >= num_glyphs) { 386 return Error("Subrange starting glyph id too high (%d > %d)", groups[i].start_glyph_id, num_glyphs); 387 } 388 } 389 390 // the groups must be sorted by start code and may not overlap 391 for (unsigned i = 1; i < num_groups; ++i) { 392 if (groups[i].start_range <= groups[i - 1].start_range) { 393 return Error("Overlapping subrange starts (%d >= %d)", groups[i]. start_range, groups[i-1].start_range); 394 } 395 if (groups[i].start_range <= groups[i - 1].end_range) { 396 return Error("Overlapping subranges (%d <= %d)", groups[i].start_range, groups[i-1].end_range); 397 } 398 } 399 400 return true; 401 } 402 403 bool OpenTypeCMAP::Parse0514(const uint8_t *data, size_t length) { 404 // Unicode Variation Selector table 405 ots::Buffer subtable(data, length); 406 407 // Format 14 tables are simple. We parse these and fully serialise them 408 // later. 409 410 // Skip format (USHORT) and length (ULONG) 411 if (!subtable.Skip(6)) { 412 return Error("Can't read start of cmap subtable"); 413 } 414 415 uint32_t num_records = 0; 416 if (!subtable.ReadU32(&num_records)) { 417 return Error("Can't read number of records in cmap subtable"); 418 } 419 if (num_records == 0 || num_records > kMaxCMAPSelectorRecords) { 420 return Error("Bad format 14 subtable records count %d", num_records); 421 } 422 423 std::vector<ots::OpenTypeCMAPSubtableVSRecord>& records 424 = this->subtable_0_5_14; 425 records.resize(num_records); 426 427 for (unsigned i = 0; i < num_records; ++i) { 428 if (!subtable.ReadU24(&records[i].var_selector) || 429 !subtable.ReadU32(&records[i].default_offset) || 430 !subtable.ReadU32(&records[i].non_default_offset)) { 431 return Error("Can't read record structure of record %d in cmap subtale", i); 432 } 433 // Checks the value of variation selector 434 if (!((records[i].var_selector >= kMongolianVSStart && 435 records[i].var_selector <= kMongolianVSEnd) || 436 (records[i].var_selector >= kVSStart && 437 records[i].var_selector <= kVSEnd) || 438 (records[i].var_selector >= kIVSStart && 439 records[i].var_selector <= kIVSEnd))) { 440 return Error("Bad record variation selector (%04X) in record %i", records[i].var_selector, i); 441 } 442 if (i > 0 && 443 records[i-1].var_selector >= records[i].var_selector) { 444 return Error("Out of order variation selector (%04X >= %04X) in record %d", records[i-1].var_selector, records[i].var_selector, i); 445 } 446 447 // Checks offsets 448 if (!records[i].default_offset && !records[i].non_default_offset) { 449 return Error("No default aoffset in variation selector record %d", i); 450 } 451 if (records[i].default_offset && 452 records[i].default_offset >= length) { 453 return Error("Default offset too high (%d >= %ld) in record %d", records[i].default_offset, length, i); 454 } 455 if (records[i].non_default_offset && 456 records[i].non_default_offset >= length) { 457 return Error("Non default offset too high (%d >= %ld) in record %d", records[i].non_default_offset, length, i); 458 } 459 } 460 461 for (unsigned i = 0; i < num_records; ++i) { 462 // Checks default UVS table 463 if (records[i].default_offset) { 464 subtable.set_offset(records[i].default_offset); 465 uint32_t num_ranges = 0; 466 if (!subtable.ReadU32(&num_ranges)) { 467 return Error("Can't read number of ranges in record %d", i); 468 } 469 if (num_ranges == 0 || subtable.remaining() / 4 < num_ranges) { 470 return Error("Bad number of ranges (%d) in record %d", num_ranges, i); 471 } 472 473 uint32_t last_unicode_value = 0; 474 std::vector<ots::OpenTypeCMAPSubtableVSRange>& ranges 475 = records[i].ranges; 476 ranges.resize(num_ranges); 477 478 for (unsigned j = 0; j < num_ranges; ++j) { 479 if (!subtable.ReadU24(&ranges[j].unicode_value) || 480 !subtable.ReadU8(&ranges[j].additional_count)) { 481 return Error("Can't read range info in variation selector record %d", i); 482 } 483 const uint32_t check_value = 484 ranges[j].unicode_value + ranges[j].additional_count; 485 if (ranges[j].unicode_value == 0 || 486 ranges[j].unicode_value > kUnicodeUpperLimit || 487 check_value > kUVSUpperLimit || 488 (last_unicode_value && 489 ranges[j].unicode_value <= last_unicode_value)) { 490 return Error("Bad Unicode value *%04X) in variation selector range %d record %d", ranges[j].unicode_value, j, i); 491 } 492 last_unicode_value = check_value; 493 } 494 } 495 496 // Checks non default UVS table 497 if (records[i].non_default_offset) { 498 subtable.set_offset(records[i].non_default_offset); 499 uint32_t num_mappings = 0; 500 if (!subtable.ReadU32(&num_mappings)) { 501 return Error("Can't read number of mappings in variation selector record %d", i); 502 } 503 if (num_mappings == 0 || subtable.remaining() / 5 < num_mappings) { 504 return Error("Bad number of mappings (%d) in variation selector record %d", num_mappings, i); 505 } 506 507 uint32_t last_unicode_value = 0; 508 std::vector<ots::OpenTypeCMAPSubtableVSMapping>& mappings 509 = records[i].mappings; 510 mappings.resize(num_mappings); 511 512 for (unsigned j = 0; j < num_mappings; ++j) { 513 if (!subtable.ReadU24(&mappings[j].unicode_value) || 514 !subtable.ReadU16(&mappings[j].glyph_id)) { 515 return Error("Can't read mapping %d in variation selector record %d", j, i); 516 } 517 if (mappings[j].glyph_id == 0 || mappings[j].unicode_value == 0) { 518 return Error("Bad mapping (%04X -> %d) in mapping %d of variation selector %d", mappings[j].unicode_value, mappings[j].glyph_id, j, i); 519 } 520 if (mappings[j].unicode_value > kUnicodeUpperLimit) { 521 return Error("Invalid Unicode value (%04X > %04X) in mapping %d of variation selector %d", mappings[j].unicode_value, kUnicodeUpperLimit, j, i); 522 } 523 if (last_unicode_value && 524 mappings[j].unicode_value <= last_unicode_value) { 525 return Error("Out of order Unicode value (%04X <= %04X) in mapping %d of variation selector %d", mappings[j].unicode_value, last_unicode_value, j, i); 526 } 527 last_unicode_value = mappings[j].unicode_value; 528 } 529 } 530 } 531 532 if (subtable.offset() != length) { 533 return Error("Bad subtable offset (%ld != %ld)", subtable.offset(), length); 534 } 535 this->subtable_0_5_14_length = subtable.offset(); 536 return true; 537 } 538 539 bool OpenTypeCMAP::Parse100(const uint8_t *data, size_t length) { 540 // Mac Roman table 541 ots::Buffer subtable(data, length); 542 543 if (!subtable.Skip(4)) { 544 return Error("Bad cmap subtable"); 545 } 546 uint16_t language = 0; 547 if (!subtable.ReadU16(&language)) { 548 return Error("Can't read language in cmap subtable"); 549 } 550 if (language) { 551 // simsun.ttf has non-zero language id. 552 Warning("language id should be zero: %u", language); 553 } 554 555 this->subtable_1_0_0.reserve(kFormat0ArraySize); 556 for (size_t i = 0; i < kFormat0ArraySize; ++i) { 557 uint8_t glyph_id = 0; 558 if (!subtable.ReadU8(&glyph_id)) { 559 return Error("Can't read glyph id at array[%ld] in cmap subtable", i); 560 } 561 this->subtable_1_0_0.push_back(glyph_id); 562 } 563 564 return true; 565 } 566 567 bool OpenTypeCMAP::Parse(const uint8_t *data, size_t length) { 568 Buffer table(data, length); 569 570 uint16_t version = 0; 571 uint16_t num_tables = 0; 572 if (!table.ReadU16(&version) || 573 !table.ReadU16(&num_tables)) { 574 return Error("Can't read structure of cmap"); 575 } 576 577 if (version != 0) { 578 return Error("Non zero cmap version (%d)", version); 579 } 580 if (!num_tables) { 581 return Error("No subtables in cmap!"); 582 } 583 584 std::vector<CMAPSubtableHeader> subtable_headers; 585 586 // read the subtable headers 587 subtable_headers.reserve(num_tables); 588 for (unsigned i = 0; i < num_tables; ++i) { 589 CMAPSubtableHeader subt; 590 591 if (!table.ReadU16(&subt.platform) || 592 !table.ReadU16(&subt.encoding) || 593 !table.ReadU32(&subt.offset)) { 594 return Error("Can't read subtable information cmap subtable %d", i); 595 } 596 597 subtable_headers.push_back(subt); 598 } 599 600 const size_t data_offset = table.offset(); 601 602 // make sure that all the offsets are valid. 603 for (unsigned i = 0; i < num_tables; ++i) { 604 if (subtable_headers[i].offset > 1024 * 1024 * 1024) { 605 return Error("Bad subtable offset in cmap subtable %d", i); 606 } 607 if (subtable_headers[i].offset < data_offset || 608 subtable_headers[i].offset >= length) { 609 return Error("Bad subtable offset (%d) in cmap subtable %d", subtable_headers[i].offset, i); 610 } 611 } 612 613 // the format of the table is the first couple of bytes in the table. The 614 // length of the table is stored in a format-specific way. 615 for (unsigned i = 0; i < num_tables; ++i) { 616 table.set_offset(subtable_headers[i].offset); 617 if (!table.ReadU16(&subtable_headers[i].format)) { 618 return Error("Can't read cmap subtable header format %d", i); 619 } 620 621 uint16_t len = 0; 622 uint16_t lang = 0; 623 switch (subtable_headers[i].format) { 624 case 0: 625 case 4: 626 if (!table.ReadU16(&len)) { 627 return Error("Can't read cmap subtable %d length", i); 628 } 629 if (!table.ReadU16(&lang)) { 630 return Error("Can't read cmap subtable %d language", i); 631 } 632 subtable_headers[i].length = len; 633 subtable_headers[i].language = lang; 634 break; 635 case 12: 636 case 13: 637 if (!table.Skip(2)) { 638 return Error("Bad cmap subtable %d structure", i); 639 } 640 if (!table.ReadU32(&subtable_headers[i].length)) { 641 return Error("Can read cmap subtable %d length", i); 642 } 643 if (!table.ReadU32(&subtable_headers[i].language)) { 644 return Error("Can't read cmap subtable %d language", i); 645 } 646 break; 647 case 14: 648 if (!table.ReadU32(&subtable_headers[i].length)) { 649 return Error("Can't read cmap subtable %d length", i); 650 } 651 subtable_headers[i].language = 0; 652 break; 653 default: 654 subtable_headers[i].length = 0; 655 subtable_headers[i].language = 0; 656 break; 657 } 658 } 659 660 // check if the table is sorted first by platform ID, then by encoding ID. 661 for (unsigned i = 1; i < num_tables; ++i) { 662 if (subtable_headers[i - 1].platform > subtable_headers[i].platform || 663 (subtable_headers[i - 1].platform == subtable_headers[i].platform && 664 (subtable_headers[i - 1].encoding > subtable_headers[i].encoding || 665 (subtable_headers[i - 1].encoding == subtable_headers[i].encoding && 666 subtable_headers[i - 1].language > subtable_headers[i].language)))) 667 Warning("subtable %d with platform ID %d, encoding ID %d, language ID %d " 668 "following subtable with platform ID %d, encoding ID %d, language ID %d", 669 i, 670 subtable_headers[i].platform, 671 subtable_headers[i].encoding, 672 subtable_headers[i].language, 673 subtable_headers[i - 1].platform, 674 subtable_headers[i - 1].encoding, 675 subtable_headers[i - 1].language); 676 } 677 678 // Now, verify that all the lengths are sane 679 for (unsigned i = 0; i < num_tables; ++i) { 680 if (!subtable_headers[i].length) continue; 681 if (subtable_headers[i].length > 1024 * 1024 * 1024) { 682 return Error("Bad cmap subtable %d length", i); 683 } 684 // We know that both the offset and length are < 1GB, so the following 685 // addition doesn't overflow 686 const uint32_t end_byte 687 = subtable_headers[i].offset + subtable_headers[i].length; 688 if (end_byte > length) { 689 return Error("Over long cmap subtable %d @ %d for %d", i, subtable_headers[i].offset, subtable_headers[i].length); 690 } 691 } 692 693 // check that the cmap subtables are not overlapping. 694 std::set<std::pair<uint32_t, uint32_t> > uniq_checker; 695 std::vector<std::pair<uint32_t, uint8_t> > overlap_checker; 696 for (unsigned i = 0; i < num_tables; ++i) { 697 const uint32_t end_byte 698 = subtable_headers[i].offset + subtable_headers[i].length; 699 700 if (!uniq_checker.insert(std::make_pair(subtable_headers[i].offset, 701 end_byte)).second) { 702 // Sometimes Unicode table and MS table share exactly the same data. 703 // We'll allow this. 704 continue; 705 } 706 overlap_checker.push_back( 707 std::make_pair(subtable_headers[i].offset, 708 static_cast<uint8_t>(1) /* start */)); 709 overlap_checker.push_back( 710 std::make_pair(end_byte, static_cast<uint8_t>(0) /* end */)); 711 } 712 std::sort(overlap_checker.begin(), overlap_checker.end()); 713 int overlap_count = 0; 714 for (unsigned i = 0; i < overlap_checker.size(); ++i) { 715 overlap_count += (overlap_checker[i].second ? 1 : -1); 716 if (overlap_count > 1) { 717 return Error("Excessive overlap count %d", overlap_count); 718 } 719 } 720 721 // we grab the number of glyphs in the file from the maxp table to make sure 722 // that the character map isn't referencing anything beyound this range. 723 OpenTypeMAXP *maxp = static_cast<OpenTypeMAXP*>( 724 GetFont()->GetTypedTable(OTS_TAG_MAXP)); 725 if (!maxp) { 726 return Error("No maxp table in font! Needed by cmap."); 727 } 728 const uint16_t num_glyphs = maxp->num_glyphs; 729 730 // We only support a subset of the possible character map tables. Microsoft 731 // 'strongly recommends' that everyone supports the Unicode BMP table with 732 // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables: 733 // Platform ID Encoding ID Format 734 // 0 0 4 (Unicode Default) 735 // 0 1 4 (Unicode 1.1) 736 // 0 3 4 (Unicode BMP) 737 // 0 3 12 (Unicode UCS-4) 738 // 0 5 14 (Unicode Variation Sequences) 739 // 1 0 0 (Mac Roman) 740 // 3 0 4 (MS Symbol) 741 // 3 1 4 (MS Unicode BMP) 742 // 3 10 12 (MS Unicode UCS-4) 743 // 3 10 13 (MS UCS-4 Fallback mapping) 744 // 745 // Note: 746 // * 0-0-4 and 0-1-4 tables are (usually) written as a 3-1-4 table. If 3-1-4 table 747 // also exists, the 0-0-4 or 0-1-4 tables are ignored. 748 // * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table. 749 // Some fonts which include 0-5-14 table seems to be required 0-3-4 750 // table. The 0-3-4 table will be wriiten even if 3-1-4 table also exists. 751 // * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also 752 // exists, the 0-3-12 table is ignored. 753 // 754 755 for (unsigned i = 0; i < num_tables; ++i) { 756 if (subtable_headers[i].platform == 0) { 757 // Unicode platform 758 759 if ((subtable_headers[i].encoding == 0 || subtable_headers[i].encoding == 1) && 760 (subtable_headers[i].format == 4)) { 761 // parse and output the 0-0-4 and 0-1-4 tables as 3-1-4 table. Sometimes the 0-0-4 762 // table actually points to MS symbol data and thus should be parsed as 763 // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be 764 // recovered in ots_cmap_serialise(). 765 if (!ParseFormat4(3, 1, data + subtable_headers[i].offset, 766 subtable_headers[i].length, num_glyphs)) { 767 return Error("Failed to parse format 4 cmap subtable %d", i); 768 } 769 } else if ((subtable_headers[i].encoding == 3) && 770 (subtable_headers[i].format == 4)) { 771 // parse and output the 0-3-4 table as 0-3-4 table. 772 if (!ParseFormat4(0, 3, data + subtable_headers[i].offset, 773 subtable_headers[i].length, num_glyphs)) { 774 return Error("Failed to parse format 4 cmap subtable %d", i); 775 } 776 } else if ((subtable_headers[i].encoding == 3 || 777 subtable_headers[i].encoding == 4) && 778 (subtable_headers[i].format == 12)) { 779 // parse and output the 0-3-12 or 0-4-12 tables as 3-10-12 table. 780 if (!Parse31012(data + subtable_headers[i].offset, 781 subtable_headers[i].length, num_glyphs)) { 782 return Error("Failed to parse format 12 cmap subtable %d", i); 783 } 784 } else if ((subtable_headers[i].encoding == 5) && 785 (subtable_headers[i].format == 14)) { 786 if (!Parse0514(data + subtable_headers[i].offset, 787 subtable_headers[i].length)) { 788 return Error("Failed to parse format 14 cmap subtable %d", i); 789 } 790 } 791 } else if (subtable_headers[i].platform == 1) { 792 // Mac platform 793 794 if ((subtable_headers[i].encoding == 0) && 795 (subtable_headers[i].format == 0)) { 796 // parse and output the 1-0-0 table. 797 if (!Parse100(data + subtable_headers[i].offset, 798 subtable_headers[i].length)) { 799 return OTS_FAILURE(); 800 } 801 } 802 } else if (subtable_headers[i].platform == 3) { 803 // MS platform 804 805 switch (subtable_headers[i].encoding) { 806 case 0: 807 case 1: 808 if (subtable_headers[i].format == 4) { 809 // parse 3-0-4 or 3-1-4 table. 810 if (!ParseFormat4(subtable_headers[i].platform, 811 subtable_headers[i].encoding, 812 data + subtable_headers[i].offset, 813 subtable_headers[i].length, num_glyphs)) { 814 return OTS_FAILURE(); 815 } 816 } 817 break; 818 case 10: 819 if (subtable_headers[i].format == 12) { 820 this->subtable_3_10_12.clear(); 821 if (!Parse31012(data + subtable_headers[i].offset, 822 subtable_headers[i].length, num_glyphs)) { 823 return OTS_FAILURE(); 824 } 825 } else if (subtable_headers[i].format == 13) { 826 this->subtable_3_10_13.clear(); 827 if (!Parse31013(data + subtable_headers[i].offset, 828 subtable_headers[i].length, num_glyphs)) { 829 return OTS_FAILURE(); 830 } 831 } 832 break; 833 } 834 } 835 } 836 837 return true; 838 } 839 840 bool OpenTypeCMAP::Serialize(OTSStream *out) { 841 const bool have_034 = this->subtable_0_3_4_data != NULL; 842 const bool have_0514 = this->subtable_0_5_14.size() != 0; 843 const bool have_100 = this->subtable_1_0_0.size() != 0; 844 const bool have_304 = this->subtable_3_0_4_data != NULL; 845 // MS Symbol and MS Unicode tables should not co-exist. 846 // See the comment above in 0-0-4 parser. 847 const bool have_314 = (!have_304) && this->subtable_3_1_4_data; 848 const bool have_31012 = this->subtable_3_10_12.size() != 0; 849 const bool have_31013 = this->subtable_3_10_13.size() != 0; 850 const uint16_t num_subtables = static_cast<uint16_t>(have_034) + 851 static_cast<uint16_t>(have_0514) + 852 static_cast<uint16_t>(have_100) + 853 static_cast<uint16_t>(have_304) + 854 static_cast<uint16_t>(have_314) + 855 static_cast<uint16_t>(have_31012) + 856 static_cast<uint16_t>(have_31013); 857 const off_t table_start = out->Tell(); 858 859 // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables 860 // (e.g., old fonts for Mac). We don't support them. 861 if (!have_304 && !have_314 && !have_034 && !have_31012 && !have_31013) { 862 return Error("no supported subtables were found"); 863 } 864 865 if (!out->WriteU16(0) || 866 !out->WriteU16(num_subtables)) { 867 return OTS_FAILURE(); 868 } 869 870 const off_t record_offset = out->Tell(); 871 if (!out->Pad(num_subtables * 8)) { 872 return OTS_FAILURE(); 873 } 874 875 const off_t offset_034 = out->Tell(); 876 if (have_034) { 877 if (!out->Write(this->subtable_0_3_4_data, 878 this->subtable_0_3_4_length)) { 879 return OTS_FAILURE(); 880 } 881 } 882 883 const off_t offset_0514 = out->Tell(); 884 if (have_0514) { 885 const std::vector<ots::OpenTypeCMAPSubtableVSRecord> &records 886 = this->subtable_0_5_14; 887 const unsigned num_records = records.size(); 888 if (!out->WriteU16(14) || 889 !out->WriteU32(this->subtable_0_5_14_length) || 890 !out->WriteU32(num_records)) { 891 return OTS_FAILURE(); 892 } 893 for (unsigned i = 0; i < num_records; ++i) { 894 if (!out->WriteU24(records[i].var_selector) || 895 !out->WriteU32(records[i].default_offset) || 896 !out->WriteU32(records[i].non_default_offset)) { 897 return OTS_FAILURE(); 898 } 899 } 900 for (unsigned i = 0; i < num_records; ++i) { 901 if (records[i].default_offset) { 902 const std::vector<ots::OpenTypeCMAPSubtableVSRange> &ranges 903 = records[i].ranges; 904 const unsigned num_ranges = ranges.size(); 905 if (!out->Seek(records[i].default_offset + offset_0514) || 906 !out->WriteU32(num_ranges)) { 907 return OTS_FAILURE(); 908 } 909 for (unsigned j = 0; j < num_ranges; ++j) { 910 if (!out->WriteU24(ranges[j].unicode_value) || 911 !out->WriteU8(ranges[j].additional_count)) { 912 return OTS_FAILURE(); 913 } 914 } 915 } 916 if (records[i].non_default_offset) { 917 const std::vector<ots::OpenTypeCMAPSubtableVSMapping> &mappings 918 = records[i].mappings; 919 const unsigned num_mappings = mappings.size(); 920 if (!out->Seek(records[i].non_default_offset + offset_0514) || 921 !out->WriteU32(num_mappings)) { 922 return OTS_FAILURE(); 923 } 924 for (unsigned j = 0; j < num_mappings; ++j) { 925 if (!out->WriteU24(mappings[j].unicode_value) || 926 !out->WriteU16(mappings[j].glyph_id)) { 927 return OTS_FAILURE(); 928 } 929 } 930 } 931 } 932 } 933 934 const off_t offset_100 = out->Tell(); 935 if (have_100) { 936 if (!out->WriteU16(0) || // format 937 !out->WriteU16(6 + kFormat0ArraySize) || // length 938 !out->WriteU16(0)) { // language 939 return OTS_FAILURE(); 940 } 941 if (!out->Write(&(this->subtable_1_0_0[0]), kFormat0ArraySize)) { 942 return OTS_FAILURE(); 943 } 944 } 945 946 const off_t offset_304 = out->Tell(); 947 if (have_304) { 948 if (!out->Write(this->subtable_3_0_4_data, 949 this->subtable_3_0_4_length)) { 950 return OTS_FAILURE(); 951 } 952 } 953 954 const off_t offset_314 = out->Tell(); 955 if (have_314) { 956 if (!out->Write(this->subtable_3_1_4_data, 957 this->subtable_3_1_4_length)) { 958 return OTS_FAILURE(); 959 } 960 } 961 962 const off_t offset_31012 = out->Tell(); 963 if (have_31012) { 964 std::vector<OpenTypeCMAPSubtableRange> &groups 965 = this->subtable_3_10_12; 966 const unsigned num_groups = groups.size(); 967 if (!out->WriteU16(12) || 968 !out->WriteU16(0) || 969 !out->WriteU32(num_groups * 12 + 16) || 970 !out->WriteU32(0) || 971 !out->WriteU32(num_groups)) { 972 return OTS_FAILURE(); 973 } 974 975 for (unsigned i = 0; i < num_groups; ++i) { 976 if (!out->WriteU32(groups[i].start_range) || 977 !out->WriteU32(groups[i].end_range) || 978 !out->WriteU32(groups[i].start_glyph_id)) { 979 return OTS_FAILURE(); 980 } 981 } 982 } 983 984 const off_t offset_31013 = out->Tell(); 985 if (have_31013) { 986 std::vector<OpenTypeCMAPSubtableRange> &groups 987 = this->subtable_3_10_13; 988 const unsigned num_groups = groups.size(); 989 if (!out->WriteU16(13) || 990 !out->WriteU16(0) || 991 !out->WriteU32(num_groups * 12 + 16) || 992 !out->WriteU32(0) || 993 !out->WriteU32(num_groups)) { 994 return OTS_FAILURE(); 995 } 996 997 for (unsigned i = 0; i < num_groups; ++i) { 998 if (!out->WriteU32(groups[i].start_range) || 999 !out->WriteU32(groups[i].end_range) || 1000 !out->WriteU32(groups[i].start_glyph_id)) { 1001 return OTS_FAILURE(); 1002 } 1003 } 1004 } 1005 1006 const off_t table_end = out->Tell(); 1007 1008 // Now seek back and write the table of offsets 1009 if (!out->Seek(record_offset)) { 1010 return OTS_FAILURE(); 1011 } 1012 1013 if (have_034) { 1014 if (!out->WriteU16(0) || 1015 !out->WriteU16(3) || 1016 !out->WriteU32(offset_034 - table_start)) { 1017 return OTS_FAILURE(); 1018 } 1019 } 1020 1021 if (have_0514) { 1022 if (!out->WriteU16(0) || 1023 !out->WriteU16(5) || 1024 !out->WriteU32(offset_0514 - table_start)) { 1025 return OTS_FAILURE(); 1026 } 1027 } 1028 1029 if (have_100) { 1030 if (!out->WriteU16(1) || 1031 !out->WriteU16(0) || 1032 !out->WriteU32(offset_100 - table_start)) { 1033 return OTS_FAILURE(); 1034 } 1035 } 1036 1037 if (have_304) { 1038 if (!out->WriteU16(3) || 1039 !out->WriteU16(0) || 1040 !out->WriteU32(offset_304 - table_start)) { 1041 return OTS_FAILURE(); 1042 } 1043 } 1044 1045 if (have_314) { 1046 if (!out->WriteU16(3) || 1047 !out->WriteU16(1) || 1048 !out->WriteU32(offset_314 - table_start)) { 1049 return OTS_FAILURE(); 1050 } 1051 } 1052 1053 if (have_31012) { 1054 if (!out->WriteU16(3) || 1055 !out->WriteU16(10) || 1056 !out->WriteU32(offset_31012 - table_start)) { 1057 return OTS_FAILURE(); 1058 } 1059 } 1060 1061 if (have_31013) { 1062 if (!out->WriteU16(3) || 1063 !out->WriteU16(10) || 1064 !out->WriteU32(offset_31013 - table_start)) { 1065 return OTS_FAILURE(); 1066 } 1067 } 1068 1069 if (!out->Seek(table_end)) { 1070 return OTS_FAILURE(); 1071 } 1072 1073 return true; 1074 } 1075 1076 } // namespace ots