tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

cmap.cc (39062B)


      1 // Copyright (c) 2009-2017 The OTS Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "cmap.h"
      6 
      7 #include <algorithm>
      8 #include <set>
      9 #include <utility>
     10 #include <vector>
     11 
     12 #include "maxp.h"
     13 #include "os2.h"
     14 
     15 // cmap - Character To Glyph Index Mapping Table
     16 // http://www.microsoft.com/typography/otspec/cmap.htm
     17 
     18 namespace {
     19 
     20 struct CMAPSubtableHeader {
     21  uint16_t platform;
     22  uint16_t encoding;
     23  uint32_t offset;
     24  uint16_t format;
     25  uint32_t length;
     26  uint32_t language;
     27 };
     28 
     29 struct Subtable314Range {
     30  uint16_t start_range;
     31  uint16_t end_range;
     32  int16_t id_delta;
     33  uint16_t id_range_offset;
     34  uint32_t id_range_offset_offset;
     35 };
     36 
     37 // Glyph array size for the Mac Roman (format 0) table.
     38 const size_t kFormat0ArraySize = 256;
     39 
     40 // The upper limit of the Unicode code point.
     41 const uint32_t kUnicodeUpperLimit = 0x10FFFF;
     42 
     43 // The maximum number of UVS records (See below).
     44 const uint32_t kMaxCMAPSelectorRecords = 259;
     45 // The range of UVSes are:
     46 //   0x180B-0x180D (3 code points)
     47 //   0xFE00-0xFE0F (16 code points)
     48 //   0xE0100-0xE01EF (240 code points)
     49 const uint32_t kMongolianVSStart = 0x180B;
     50 const uint32_t kMongolianVSEnd = 0x180D;
     51 const uint32_t kVSStart = 0xFE00;
     52 const uint32_t kVSEnd = 0xFE0F;
     53 const uint32_t kIVSStart = 0xE0100;
     54 const uint32_t kIVSEnd = 0xE01EF;
     55 const uint32_t kUVSUpperLimit = 0xFFFFFF;
     56 
     57 } // namespace
     58 
     59 namespace ots {
     60 
     61 // Parses Format 4 tables
     62 bool OpenTypeCMAP::ParseFormat4(int platform, int encoding,
     63              const uint8_t *data, size_t length, uint16_t num_glyphs) {
     64  ots::Buffer subtable(data, length);
     65 
     66  // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the
     67  // whole thing and recompacting it, we validate it and include it verbatim
     68  // in the output.
     69 
     70  OpenTypeOS2 *os2 = static_cast<OpenTypeOS2*>(
     71      GetFont()->GetTypedTable(OTS_TAG_OS2));
     72  if (!os2) {
     73    return Error("Required OS/2 table missing");
     74  }
     75 
     76  if (!subtable.Skip(4)) {
     77    return Error("Can't read 4 bytes at start of cmap format 4 subtable");
     78  }
     79  uint16_t language = 0;
     80  if (!subtable.ReadU16(&language)) {
     81    return Error("Can't read language");
     82  }
     83  if (language) {
     84    // Platform ID 3 (windows) subtables should have language '0'.
     85    return Error("Languages should be 0 (%d)", language);
     86  }
     87 
     88  uint16_t segcountx2, search_range, entry_selector, range_shift;
     89  segcountx2 = search_range = entry_selector = range_shift = 0;
     90  if (!subtable.ReadU16(&segcountx2) ||
     91      !subtable.ReadU16(&search_range) ||
     92      !subtable.ReadU16(&entry_selector) ||
     93      !subtable.ReadU16(&range_shift)) {
     94    return Error("Failed to read subcmap structure");
     95  }
     96 
     97  if (segcountx2 & 1 || search_range & 1) {
     98    return Error("Bad subcmap structure");
     99  }
    100  const uint16_t segcount = segcountx2 >> 1;
    101  // There must be at least one segment according the spec.
    102  if (segcount < 1) {
    103    return Error("Segcount < 1 (%d)", segcount);
    104  }
    105 
    106  // log2segcount is the maximal x s.t. 2^x < segcount
    107  unsigned log2segcount = 0;
    108  while (1u << (log2segcount + 1) <= segcount) {
    109    log2segcount++;
    110  }
    111 
    112  const uint16_t expected_search_range = 2 * 1u << log2segcount;
    113  if (expected_search_range != search_range) {
    114    return Error("expected search range != search range (%d != %d)", expected_search_range, search_range);
    115  }
    116 
    117  if (entry_selector != log2segcount) {
    118    return Error("entry selector != log2(segement count) (%d != %d)", entry_selector, log2segcount);
    119  }
    120 
    121  const uint16_t expected_range_shift = segcountx2 - search_range;
    122  if (range_shift != expected_range_shift) {
    123    return Error("unexpected range shift (%d != %d)", range_shift, expected_range_shift);
    124  }
    125 
    126  std::vector<Subtable314Range> ranges(segcount);
    127 
    128  for (unsigned i = 0; i < segcount; ++i) {
    129    if (!subtable.ReadU16(&ranges[i].end_range)) {
    130      return Error("Failed to read segment %d", i);
    131    }
    132  }
    133 
    134  uint16_t padding;
    135  if (!subtable.ReadU16(&padding)) {
    136    return Error("Failed to read cmap subtable segment padding");
    137  }
    138  if (padding) {
    139    return Error("Non zero cmap subtable segment padding (%d)", padding);
    140  }
    141 
    142  for (unsigned i = 0; i < segcount; ++i) {
    143    if (!subtable.ReadU16(&ranges[i].start_range)) {
    144      return Error("Failed to read segment start range %d", i);
    145    }
    146  }
    147  for (unsigned i = 0; i < segcount; ++i) {
    148    if (!subtable.ReadS16(&ranges[i].id_delta)) {
    149      return Error("Failed to read segment delta %d", i);
    150    }
    151  }
    152  for (unsigned i = 0; i < segcount; ++i) {
    153    ranges[i].id_range_offset_offset = subtable.offset();
    154    if (!subtable.ReadU16(&ranges[i].id_range_offset)) {
    155      return Error("Failed to read segment range offset %d", i);
    156    }
    157 
    158    if (ranges[i].id_range_offset & 1) {
    159      // Some font generators seem to put 65535 on id_range_offset
    160      // for 0xFFFF-0xFFFF range.
    161      // (e.g., many fonts in http://www.princexml.com/fonts/)
    162      if (i == segcount - 1u) {
    163        Warning("bad id_range_offset");
    164        ranges[i].id_range_offset = 0;
    165        // The id_range_offset value in the transcoded font will not change
    166        // since this table is not actually "transcoded" yet.
    167      } else {
    168        return Error("Bad segment offset (%d)", ranges[i].id_range_offset);
    169      }
    170    }
    171  }
    172 
    173  // ranges must be ascending order, based on the end_code. Ranges may not
    174  // overlap.
    175  for (unsigned i = 1; i < segcount; ++i) {
    176    if ((i == segcount - 1u) &&
    177        (ranges[i - 1].start_range == 0xffff) &&
    178        (ranges[i - 1].end_range == 0xffff) &&
    179        (ranges[i].start_range == 0xffff) &&
    180        (ranges[i].end_range == 0xffff)) {
    181      // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators.
    182      // We'll accept them as an exception.
    183      Warning("multiple 0xffff terminators found");
    184      continue;
    185    }
    186 
    187    // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have
    188    // unsorted table...
    189    if (ranges[i].end_range <= ranges[i - 1].end_range) {
    190      return Error("Out of order end range (%d <= %d)", ranges[i].end_range, ranges[i-1].end_range);
    191    }
    192    if (ranges[i].start_range <= ranges[i - 1].end_range) {
    193      return Error("out of order start range (%d <= %d)", ranges[i].start_range, ranges[i-1].end_range);
    194    }
    195 
    196    // On many fonts, the value of {first, last}_char_index are incorrect.
    197    // Fix them.
    198    if (os2->table.first_char_index != 0xFFFF &&
    199        ranges[i].start_range != 0xFFFF &&
    200        os2->table.first_char_index > ranges[i].start_range) {
    201      os2->table.first_char_index = ranges[i].start_range;
    202    }
    203    if (os2->table.last_char_index != 0xFFFF &&
    204        ranges[i].end_range != 0xFFFF &&
    205        os2->table.last_char_index < ranges[i].end_range) {
    206      os2->table.last_char_index = ranges[i].end_range;
    207    }
    208  }
    209 
    210  // The last range must end at 0xffff
    211  if (ranges[segcount - 1].start_range != 0xffff || ranges[segcount - 1].end_range != 0xffff) {
    212    return Error("Final segment start and end must be 0xFFFF (0x%04X-0x%04X)",
    213                           ranges[segcount - 1].start_range, ranges[segcount - 1].end_range);
    214  }
    215 
    216  // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of
    217  // each code-point defined in the table and make sure that they are all valid
    218  // glyphs and that we don't access anything out-of-bounds.
    219  for (unsigned i = 0; i < segcount; ++i) {
    220    for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) {
    221      const uint16_t code_point = static_cast<uint16_t>(cp);
    222      if (ranges[i].id_range_offset == 0) {
    223        // this is explictly allowed to overflow in the spec
    224        const uint16_t glyph = code_point + ranges[i].id_delta;
    225        if (glyph >= num_glyphs) {
    226          return Error("Range glyph reference too high (%d > %d)", glyph, num_glyphs - 1);
    227        }
    228      } else {
    229        const uint16_t range_delta = code_point - ranges[i].start_range;
    230        // this might seem odd, but it's true. The offset is relative to the
    231        // location of the offset value itself.
    232        const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset +
    233                                         ranges[i].id_range_offset +
    234                                         range_delta * 2;
    235        // We need to be able to access a 16-bit value from this offset
    236        if (glyph_id_offset + 1 >= length) {
    237          return Error("bad glyph id offset (%d > %ld)", glyph_id_offset, length);
    238        }
    239        uint16_t glyph;
    240        std::memcpy(&glyph, data + glyph_id_offset, 2);
    241        glyph = ots_ntohs(glyph);
    242        if (glyph >= num_glyphs) {
    243          return Error("Range glyph reference too high (%d > %d)", glyph, num_glyphs - 1);
    244        }
    245      }
    246    }
    247  }
    248 
    249  // We accept the table.
    250  // TODO(yusukes): transcode the subtable.
    251  if (platform == 3 && encoding == 0) {
    252    this->subtable_3_0_4_data = data;
    253    this->subtable_3_0_4_length = length;
    254  } else if (platform == 3 && encoding == 1) {
    255    this->subtable_3_1_4_data = data;
    256    this->subtable_3_1_4_length = length;
    257  } else if (platform == 0 && encoding == 3) {
    258    this->subtable_0_3_4_data = data;
    259    this->subtable_0_3_4_length = length;
    260  } else {
    261    return Error("Unknown cmap subtable type (platform=%d, encoding=%d)", platform, encoding);
    262  }
    263 
    264  return true;
    265 }
    266 
    267 bool OpenTypeCMAP::Parse31012(const uint8_t *data, size_t length,
    268                              uint16_t num_glyphs) {
    269  ots::Buffer subtable(data, length);
    270 
    271  // Format 12 tables are simple. We parse these and fully serialise them
    272  // later.
    273 
    274  if (!subtable.Skip(8)) {
    275    return Error("failed to skip the first 8 bytes of format 12 subtable");
    276  }
    277  uint32_t language = 0;
    278  if (!subtable.ReadU32(&language)) {
    279    return Error("can't read format 12 subtable language");
    280  }
    281  if (language) {
    282    return Error("format 12 subtable language should be zero (%d)", language);
    283  }
    284 
    285  uint32_t num_groups = 0;
    286  if (!subtable.ReadU32(&num_groups)) {
    287    return Error("can't read number of format 12 subtable groups");
    288  }
    289  if (num_groups == 0 || subtable.remaining() / 12 < num_groups) {
    290    return Error("Bad format 12 subtable group count %d", num_groups);
    291  }
    292 
    293  std::vector<ots::OpenTypeCMAPSubtableRange> &groups
    294      = this->subtable_3_10_12;
    295  groups.resize(num_groups);
    296 
    297  for (unsigned i = 0; i < num_groups; ++i) {
    298    if (!subtable.ReadU32(&groups[i].start_range) ||
    299        !subtable.ReadU32(&groups[i].end_range) ||
    300        !subtable.ReadU32(&groups[i].start_glyph_id)) {
    301      return Error("can't read format 12 subtable group");
    302    }
    303 
    304    if (groups[i].start_range > kUnicodeUpperLimit ||
    305        groups[i].end_range > kUnicodeUpperLimit ||
    306        groups[i].start_glyph_id > 0xFFFF) {
    307      return Error("bad format 12 subtable group (startCharCode=0x%4X, endCharCode=0x%4X, startGlyphID=%d)",
    308                             groups[i].start_range, groups[i].end_range, groups[i].start_glyph_id);
    309    }
    310 
    311    // We assert that the glyph value is within range. Because of the range
    312    // limits, above, we don't need to worry about overflow.
    313    if (groups[i].end_range < groups[i].start_range) {
    314      return Error("format 12 subtable group endCharCode before startCharCode (0x%4X < 0x%4X)",
    315                             groups[i].end_range, groups[i].start_range);
    316    }
    317    if ((groups[i].end_range - groups[i].start_range) +
    318        groups[i].start_glyph_id > num_glyphs) {
    319      return Error("bad format 12 subtable group startGlyphID (%d)", groups[i].start_glyph_id);
    320    }
    321  }
    322 
    323  // the groups must be sorted by start code and may not overlap
    324  for (unsigned i = 1; i < num_groups; ++i) {
    325    if (groups[i].start_range <= groups[i - 1].start_range) {
    326      return Error("out of order format 12 subtable group (startCharCode=0x%4X <= startCharCode=0x%4X of previous group)",
    327                             groups[i].start_range, groups[i-1].start_range);
    328    }
    329    if (groups[i].start_range <= groups[i - 1].end_range) {
    330      return Error("overlapping format 12 subtable groups (startCharCode=0x%4X <= endCharCode=0x%4X of previous group)",
    331                             groups[i].start_range, groups[i-1].end_range);
    332    }
    333  }
    334 
    335  return true;
    336 }
    337 
    338 bool OpenTypeCMAP::Parse31013(const uint8_t *data, size_t length,
    339                              uint16_t num_glyphs) {
    340  ots::Buffer subtable(data, length);
    341 
    342  // Format 13 tables are simple. We parse these and fully serialise them
    343  // later.
    344 
    345  if (!subtable.Skip(8)) {
    346    return Error("Bad cmap subtable length");
    347  }
    348  uint32_t language = 0;
    349  if (!subtable.ReadU32(&language)) {
    350    return Error("Can't read cmap subtable language");
    351  }
    352  if (language) {
    353    return Error("Cmap subtable language should be zero but is %d", language);
    354  }
    355 
    356  uint32_t num_groups = 0;
    357  if (!subtable.ReadU32(&num_groups)) {
    358    return Error("Can't read number of groups in a cmap subtable");
    359  }
    360 
    361  // We limit the number of groups in the same way as in 3.10.12 tables. See
    362  // the comment there in
    363  if (num_groups == 0 || subtable.remaining() / 12 < num_groups) {
    364    return Error("Bad format 13 subtable group count %d", num_groups);
    365  }
    366 
    367  std::vector<ots::OpenTypeCMAPSubtableRange> &groups = this->subtable_3_10_13;
    368  groups.resize(num_groups);
    369 
    370  for (unsigned i = 0; i < num_groups; ++i) {
    371    if (!subtable.ReadU32(&groups[i].start_range) ||
    372        !subtable.ReadU32(&groups[i].end_range) ||
    373        !subtable.ReadU32(&groups[i].start_glyph_id)) {
    374      return Error("Can't read subrange structure in a cmap subtable");
    375    }
    376 
    377    // We conservatively limit all of the values to protect some parsers from
    378    // overflows
    379    if (groups[i].start_range > kUnicodeUpperLimit ||
    380        groups[i].end_range > kUnicodeUpperLimit ||
    381        groups[i].start_glyph_id > 0xFFFF) {
    382      return Error("Bad subrange with start_range=%d, end_range=%d, start_glyph_id=%d", groups[i].start_range, groups[i].end_range, groups[i].start_glyph_id);
    383    }
    384 
    385    if (groups[i].start_glyph_id >= num_glyphs) {
    386      return Error("Subrange starting glyph id too high (%d > %d)", groups[i].start_glyph_id, num_glyphs);
    387    }
    388  }
    389 
    390  // the groups must be sorted by start code and may not overlap
    391  for (unsigned i = 1; i < num_groups; ++i) {
    392    if (groups[i].start_range <= groups[i - 1].start_range) {
    393      return Error("Overlapping subrange starts (%d >= %d)", groups[i]. start_range, groups[i-1].start_range);
    394    }
    395    if (groups[i].start_range <= groups[i - 1].end_range) {
    396      return Error("Overlapping subranges (%d <= %d)", groups[i].start_range, groups[i-1].end_range);
    397    }
    398  }
    399 
    400  return true;
    401 }
    402 
    403 bool OpenTypeCMAP::Parse0514(const uint8_t *data, size_t length) {
    404  // Unicode Variation Selector table
    405  ots::Buffer subtable(data, length);
    406 
    407  // Format 14 tables are simple. We parse these and fully serialise them
    408  // later.
    409 
    410  // Skip format (USHORT) and length (ULONG)
    411  if (!subtable.Skip(6)) {
    412    return Error("Can't read start of cmap subtable");
    413  }
    414 
    415  uint32_t num_records = 0;
    416  if (!subtable.ReadU32(&num_records)) {
    417    return Error("Can't read number of records in cmap subtable");
    418  }
    419  if (num_records == 0 || num_records > kMaxCMAPSelectorRecords) {
    420    return Error("Bad format 14 subtable records count %d", num_records);
    421  }
    422 
    423  std::vector<ots::OpenTypeCMAPSubtableVSRecord>& records
    424      = this->subtable_0_5_14;
    425  records.resize(num_records);
    426 
    427  for (unsigned i = 0; i < num_records; ++i) {
    428    if (!subtable.ReadU24(&records[i].var_selector) ||
    429        !subtable.ReadU32(&records[i].default_offset) ||
    430        !subtable.ReadU32(&records[i].non_default_offset)) {
    431      return Error("Can't read record structure of record %d in cmap subtale", i);
    432    }
    433    // Checks the value of variation selector
    434    if (!((records[i].var_selector >= kMongolianVSStart &&
    435           records[i].var_selector <= kMongolianVSEnd) ||
    436          (records[i].var_selector >= kVSStart &&
    437           records[i].var_selector <= kVSEnd) ||
    438          (records[i].var_selector >= kIVSStart &&
    439           records[i].var_selector <= kIVSEnd))) {
    440      return Error("Bad record variation selector (%04X) in record %i", records[i].var_selector, i);
    441    }
    442    if (i > 0 &&
    443        records[i-1].var_selector >= records[i].var_selector) {
    444      return Error("Out of order variation selector (%04X >= %04X) in record %d", records[i-1].var_selector, records[i].var_selector, i);
    445    }
    446 
    447    // Checks offsets
    448    if (!records[i].default_offset && !records[i].non_default_offset) {
    449      return Error("No default aoffset in variation selector record %d", i);
    450    }
    451    if (records[i].default_offset &&
    452        records[i].default_offset >= length) {
    453      return Error("Default offset too high (%d >= %ld) in record %d", records[i].default_offset, length, i);
    454    }
    455    if (records[i].non_default_offset &&
    456        records[i].non_default_offset >= length) {
    457      return Error("Non default offset too high (%d >= %ld) in record %d", records[i].non_default_offset, length, i);
    458    }
    459  }
    460 
    461  for (unsigned i = 0; i < num_records; ++i) {
    462    // Checks default UVS table
    463    if (records[i].default_offset) {
    464      subtable.set_offset(records[i].default_offset);
    465      uint32_t num_ranges = 0;
    466      if (!subtable.ReadU32(&num_ranges)) {
    467        return Error("Can't read number of ranges in record %d", i);
    468      }
    469      if (num_ranges == 0 || subtable.remaining() / 4 < num_ranges) {
    470        return Error("Bad number of ranges (%d) in record %d", num_ranges, i);
    471      }
    472 
    473      uint32_t last_unicode_value = 0;
    474      std::vector<ots::OpenTypeCMAPSubtableVSRange>& ranges
    475          = records[i].ranges;
    476      ranges.resize(num_ranges);
    477 
    478      for (unsigned j = 0; j < num_ranges; ++j) {
    479        if (!subtable.ReadU24(&ranges[j].unicode_value) ||
    480            !subtable.ReadU8(&ranges[j].additional_count)) {
    481          return Error("Can't read range info in variation selector record %d", i);
    482        }
    483        const uint32_t check_value =
    484            ranges[j].unicode_value + ranges[j].additional_count;
    485        if (ranges[j].unicode_value == 0 ||
    486            ranges[j].unicode_value > kUnicodeUpperLimit ||
    487            check_value > kUVSUpperLimit ||
    488            (last_unicode_value &&
    489             ranges[j].unicode_value <= last_unicode_value)) {
    490          return Error("Bad Unicode value *%04X) in variation selector range %d record %d", ranges[j].unicode_value, j, i);
    491        }
    492        last_unicode_value = check_value;
    493      }
    494    }
    495 
    496    // Checks non default UVS table
    497    if (records[i].non_default_offset) {
    498      subtable.set_offset(records[i].non_default_offset);
    499      uint32_t num_mappings = 0;
    500      if (!subtable.ReadU32(&num_mappings)) {
    501        return Error("Can't read number of mappings in variation selector record %d", i);
    502      }
    503      if (num_mappings == 0 || subtable.remaining() / 5 < num_mappings) {
    504        return Error("Bad number of mappings (%d) in variation selector record %d", num_mappings, i);
    505      }
    506 
    507      uint32_t last_unicode_value = 0;
    508      std::vector<ots::OpenTypeCMAPSubtableVSMapping>& mappings
    509          = records[i].mappings;
    510      mappings.resize(num_mappings);
    511 
    512      for (unsigned j = 0; j < num_mappings; ++j) {
    513        if (!subtable.ReadU24(&mappings[j].unicode_value) ||
    514            !subtable.ReadU16(&mappings[j].glyph_id)) {
    515          return Error("Can't read mapping %d in variation selector record %d", j, i);
    516        }
    517        if (mappings[j].glyph_id == 0 || mappings[j].unicode_value == 0) {
    518          return Error("Bad mapping (%04X -> %d) in mapping %d of variation selector %d", mappings[j].unicode_value, mappings[j].glyph_id, j, i);
    519        }
    520        if (mappings[j].unicode_value > kUnicodeUpperLimit) {
    521          return Error("Invalid Unicode value (%04X > %04X) in mapping %d of variation selector %d", mappings[j].unicode_value, kUnicodeUpperLimit, j, i);
    522        }
    523        if (last_unicode_value &&
    524            mappings[j].unicode_value <= last_unicode_value) {
    525          return Error("Out of order Unicode value (%04X <= %04X) in mapping %d of variation selector %d", mappings[j].unicode_value, last_unicode_value, j, i);
    526        }
    527        last_unicode_value = mappings[j].unicode_value;
    528      }
    529    }
    530  }
    531 
    532  if (subtable.offset() != length) {
    533    return Error("Bad subtable offset (%ld != %ld)", subtable.offset(), length);
    534  }
    535  this->subtable_0_5_14_length = subtable.offset();
    536  return true;
    537 }
    538 
    539 bool OpenTypeCMAP::Parse100(const uint8_t *data, size_t length) {
    540  // Mac Roman table
    541  ots::Buffer subtable(data, length);
    542 
    543  if (!subtable.Skip(4)) {
    544    return Error("Bad cmap subtable");
    545  }
    546  uint16_t language = 0;
    547  if (!subtable.ReadU16(&language)) {
    548    return Error("Can't read language in cmap subtable");
    549  }
    550  if (language) {
    551    // simsun.ttf has non-zero language id.
    552    Warning("language id should be zero: %u", language);
    553  }
    554 
    555  this->subtable_1_0_0.reserve(kFormat0ArraySize);
    556  for (size_t i = 0; i < kFormat0ArraySize; ++i) {
    557    uint8_t glyph_id = 0;
    558    if (!subtable.ReadU8(&glyph_id)) {
    559      return Error("Can't read glyph id at array[%ld] in cmap subtable", i);
    560    }
    561    this->subtable_1_0_0.push_back(glyph_id);
    562  }
    563 
    564  return true;
    565 }
    566 
    567 bool OpenTypeCMAP::Parse(const uint8_t *data, size_t length) {
    568  Buffer table(data, length);
    569 
    570  uint16_t version = 0;
    571  uint16_t num_tables = 0;
    572  if (!table.ReadU16(&version) ||
    573      !table.ReadU16(&num_tables)) {
    574    return Error("Can't read structure of cmap");
    575  }
    576 
    577  if (version != 0) {
    578    return Error("Non zero cmap version (%d)", version);
    579  }
    580  if (!num_tables) {
    581    return Error("No subtables in cmap!");
    582  }
    583 
    584  std::vector<CMAPSubtableHeader> subtable_headers;
    585 
    586  // read the subtable headers
    587  subtable_headers.reserve(num_tables);
    588  for (unsigned i = 0; i < num_tables; ++i) {
    589    CMAPSubtableHeader subt;
    590 
    591    if (!table.ReadU16(&subt.platform) ||
    592        !table.ReadU16(&subt.encoding) ||
    593        !table.ReadU32(&subt.offset)) {
    594      return Error("Can't read subtable information cmap subtable %d", i);
    595    }
    596 
    597    subtable_headers.push_back(subt);
    598  }
    599 
    600  const size_t data_offset = table.offset();
    601 
    602  // make sure that all the offsets are valid.
    603  for (unsigned i = 0; i < num_tables; ++i) {
    604    if (subtable_headers[i].offset > 1024 * 1024 * 1024) {
    605      return Error("Bad subtable offset in cmap subtable %d", i);
    606    }
    607    if (subtable_headers[i].offset < data_offset ||
    608        subtable_headers[i].offset >= length) {
    609      return Error("Bad subtable offset (%d) in cmap subtable %d", subtable_headers[i].offset, i);
    610    }
    611  }
    612 
    613  // the format of the table is the first couple of bytes in the table. The
    614  // length of the table is stored in a format-specific way.
    615  for (unsigned i = 0; i < num_tables; ++i) {
    616    table.set_offset(subtable_headers[i].offset);
    617    if (!table.ReadU16(&subtable_headers[i].format)) {
    618      return Error("Can't read cmap subtable header format %d", i);
    619    }
    620 
    621    uint16_t len = 0;
    622    uint16_t lang = 0;
    623    switch (subtable_headers[i].format) {
    624      case 0:
    625      case 4:
    626        if (!table.ReadU16(&len)) {
    627          return Error("Can't read cmap subtable %d length", i);
    628        }
    629        if (!table.ReadU16(&lang)) {
    630          return Error("Can't read cmap subtable %d language", i);
    631        }
    632        subtable_headers[i].length = len;
    633        subtable_headers[i].language = lang;
    634        break;
    635      case 12:
    636      case 13:
    637        if (!table.Skip(2)) {
    638          return Error("Bad cmap subtable %d structure", i);
    639        }
    640        if (!table.ReadU32(&subtable_headers[i].length)) {
    641          return Error("Can read cmap subtable %d length", i);
    642        }
    643        if (!table.ReadU32(&subtable_headers[i].language)) {
    644          return Error("Can't read cmap subtable %d language", i);
    645        }
    646        break;
    647      case 14:
    648        if (!table.ReadU32(&subtable_headers[i].length)) {
    649          return Error("Can't read cmap subtable %d length", i);
    650        }
    651        subtable_headers[i].language = 0;
    652        break;
    653      default:
    654        subtable_headers[i].length = 0;
    655        subtable_headers[i].language = 0;
    656        break;
    657    }
    658  }
    659 
    660  // check if the table is sorted first by platform ID, then by encoding ID.
    661  for (unsigned i = 1; i < num_tables; ++i) {
    662    if (subtable_headers[i - 1].platform > subtable_headers[i].platform ||
    663        (subtable_headers[i - 1].platform == subtable_headers[i].platform &&
    664         (subtable_headers[i - 1].encoding > subtable_headers[i].encoding ||
    665          (subtable_headers[i - 1].encoding == subtable_headers[i].encoding &&
    666           subtable_headers[i - 1].language > subtable_headers[i].language))))
    667      Warning("subtable %d with platform ID %d, encoding ID %d, language ID %d "
    668                  "following subtable with platform ID %d, encoding ID %d, language ID %d",
    669                  i,
    670                  subtable_headers[i].platform,
    671                  subtable_headers[i].encoding,
    672                  subtable_headers[i].language,
    673                  subtable_headers[i - 1].platform,
    674                  subtable_headers[i - 1].encoding,
    675                  subtable_headers[i - 1].language);
    676  }
    677 
    678  // Now, verify that all the lengths are sane
    679  for (unsigned i = 0; i < num_tables; ++i) {
    680    if (!subtable_headers[i].length) continue;
    681    if (subtable_headers[i].length > 1024 * 1024 * 1024) {
    682      return Error("Bad cmap subtable %d length", i);
    683    }
    684    // We know that both the offset and length are < 1GB, so the following
    685    // addition doesn't overflow
    686    const uint32_t end_byte
    687        = subtable_headers[i].offset + subtable_headers[i].length;
    688    if (end_byte > length) {
    689      return Error("Over long cmap subtable %d @ %d for %d", i, subtable_headers[i].offset, subtable_headers[i].length);
    690    }
    691  }
    692 
    693  // check that the cmap subtables are not overlapping.
    694  std::set<std::pair<uint32_t, uint32_t> > uniq_checker;
    695  std::vector<std::pair<uint32_t, uint8_t> > overlap_checker;
    696  for (unsigned i = 0; i < num_tables; ++i) {
    697    const uint32_t end_byte
    698        = subtable_headers[i].offset + subtable_headers[i].length;
    699 
    700    if (!uniq_checker.insert(std::make_pair(subtable_headers[i].offset,
    701                                            end_byte)).second) {
    702      // Sometimes Unicode table and MS table share exactly the same data.
    703      // We'll allow this.
    704      continue;
    705    }
    706    overlap_checker.push_back(
    707        std::make_pair(subtable_headers[i].offset,
    708                       static_cast<uint8_t>(1) /* start */));
    709    overlap_checker.push_back(
    710        std::make_pair(end_byte, static_cast<uint8_t>(0) /* end */));
    711  }
    712  std::sort(overlap_checker.begin(), overlap_checker.end());
    713  int overlap_count = 0;
    714  for (unsigned i = 0; i < overlap_checker.size(); ++i) {
    715    overlap_count += (overlap_checker[i].second ? 1 : -1);
    716    if (overlap_count > 1) {
    717      return Error("Excessive overlap count %d", overlap_count);
    718    }
    719  }
    720 
    721  // we grab the number of glyphs in the file from the maxp table to make sure
     722  // that the character map isn't referencing anything beyond this range.
    723  OpenTypeMAXP *maxp = static_cast<OpenTypeMAXP*>(
    724      GetFont()->GetTypedTable(OTS_TAG_MAXP));
    725  if (!maxp) {
    726    return Error("No maxp table in font! Needed by cmap.");
    727  }
    728  const uint16_t num_glyphs = maxp->num_glyphs;
    729 
    730  // We only support a subset of the possible character map tables. Microsoft
    731  // 'strongly recommends' that everyone supports the Unicode BMP table with
    732  // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables:
    733  //   Platform ID   Encoding ID  Format
    734  //   0             0            4       (Unicode Default)
    735  //   0             1            4       (Unicode 1.1)
    736  //   0             3            4       (Unicode BMP)
    737  //   0             3            12      (Unicode UCS-4)
    738  //   0             5            14      (Unicode Variation Sequences)
    739  //   1             0            0       (Mac Roman)
    740  //   3             0            4       (MS Symbol)
    741  //   3             1            4       (MS Unicode BMP)
    742  //   3             10           12      (MS Unicode UCS-4)
    743  //   3             10           13      (MS UCS-4 Fallback mapping)
    744  //
    745  // Note:
    746  //  * 0-0-4 and 0-1-4 tables are (usually) written as a 3-1-4 table. If 3-1-4 table
    747  //    also exists, the 0-0-4 or 0-1-4 tables are ignored.
    748  //  * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table.
     749  //    Some fonts which include a 0-5-14 table seem to require a 0-3-4
     750  //    table. The 0-3-4 table will be written even if a 3-1-4 table also exists.
    751  //  * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also
    752  //    exists, the 0-3-12 table is ignored.
    753  //
    754 
    755  for (unsigned i = 0; i < num_tables; ++i) {
    756    if (subtable_headers[i].platform == 0) {
    757      // Unicode platform
    758 
    759      if ((subtable_headers[i].encoding == 0 || subtable_headers[i].encoding == 1) &&
    760          (subtable_headers[i].format == 4)) {
    761        // parse and output the 0-0-4 and 0-1-4 tables as 3-1-4 table. Sometimes the 0-0-4
    762        // table actually points to MS symbol data and thus should be parsed as
    763        // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be
    764        // recovered in ots_cmap_serialise().
    765        if (!ParseFormat4(3, 1, data + subtable_headers[i].offset,
    766                      subtable_headers[i].length, num_glyphs)) {
    767          return Error("Failed to parse format 4 cmap subtable %d", i);
    768        }
    769      } else if ((subtable_headers[i].encoding == 3) &&
    770                 (subtable_headers[i].format == 4)) {
    771        // parse and output the 0-3-4 table as 0-3-4 table.
    772        if (!ParseFormat4(0, 3, data + subtable_headers[i].offset,
    773                      subtable_headers[i].length, num_glyphs)) {
    774          return Error("Failed to parse format 4 cmap subtable %d", i);
    775        }
    776      } else if ((subtable_headers[i].encoding == 3 ||
    777                  subtable_headers[i].encoding == 4) &&
    778                 (subtable_headers[i].format == 12)) {
    779        // parse and output the 0-3-12 or 0-4-12 tables as 3-10-12 table.
    780        if (!Parse31012(data + subtable_headers[i].offset,
    781                        subtable_headers[i].length, num_glyphs)) {
    782          return Error("Failed to parse format 12 cmap subtable %d", i);
    783        }
    784      } else if ((subtable_headers[i].encoding == 5) &&
    785                 (subtable_headers[i].format == 14)) {
    786        if (!Parse0514(data + subtable_headers[i].offset,
    787                       subtable_headers[i].length)) {
    788          return Error("Failed to parse format 14 cmap subtable %d", i);
    789        }
    790      }
    791    } else if (subtable_headers[i].platform == 1) {
    792      // Mac platform
    793 
    794      if ((subtable_headers[i].encoding == 0) &&
    795          (subtable_headers[i].format == 0)) {
    796        // parse and output the 1-0-0 table.
    797        if (!Parse100(data + subtable_headers[i].offset,
    798                      subtable_headers[i].length)) {
    799          return OTS_FAILURE();
    800        }
    801      }
    802    } else if (subtable_headers[i].platform == 3) {
    803      // MS platform
    804 
    805      switch (subtable_headers[i].encoding) {
    806        case 0:
    807        case 1:
    808          if (subtable_headers[i].format == 4) {
    809            // parse 3-0-4 or 3-1-4 table.
    810            if (!ParseFormat4(subtable_headers[i].platform,
    811                          subtable_headers[i].encoding,
    812                          data + subtable_headers[i].offset,
    813                          subtable_headers[i].length, num_glyphs)) {
    814              return OTS_FAILURE();
    815            }
    816          }
    817          break;
    818        case 10:
    819          if (subtable_headers[i].format == 12) {
    820            this->subtable_3_10_12.clear();
    821            if (!Parse31012(data + subtable_headers[i].offset,
    822                            subtable_headers[i].length, num_glyphs)) {
    823              return OTS_FAILURE();
    824            }
    825          } else if (subtable_headers[i].format == 13) {
    826            this->subtable_3_10_13.clear();
    827            if (!Parse31013(data + subtable_headers[i].offset,
    828                            subtable_headers[i].length, num_glyphs)) {
    829              return OTS_FAILURE();
    830            }
    831          }
    832          break;
    833      }
    834    }
    835  }
    836 
    837  return true;
    838 }
    839 
    840 bool OpenTypeCMAP::Serialize(OTSStream *out) {
    841  const bool have_034 = this->subtable_0_3_4_data != NULL;
    842  const bool have_0514 = this->subtable_0_5_14.size() != 0;
    843  const bool have_100 = this->subtable_1_0_0.size() != 0;
    844  const bool have_304 = this->subtable_3_0_4_data != NULL;
    845  // MS Symbol and MS Unicode tables should not co-exist.
    846  // See the comment above in 0-0-4 parser.
    847  const bool have_314 = (!have_304) && this->subtable_3_1_4_data;
    848  const bool have_31012 = this->subtable_3_10_12.size() != 0;
    849  const bool have_31013 = this->subtable_3_10_13.size() != 0;
    850  const uint16_t num_subtables = static_cast<uint16_t>(have_034) +
    851                                 static_cast<uint16_t>(have_0514) +
    852                                 static_cast<uint16_t>(have_100) +
    853                                 static_cast<uint16_t>(have_304) +
    854                                 static_cast<uint16_t>(have_314) +
    855                                 static_cast<uint16_t>(have_31012) +
    856                                 static_cast<uint16_t>(have_31013);
    857  const off_t table_start = out->Tell();
    858 
    859  // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables
    860  // (e.g., old fonts for Mac). We don't support them.
    861  if (!have_304 && !have_314 && !have_034 && !have_31012 && !have_31013) {
    862    return Error("no supported subtables were found");
    863  }
    864 
    865  if (!out->WriteU16(0) ||
    866      !out->WriteU16(num_subtables)) {
    867    return OTS_FAILURE();
    868  }
    869 
    870  const off_t record_offset = out->Tell();
    871  if (!out->Pad(num_subtables * 8)) {
    872    return OTS_FAILURE();
    873  }
    874 
    875  const off_t offset_034 = out->Tell();
    876  if (have_034) {
    877    if (!out->Write(this->subtable_0_3_4_data,
    878                    this->subtable_0_3_4_length)) {
    879      return OTS_FAILURE();
    880    }
    881  }
    882 
    883  const off_t offset_0514 = out->Tell();
    884  if (have_0514) {
    885    const std::vector<ots::OpenTypeCMAPSubtableVSRecord> &records
    886        = this->subtable_0_5_14;
    887    const unsigned num_records = records.size();
    888    if (!out->WriteU16(14) ||
    889        !out->WriteU32(this->subtable_0_5_14_length) ||
    890        !out->WriteU32(num_records)) {
    891      return OTS_FAILURE();
    892    }
    893    for (unsigned i = 0; i < num_records; ++i) {
    894      if (!out->WriteU24(records[i].var_selector) ||
    895          !out->WriteU32(records[i].default_offset) ||
    896          !out->WriteU32(records[i].non_default_offset)) {
    897        return OTS_FAILURE();
    898      }
    899    }
    900    for (unsigned i = 0; i < num_records; ++i) {
    901      if (records[i].default_offset) {
    902        const std::vector<ots::OpenTypeCMAPSubtableVSRange> &ranges
    903            = records[i].ranges;
    904        const unsigned num_ranges = ranges.size();
    905        if (!out->Seek(records[i].default_offset + offset_0514) ||
    906            !out->WriteU32(num_ranges)) {
    907          return OTS_FAILURE();
    908        }
    909        for (unsigned j = 0; j < num_ranges; ++j) {
    910          if (!out->WriteU24(ranges[j].unicode_value) ||
    911              !out->WriteU8(ranges[j].additional_count)) {
    912            return OTS_FAILURE();
    913          }
    914        }
    915      }
    916      if (records[i].non_default_offset) {
    917        const std::vector<ots::OpenTypeCMAPSubtableVSMapping> &mappings
    918            = records[i].mappings;
    919        const unsigned num_mappings = mappings.size();
    920        if (!out->Seek(records[i].non_default_offset + offset_0514) ||
    921            !out->WriteU32(num_mappings)) {
    922          return OTS_FAILURE();
    923        }
    924        for (unsigned j = 0; j < num_mappings; ++j) {
    925          if (!out->WriteU24(mappings[j].unicode_value) ||
    926              !out->WriteU16(mappings[j].glyph_id)) {
    927            return OTS_FAILURE();
    928          }
    929        }
    930      }
    931    }
    932  }
    933 
    934  const off_t offset_100 = out->Tell();
    935  if (have_100) {
    936    if (!out->WriteU16(0) ||  // format
    937        !out->WriteU16(6 + kFormat0ArraySize) ||  // length
    938        !out->WriteU16(0)) {  // language
    939      return OTS_FAILURE();
    940    }
    941    if (!out->Write(&(this->subtable_1_0_0[0]), kFormat0ArraySize)) {
    942      return OTS_FAILURE();
    943    }
    944  }
    945 
    946  const off_t offset_304 = out->Tell();
    947  if (have_304) {
    948    if (!out->Write(this->subtable_3_0_4_data,
    949                    this->subtable_3_0_4_length)) {
    950      return OTS_FAILURE();
    951    }
    952  }
    953 
    954  const off_t offset_314 = out->Tell();
    955  if (have_314) {
    956    if (!out->Write(this->subtable_3_1_4_data,
    957                    this->subtable_3_1_4_length)) {
    958      return OTS_FAILURE();
    959    }
    960  }
    961 
    962  const off_t offset_31012 = out->Tell();
    963  if (have_31012) {
    964    std::vector<OpenTypeCMAPSubtableRange> &groups
    965        = this->subtable_3_10_12;
    966    const unsigned num_groups = groups.size();
    967    if (!out->WriteU16(12) ||
    968        !out->WriteU16(0) ||
    969        !out->WriteU32(num_groups * 12 + 16) ||
    970        !out->WriteU32(0) ||
    971        !out->WriteU32(num_groups)) {
    972      return OTS_FAILURE();
    973    }
    974 
    975    for (unsigned i = 0; i < num_groups; ++i) {
    976      if (!out->WriteU32(groups[i].start_range) ||
    977          !out->WriteU32(groups[i].end_range) ||
    978          !out->WriteU32(groups[i].start_glyph_id)) {
    979        return OTS_FAILURE();
    980      }
    981    }
    982  }
    983 
    984  const off_t offset_31013 = out->Tell();
    985  if (have_31013) {
    986    std::vector<OpenTypeCMAPSubtableRange> &groups
    987        = this->subtable_3_10_13;
    988    const unsigned num_groups = groups.size();
    989    if (!out->WriteU16(13) ||
    990        !out->WriteU16(0) ||
    991        !out->WriteU32(num_groups * 12 + 16) ||
    992        !out->WriteU32(0) ||
    993        !out->WriteU32(num_groups)) {
    994      return OTS_FAILURE();
    995    }
    996 
    997    for (unsigned i = 0; i < num_groups; ++i) {
    998      if (!out->WriteU32(groups[i].start_range) ||
    999          !out->WriteU32(groups[i].end_range) ||
   1000          !out->WriteU32(groups[i].start_glyph_id)) {
   1001        return OTS_FAILURE();
   1002      }
   1003    }
   1004  }
   1005 
   1006  const off_t table_end = out->Tell();
   1007 
   1008  // Now seek back and write the table of offsets
   1009  if (!out->Seek(record_offset)) {
   1010    return OTS_FAILURE();
   1011  }
   1012 
   1013  if (have_034) {
   1014    if (!out->WriteU16(0) ||
   1015        !out->WriteU16(3) ||
   1016        !out->WriteU32(offset_034 - table_start)) {
   1017      return OTS_FAILURE();
   1018    }
   1019  }
   1020 
   1021  if (have_0514) {
   1022    if (!out->WriteU16(0) ||
   1023        !out->WriteU16(5) ||
   1024        !out->WriteU32(offset_0514 - table_start)) {
   1025      return OTS_FAILURE();
   1026    }
   1027  }
   1028 
   1029  if (have_100) {
   1030    if (!out->WriteU16(1) ||
   1031        !out->WriteU16(0) ||
   1032        !out->WriteU32(offset_100 - table_start)) {
   1033      return OTS_FAILURE();
   1034    }
   1035  }
   1036 
   1037  if (have_304) {
   1038    if (!out->WriteU16(3) ||
   1039        !out->WriteU16(0) ||
   1040        !out->WriteU32(offset_304 - table_start)) {
   1041      return OTS_FAILURE();
   1042    }
   1043  }
   1044 
   1045  if (have_314) {
   1046    if (!out->WriteU16(3) ||
   1047        !out->WriteU16(1) ||
   1048        !out->WriteU32(offset_314 - table_start)) {
   1049      return OTS_FAILURE();
   1050    }
   1051  }
   1052 
   1053  if (have_31012) {
   1054    if (!out->WriteU16(3) ||
   1055        !out->WriteU16(10) ||
   1056        !out->WriteU32(offset_31012 - table_start)) {
   1057      return OTS_FAILURE();
   1058    }
   1059  }
   1060 
   1061  if (have_31013) {
   1062    if (!out->WriteU16(3) ||
   1063        !out->WriteU16(10) ||
   1064        !out->WriteU32(offset_31013 - table_start)) {
   1065      return OTS_FAILURE();
   1066    }
   1067  }
   1068 
   1069  if (!out->Seek(table_end)) {
   1070    return OTS_FAILURE();
   1071  }
   1072 
   1073  return true;
   1074 }
   1075 
   1076 }  // namespace ots