tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

name.cc (12318B)


      1 // Copyright (c) 2011-2017 The OTS Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "name.h"
      6 
      7 #include <algorithm>
      8 #include <cstring>
      9 #include <cctype>
     10 
     11 // name - Naming Table
     12 // http://www.microsoft.com/typography/otspec/name.htm
     13 
     14 namespace {
     15 
     16 // We disallow characters outside the URI spec "unreserved characters"
     17 // set; any chars outside this set will be replaced by underscore.
     18 bool AllowedInPsName(char c) {
     19  return isalnum(c) || std::strchr("-._~", c);
     20 }
     21 
     22 bool SanitizePsNameAscii(std::string& name) {
     23  if (name.size() > 63)
     24    return false;
     25 
     26  for (unsigned i = 0; i < name.size(); ++i) {
     27    if (!AllowedInPsName(name[i])) {
     28      name[i] = '_';
     29    }
     30  }
     31  return true;
     32 }
     33 
     34 bool SanitizePsNameUtf16Be(std::string& name) {
     35  if ((name.size() & 1) != 0)
     36    return false;
     37  if (name.size() > 2 * 63)
     38    return false;
     39 
     40  for (unsigned i = 0; i < name.size(); i += 2) {
     41    if (name[i] != 0) {
     42      // non-Latin1 char in psname? reject it altogether
     43      return false;
     44    }
     45    if (!AllowedInPsName(name[i+1])) {
     46      name[i] = '_';
     47    }
     48  }
     49  return true;
     50 }
     51 
     52 void AssignToUtf16BeFromAscii(std::string* target,
     53                              const std::string& source) {
     54  target->resize(source.size() * 2);
     55  for (unsigned i = 0, j = 0; i < source.size(); i++) {
     56    (*target)[j++] = '\0';
     57    (*target)[j++] = source[i];
     58  }
     59 }
     60 
     61 }  // namespace
     62 
     63 
     64 namespace ots {
     65 
     66 bool OpenTypeNAME::Parse(const uint8_t* data, size_t length) {
     67  Buffer table(data, length);
     68 
     69  uint16_t format = 0;
     70  if (!table.ReadU16(&format) || format > 1) {
     71    return Error("Failed to read table format or bad format %d", format);
     72  }
     73 
     74  uint16_t count = 0;
     75  if (!table.ReadU16(&count)) {
     76    return Error("Failed to read name count");
     77  }
     78 
     79  uint16_t string_offset = 0;
     80  if (!table.ReadU16(&string_offset) || string_offset > length) {
     81    return Error("Failed to read or bad stringOffset");
     82  }
     83  const char* string_base = reinterpret_cast<const char*>(data) +
     84      string_offset;
     85 
     86  bool sort_required = false;
     87 
     88  // Read all the names, discarding any with invalid IDs,
     89  // and any where the offset/length would be outside the table.
     90  // A stricter alternative would be to reject the font if there
     91  // are invalid name records, but it's not clear that is necessary.
     92  for (unsigned i = 0; i < count; ++i) {
     93    NameRecord rec;
     94    uint16_t name_length, name_offset = 0;
     95    if (!table.ReadU16(&rec.platform_id) ||
     96        !table.ReadU16(&rec.encoding_id) ||
     97        !table.ReadU16(&rec.language_id) ||
     98        !table.ReadU16(&rec.name_id) ||
     99        !table.ReadU16(&name_length) ||
    100        !table.ReadU16(&name_offset)) {
    101      return Error("Failed to read name entry %d", i);
    102    }
    103    // check platform & encoding, discard names with unknown values
    104    switch (rec.platform_id) {
    105      case 0:  // Unicode
    106        if (rec.encoding_id > 6) {
    107          continue;
    108        }
    109        break;
    110      case 1:  // Macintosh
    111        if (rec.encoding_id > 32) {
    112          continue;
    113        }
    114        break;
    115      case 2:  // ISO
    116        if (rec.encoding_id > 2) {
    117          continue;
    118        }
    119        break;
    120      case 3:  // Windows: IDs 7 to 9 are "reserved"
    121        if (rec.encoding_id > 6 && rec.encoding_id != 10) {
    122          continue;
    123        }
    124        break;
    125      case 4:  // Custom (OTF Windows NT compatibility)
    126        if (rec.encoding_id > 255) {
    127          continue;
    128        }
    129        break;
    130      default:  // unknown platform
    131        continue;
    132    }
    133 
    134    const unsigned name_end = static_cast<unsigned>(string_offset) +
    135        name_offset + name_length;
    136    if (name_end > length) {
    137      continue;
    138    }
    139    rec.text.resize(name_length);
    140    rec.text.assign(string_base + name_offset, name_length);
    141 
    142    if (rec.name_id == 6) {
    143      // PostScript name: "sanitize" it by replacing any chars outside the
    144      // URI spec "unreserved" set by underscore, or reject the name entirely
    145      // (and use a fallback) if it looks really broken.
    146      if (rec.platform_id == 1) {
    147        if (!SanitizePsNameAscii(rec.text)) {
    148          continue;
    149        }
    150      } else if (rec.platform_id == 0 || rec.platform_id == 3) {
    151        if (!SanitizePsNameUtf16Be(rec.text)) {
    152          continue;
    153        }
    154      }
    155    }
    156 
    157    if (!this->names.empty() && !(this->names.back() < rec)) {
    158      Warning("name records are not sorted.");
    159      sort_required = true;
    160    }
    161 
    162    this->names.push_back(rec);
    163    this->name_ids.insert(rec.name_id);
    164  }
    165 
    166  if (format == 1) {
    167    // extended name table format with language tags
    168    uint16_t lang_tag_count;
    169    if (!table.ReadU16(&lang_tag_count)) {
    170      return Error("Failed to read langTagCount");
    171    }
    172    for (unsigned i = 0; i < lang_tag_count; ++i) {
    173      uint16_t tag_length = 0;
    174      uint16_t tag_offset = 0;
    175      if (!table.ReadU16(&tag_length) || !table.ReadU16(&tag_offset)) {
    176        return Error("Failed to read length or offset for langTagRecord %d", i);
    177      }
    178      const unsigned tag_end = static_cast<unsigned>(string_offset) +
    179          tag_offset + tag_length;
    180      if (tag_end > length) {
    181        return Error("bad end of tag %d > %ld for langTagRecord %d", tag_end, length, i);
    182      }
    183      // Lang tag is BCP 47 tag per the spec, the recommonded BCP 47 max tag
    184      // length is 35:
    185      // https://tools.ietf.org/html/bcp47#section-4.4.1
    186      // We are being too generous and allowing for 100 (multiplied by 2 since
    187      // this is UTF-16 string).
    188      if (tag_length > 100 * 2) {
    189        return Error("Too long language tag for LangTagRecord %d: %d", i, tag_length);
    190      }
    191      std::string tag(string_base + tag_offset, tag_length);
    192      this->lang_tags.push_back(tag);
    193    }
    194  }
    195 
    196  if (table.offset() > string_offset) {
    197    // the string storage apparently overlapped the name/tag records;
    198    // consider this font to be badly broken
    199    return Error("Bad table offset %ld > %d", table.offset(), string_offset);
    200  }
    201 
    202  // check existence of required name strings (synthesize if necessary)
    203  //  [0 - copyright - skip]
    204  //   1 - family
    205  //   2 - subfamily
    206  //  [3 - unique ID - skip]
    207  //   4 - full name
    208  //   5 - version
    209  //   6 - postscript name
    210  static const uint16_t kStdNameCount = 7;
    211  static const char* kStdNames[kStdNameCount] = {
    212    NULL,
    213    "OTS derived font",
    214    "Unspecified",
    215    NULL,
    216    "OTS derived font",
    217    "1.000",
    218    "OTS-derived-font"
    219  };
    220 
    221  // scan the names to check whether the required "standard" ones are present;
    222  // if not, we'll add our fixed versions here
    223  bool mac_name[kStdNameCount] = { 0 };
    224  bool win_name[kStdNameCount] = { 0 };
    225  for (const auto& name : this->names) {
    226    const uint16_t id = name.name_id;
    227    if (id >= kStdNameCount || kStdNames[id] == NULL) {
    228      continue;
    229    }
    230    if (name.platform_id == 1) {
    231      mac_name[id] = true;
    232      continue;
    233    }
    234    if (name.platform_id == 3) {
    235      win_name[id] = true;
    236      continue;
    237    }
    238  }
    239 
    240  for (uint16_t i = 0; i < kStdNameCount; ++i) {
    241    if (kStdNames[i] == NULL) {
    242      continue;
    243    }
    244    if (!mac_name[i] && !win_name[i]) {
    245      NameRecord mac_rec(1 /* platform_id */, 0 /* encoding_id */,
    246                         0 /* language_id */ , i /* name_id */);
    247      mac_rec.text.assign(kStdNames[i]);
    248 
    249      NameRecord win_rec(3 /* platform_id */, 1 /* encoding_id */,
    250                         1033 /* language_id */ , i /* name_id */);
    251      AssignToUtf16BeFromAscii(&win_rec.text, std::string(kStdNames[i]));
    252 
    253      this->names.push_back(mac_rec);
    254      this->names.push_back(win_rec);
    255      sort_required = true;
    256    }
    257  }
    258 
    259  if (sort_required) {
    260    std::sort(this->names.begin(), this->names.end());
    261  }
    262 
    263  return true;
    264 }
    265 
    266 bool OpenTypeNAME::Serialize(OTSStream* out) {
    267  uint16_t name_count = static_cast<uint16_t>(this->names.size());
    268  uint16_t lang_tag_count = static_cast<uint16_t>(this->lang_tags.size());
    269  uint16_t format = 0;
    270  size_t string_offset = 6 + name_count * 12;
    271 
    272  if (this->lang_tags.size() > 0) {
    273    // lang tags require a format-1 name table
    274    format = 1;
    275    string_offset += 2 + lang_tag_count * 4;
    276  }
    277  if (string_offset > 0xffff) {
    278    return Error("Bad stringOffset: %ld", string_offset);
    279  }
    280  if (!out->WriteU16(format) ||
    281      !out->WriteU16(name_count) ||
    282      !out->WriteU16(static_cast<uint16_t>(string_offset))) {
    283    return Error("Failed to write name header");
    284  }
    285 
    286  std::string string_data;
    287  for (const auto& rec : this->names) {
    288    if (string_data.size() + rec.text.size() >
    289            std::numeric_limits<uint16_t>::max() ||
    290        !out->WriteU16(rec.platform_id) ||
    291        !out->WriteU16(rec.encoding_id) ||
    292        !out->WriteU16(rec.language_id) ||
    293        !out->WriteU16(rec.name_id) ||
    294        !out->WriteU16(static_cast<uint16_t>(rec.text.size())) ||
    295        !out->WriteU16(static_cast<uint16_t>(string_data.size())) ) {
    296      return Error("Failed to write nameRecord");
    297    }
    298    string_data.append(rec.text);
    299  }
    300 
    301  if (format == 1) {
    302    if (!out->WriteU16(lang_tag_count)) {
    303      return Error("Failed to write langTagCount");
    304    }
    305    for (const auto& tag : this->lang_tags) {
    306      if (string_data.size() + tag.size() >
    307              std::numeric_limits<uint16_t>::max() ||
    308          !out->WriteU16(static_cast<uint16_t>(tag.size())) ||
    309          !out->WriteU16(static_cast<uint16_t>(string_data.size()))) {
    310        return Error("Failed to write langTagRecord");
    311      }
    312      string_data.append(tag);
    313    }
    314  }
    315 
    316  if (!out->Write(string_data.data(), string_data.size())) {
    317    return Error("Failed to write string data");
    318  }
    319 
    320  return true;
    321 }
    322 
    323 bool OpenTypeNAME::IsValidNameId(uint16_t nameID, bool addIfMissing) {
    324  if (addIfMissing && !this->name_ids.count(nameID)) {
    325    bool added_unicode = false;
    326    bool added_macintosh = false;
    327    bool added_windows = false;
    328    const size_t names_size = this->names.size();  // original size
    329    for (size_t i = 0; i < names_size; ++i) switch (names[i].platform_id) {
    330     case 0:
    331      if (!added_unicode) {
    332        // If there is an existing NameRecord with platform_id == 0 (Unicode),
    333        // then add a NameRecord for the the specified nameID with arguments
    334        // 0 (Unicode), 0 (v1.0), 0 (unspecified language).
    335        this->names.emplace_back(0, 0, 0, nameID);
    336        this->names.back().text = "NoName";
    337        added_unicode = true;
    338      }
    339      break;
    340     case 1:
    341      if (!added_macintosh) {
    342        // If there is an existing NameRecord with platform_id == 1 (Macintosh),
    343        // then add a NameRecord for the specified nameID with arguments
    344        // 1 (Macintosh), 0 (Roman), 0 (English).
    345        this->names.emplace_back(1, 0, 0, nameID);
    346        this->names.back().text = "NoName";
    347        added_macintosh = true;
    348      }
    349      break;
    350     case 3:
    351      if (!added_windows) {
    352        // If there is an existing NameRecord with platform_id == 3 (Windows),
    353        // then add a NameRecord for the specified nameID with arguments
    354        // 3 (Windows), 1 (UCS), 1033 (US English).
    355        this->names.emplace_back(3, 1, 1033, nameID);
    356        this->names.back().text = "NoName";
    357        added_windows = true;
    358      }
    359      break;
    360    }
    361    if (added_unicode || added_macintosh || added_windows) {
    362      std::sort(this->names.begin(), this->names.end());
    363      this->name_ids.insert(nameID);
    364    }
    365  }
    366  return this->name_ids.count(nameID);
    367 }
    368 
    369 // List of font names considered "tricky" (dependent on applying original TrueType instructions) by FreeType, see
    370 // https://gitlab.freedesktop.org/freetype/freetype/-/blob/2d9fce53d4ce89f36075168282fcdd7289e082f9/src/truetype/ttobjs.c#L170-241
    371 static const char* tricky_font_names[] = {
    372  "cpop",
    373  "DFGirl-W6-WIN-BF",
    374  "DFGothic-EB",
    375  "DFGyoSho-Lt",
    376  "DFHei",
    377  "DFHSGothic-W5",
    378  "DFHSMincho-W3",
    379  "DFHSMincho-W7",
    380  "DFKaiSho-SB",
    381  "DFKaiShu",
    382  "DFKai-SB",
    383  "DFMing",
    384  "DLC",
    385  "HuaTianKaiTi?",
    386  "HuaTianSongTi?",
    387  "Ming(for ISO10646)",
    388  "MingLiU",
    389  "MingMedium",
    390  "PMingLiU",
    391  "MingLi43"
    392 };
    393 
    394 bool OpenTypeNAME::IsTrickyFont() const {
    395  for (const auto& name : this->names) {
    396    const uint16_t id = name.name_id;
    397    if (id != 1) {
    398      continue;
    399    }
    400    for (const auto* p : tricky_font_names) {
    401      if (name.text.find(p) != std::string::npos) {
    402        return true;
    403      }
    404    }
    405  }
    406  return false;
    407 }
    408 
    409 }  // namespace