tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ligature-graph.hh (19177B)


      1 /*
      2 * Copyright © 2025  Google, Inc.
      3 *
      4 *  This is part of HarfBuzz, a text shaping library.
      5 *
      6 * Permission is hereby granted, without written agreement and without
      7 * license or royalty fees, to use, copy, modify, and distribute this
      8 * software and its documentation for any purpose, provided that the
      9 * above copyright notice and the following two paragraphs appear in
     10 * all copies of this software.
     11 *
     12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16 * DAMAGE.
     17 *
     18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23 *
     24 * Google Author(s): Garret Rieger
     25 */
     26 
     27 #ifndef GRAPH_LIGATURE_GRAPH_HH
     28 #define GRAPH_LIGATURE_GRAPH_HH
     29 
     30 #include "graph.hh"
     31 #include "../OT/Layout/GSUB/LigatureSubst.hh"
     32 #include "../OT/Layout/GSUB/LigatureSubstFormat1.hh"
     33 #include "../OT/Layout/GSUB/LigatureSet.hh"
     34 #include "../OT/Layout/types.hh"
     35 #include <algorithm>
     36 #include <utility>
     37 
     38 namespace graph {
     39 
     40 struct LigatureSet : public OT::Layout::GSUB_impl::LigatureSet<SmallTypes>
     41 {
     42  bool sanitize (graph_t::vertex_t& vertex) const
     43  {
     44    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
     45    if (vertex_len < OT::Layout::GSUB_impl::LigatureSet<SmallTypes>::min_size) return false;
     46    hb_barrier ();
     47 
     48    int64_t total_len = ligature.get_size() + OT::Layout::GSUB_impl::LigatureSet<SmallTypes>::min_size - ligature.len.get_size();
     49    if (vertex_len < total_len) {
     50      return false;
     51    }
     52    return true;
     53  }
     54 };
     55 
     56 struct LigatureSubstFormat1 : public OT::Layout::GSUB_impl::LigatureSubstFormat1_2<SmallTypes>
     57 {
     58  bool sanitize (graph_t::vertex_t& vertex) const
     59  {
     60    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
     61    unsigned min_size = OT::Layout::GSUB_impl::LigatureSubstFormat1_2<SmallTypes>::min_size;
     62    if (vertex_len < min_size) return false;
     63    hb_barrier ();
     64 
     65    return vertex_len >=
     66        min_size + ligatureSet.get_size() - ligatureSet.len.get_size();
     67  }
     68 
     69  hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c,
     70                                         unsigned this_index)
     71  {
     72    auto split_points = compute_split_points(c, this_index);
     73    split_context_t split_context {
     74      c,
     75      this,
     76      this_index,
     77      total_number_ligas(c, this_index),
     78      liga_counts(c, this_index),
     79    };
     80    return actuate_subtable_split<split_context_t> (split_context, split_points);
     81  }
     82 
     83 private:
     84  unsigned total_number_ligas(gsubgpos_graph_context_t& c, unsigned this_index) const {
     85    unsigned total = 0;
     86    for (unsigned i = 0; i < ligatureSet.len; i++)
     87    {
     88      auto liga_set = c.graph.as_table<LigatureSet>(this_index, &ligatureSet[i]);
     89      if (!liga_set.table) {
     90        return 0;
     91      }
     92      total += liga_set.table->ligature.len;
     93    }
     94    return total;
     95  }
     96 
     97  hb_vector_t<unsigned> liga_counts(gsubgpos_graph_context_t& c, unsigned this_index) const {
     98    hb_vector_t<unsigned> result;
     99    for (unsigned i = 0; i < ligatureSet.len; i++)
    100    {
    101      auto liga_set = c.graph.as_table<LigatureSet>(this_index, &ligatureSet[i]);
    102      result.push(!liga_set.table ? 0 : liga_set.table->ligature.len);
    103    }
    104    return result;
    105  }
    106 
    107  hb_vector_t<unsigned> ligature_index_to_object_id(const graph_t::vertex_and_table_t<LigatureSet>& liga_set) const {
    108    hb_vector_t<unsigned> map;
    109    map.resize_exact(liga_set.table->ligature.len);
    110    if (map.in_error()) return map;
    111 
    112    for (unsigned i = 0; i < map.length; i++) {
    113      map[i] = (unsigned) -1;
    114    }
    115 
    116    for (const auto& l : liga_set.vertex->obj.real_links) {
    117      if (l.position < 2) continue;
    118      unsigned array_index = (l.position - 2) / 2;
    119      map[array_index] = l.objidx;
    120    }
    121    return map;
    122  }
    123 
    124  hb_vector_t<unsigned> compute_split_points(gsubgpos_graph_context_t& c,
    125                                             unsigned this_index) const
    126  {
    127    // For ligature subst coverage is always packed last, and as a result is where an overflow
    128    // will happen if there is one, so we can check the estimate length of the
    129    // LigatureSubstFormat1 -> Coverage offset length which is the sum of all data in the
    130    // retained sub graph except for the coverage table itself.
    131    const unsigned base_size = OT::Layout::GSUB_impl::LigatureSubstFormat1_2<SmallTypes>::min_size;
    132    unsigned accumulated = base_size;
    133 
    134    unsigned ligature_index = 0;
    135    hb_vector_t<unsigned> split_points;
    136    for (unsigned i = 0; i < ligatureSet.len; i++)
    137    {
    138      accumulated += OT::HBUINT16::static_size; // for ligature set offset
    139      accumulated += OT::Layout::GSUB_impl::LigatureSet<SmallTypes>::min_size; // for ligature set table
    140 
    141      auto liga_set = c.graph.as_table<LigatureSet>(this_index, &ligatureSet[i]);
    142      if (!liga_set.table) {
    143        return hb_vector_t<unsigned> {};
    144      }
    145 
    146      // Finding the object id associated with an array index is O(n)
    147      // so to avoid O(n^2), precompute the mapping by scanning through
    148      // all links
    149      auto index_to_id = ligature_index_to_object_id(liga_set);
    150      if (index_to_id.in_error()) return hb_vector_t<unsigned>();
    151 
    152      for (unsigned j = 0; j < liga_set.table->ligature.len; j++)
    153      {
    154        const unsigned liga_id = index_to_id[j];
    155        if (liga_id == (unsigned) -1) continue; // no outgoing link, ignore
    156        const unsigned liga_size = c.graph.vertices_[liga_id].table_size ();
    157 
    158        accumulated += OT::HBUINT16::static_size; // for ligature offset
    159        accumulated += liga_size; // for the ligature table
    160 
    161        if (accumulated >= (1 << 16))
    162        {
    163          split_points.push(ligature_index);
    164          // We're going to split such that the current ligature will be in the new sub table.
    165          // That means we'll have one ligature subst (base_base), one ligature set, and one liga table
    166          accumulated = base_size + // for liga subst subtable
    167            (OT::HBUINT16::static_size * 2) + // for liga set and liga offset
    168            OT::Layout::GSUB_impl::LigatureSet<SmallTypes>::min_size + // for liga set subtable
    169            liga_size; // for liga sub table
    170        }
    171 
    172        ligature_index++;
    173      }
    174    }
    175 
    176    return split_points;
    177  }
    178 
    179  struct split_context_t
    180  {
    181    gsubgpos_graph_context_t& c;
    182    LigatureSubstFormat1* thiz;
    183    unsigned this_index;
    184    unsigned original_count_;
    185    hb_vector_t<unsigned> liga_counts;
    186 
    187    unsigned original_count ()
    188    {
    189      return original_count_;
    190    }
    191 
    192    unsigned clone_range (unsigned start, unsigned end)
    193    {
    194      return thiz->clone_range (c, this_index, liga_counts, start, end);
    195    }
    196 
    197    bool shrink (unsigned count)
    198    {
    199      return thiz->shrink (c, this_index, original_count(), liga_counts, count);
    200    }
    201  };
    202 
    203  hb_pair_t<unsigned, LigatureSet*> new_liga_set(gsubgpos_graph_context_t& c, unsigned count) const {
    204    unsigned prime_size = OT::Layout::GSUB_impl::LigatureSet<SmallTypes>::min_size
    205                          + count * SmallTypes::size;
    206 
    207    unsigned prime_id = c.create_node (prime_size);
    208    if (prime_id == (unsigned) -1) return hb_pair(-1, nullptr);
    209 
    210    LigatureSet* prime = (LigatureSet*) c.graph.object (prime_id).head;
    211    prime->ligature.len = count;
    212    return hb_pair(prime_id, prime);
    213  }
    214 
    215  void clear_virtual_links (gsubgpos_graph_context_t& c, unsigned node_index) const
    216  {
    217    auto& obj = c.graph.vertices_[node_index].obj;
    218    for (const auto& l : obj.virtual_links)
    219    {
    220      auto& child = c.graph.vertices_[l.objidx];
    221      child.remove_parent(node_index);
    222    }
    223    obj.virtual_links.clear();
    224  }
    225 
    226  void add_virtual_link(gsubgpos_graph_context_t& c, unsigned from, unsigned to) const {
    227    auto& from_obj = c.graph.vertices_[from].obj;
    228    c.graph.vertices_[to].add_parent(from, true);
    229    auto& link = *from_obj.virtual_links.push ();
    230    link.objidx = to;
    231  }
    232 
    233  hb_pair_t<unsigned, unsigned> current_liga_set_bounds (gsubgpos_graph_context_t& c,
    234                                                         unsigned liga_set_index,
    235                                                         const hb_serialize_context_t::object_t& liga_set) const
    236  {
    237    // Finds the actual liga indices present in the liga set currently. Takes
    238    // into account those that have been removed by processing.
    239    unsigned min_index = (unsigned) -1;
    240    unsigned max_index = 0;
    241    for (const auto& l : liga_set.real_links) {
    242      if (l.position < 2) continue;
    243 
    244      unsigned liga_index = (l.position - 2) / 2;
    245      min_index = hb_min(min_index, liga_index);
    246      max_index = hb_max(max_index, liga_index);
    247    }
    248    return hb_pair(min_index, max_index + 1);
    249  }
    250 
    251  void compact_liga_set (gsubgpos_graph_context_t& c, LigatureSet* table, hb_serialize_context_t::object_t& obj) const
    252  {
    253    if (table->ligature.len <= obj.real_links.length) return;
    254 
    255    // compact the remaining linked liga offsets into a continous array and shrink the node as needed.
    256    unsigned to_remove = table->ligature.len - obj.real_links.length;
    257    unsigned new_position = SmallTypes::size;
    258    obj.real_links.qsort(); // for this to work we need to process links in order of position.
    259    for (auto& l : obj.real_links)
    260    {
    261      l.position = new_position;
    262      new_position += SmallTypes::size;
    263    }
    264 
    265    table->ligature.len = obj.real_links.length;
    266    obj.tail -= to_remove * SmallTypes::size;
    267  }
    268 
    269  unsigned clone_range (gsubgpos_graph_context_t& c,
    270                        unsigned this_index,
    271                        hb_vector_t<unsigned> liga_counts,
    272                        unsigned start, unsigned end) const
    273  {
    274    DEBUG_MSG (SUBSET_REPACK, nullptr,
    275               "  Cloning LigatureSubstFormat1 (%u) range [%u, %u).", this_index, start, end);
    276 
    277    // Create an oversized new liga subst, we'll adjust the size down later. We don't know
    278    // the final size until we process it but we also need it to exist while we're processing
    279    // so that nodes can be moved to it as needed.
    280    unsigned prime_size = OT::Layout::GSUB_impl::LigatureSubstFormat1_2<SmallTypes>::min_size
    281                          + ligatureSet.get_size() - ligatureSet.len.get_size();
    282 
    283    unsigned liga_subst_prime_id = c.create_node (prime_size);
    284    if (liga_subst_prime_id == (unsigned) -1) return -1;
    285 
    286    LigatureSubstFormat1* liga_subst_prime = (LigatureSubstFormat1*) c.graph.object (liga_subst_prime_id).head;
    287    liga_subst_prime->format = this->format;
    288    liga_subst_prime->ligatureSet.len = this->ligatureSet.len;
    289 
    290    // Create a place holder coverage prime id since we need to add virtual links to it while
    291    // generating liga and liga sets. Afterwards it will be updated to have the correct coverage.
    292    unsigned coverage_id = c.graph.index_for_offset (this_index, &coverage);
    293    unsigned coverage_prime_id = c.graph.duplicate(coverage_id);
    294    auto& coverage_prime_vertex = c.graph.vertices_[coverage_prime_id];
    295    auto* coverage_prime_link = c.graph.vertices_[liga_subst_prime_id].obj.real_links.push ();
    296    coverage_prime_link->width = SmallTypes::size;
    297    coverage_prime_link->objidx = coverage_prime_id;
    298    coverage_prime_link->position = 2;
    299    coverage_prime_vertex.add_parent (liga_subst_prime_id, false);
    300 
    301    // Locate all liga sets with ligas between start and end.
    302    // Clone or move them as needed.
    303    unsigned count = 0;
    304    unsigned liga_set_count = 0;
    305    unsigned liga_set_start = -1;
    306    unsigned liga_set_end = 0; // inclusive
    307    for (unsigned i = 0; i < liga_counts.length; i++)
    308    {
    309      unsigned num_ligas = liga_counts[i];
    310 
    311      unsigned current_start = count;
    312      unsigned current_end = count + num_ligas;
    313 
    314      if (current_start >= end || start >= current_end) {
    315        // No intersection, so just skip
    316        count += num_ligas;
    317        continue;
    318      }
    319 
    320      auto liga_set_index = c.graph.index_for_offset(this_index, &ligatureSet[i]);
    321      auto liga_set = c.graph.as_table<LigatureSet>(this_index, &ligatureSet[i]);
    322      if (!liga_set.table) {
    323        return -1;
    324      }
    325 
    326      // Bounds may need to be adjusted if some ligas have been previously removed.
    327      hb_pair_t<unsigned, unsigned> liga_bounds = current_liga_set_bounds(c, liga_set_index, liga_set.vertex->obj);
    328      current_start = hb_max(count + liga_bounds.first, current_start);
    329      current_end = hb_min(count + liga_bounds.second, current_end);
    330 
    331      unsigned liga_set_prime_id;
    332      if (current_start >= start && current_end <= end) {
    333        // This liga set is fully contined within [start, end)
    334        // We can move the entire ligaset to the new liga subset object.
    335        liga_set_end = i;
    336        if (i < liga_set_start) liga_set_start = i;
    337        liga_set_prime_id = c.graph.move_child<> (this_index,
    338                              &ligatureSet[i],
    339                              liga_subst_prime_id,
    340                              &liga_subst_prime->ligatureSet[liga_set_count++]);
    341        compact_liga_set(c, liga_set.table, liga_set.vertex->obj);
    342      }
    343      else
    344      {
    345        // This liga set partially overlaps [start, end). We'll need to create
    346        // a new liga set sub table and move the intersecting ligas to it.
    347        unsigned start_index = hb_max(start, current_start) - count;
    348        unsigned end_index = hb_min(end, current_end) - count;
    349        unsigned liga_count = end_index - start_index;
    350        auto result = new_liga_set(c, liga_count);
    351        liga_set_prime_id = result.first;
    352        if (liga_set_prime_id == (unsigned) -1) return -1;
    353 
    354        c.graph.move_children<OT::Offset16>(
    355          liga_set_index,
    356          2 + start_index * 2,
    357          2 + end_index * 2,
    358          liga_set_prime_id,
    359          2);
    360 
    361        liga_set_end = i;
    362        if (i < liga_set_start) liga_set_start = i;
    363        c.graph.add_link(&liga_subst_prime->ligatureSet[liga_set_count++], liga_subst_prime_id, liga_set_prime_id);
    364      }
    365 
    366      // The new liga and all children set needs to have a virtual link to the new coverage table:
    367      auto& liga_set_prime = c.graph.vertices_[liga_set_prime_id].obj;
    368      clear_virtual_links(c, liga_set_prime_id);
    369      add_virtual_link(c, liga_set_prime_id, coverage_prime_id);
    370      for (const auto& l : liga_set_prime.real_links) {
    371        clear_virtual_links(c, l.objidx);
    372        add_virtual_link(c, l.objidx, coverage_prime_id);
    373      }
    374 
    375      count += num_ligas;
    376    }
    377 
    378    c.graph.vertices_[liga_subst_prime_id].obj.tail -= (liga_subst_prime->ligatureSet.len - liga_set_count) * SmallTypes::size;
    379    liga_subst_prime->ligatureSet.len = liga_set_count;
    380 
    381    if (!Coverage::filter_coverage (c,
    382                                    coverage_prime_id,
    383                                    liga_set_start, liga_set_end + 1))
    384      return -1;
    385 
    386    return liga_subst_prime_id;
    387  }
    388 
    389  bool shrink (gsubgpos_graph_context_t& c,
    390               unsigned this_index,
    391               unsigned old_count,
    392               hb_vector_t<unsigned> liga_counts,
    393               unsigned count)
    394  {
    395    DEBUG_MSG (SUBSET_REPACK, nullptr,
    396               "  Shrinking LigatureSubstFormat1 (%u) to [0, %u).",
    397               this_index,
    398               count);
    399    if (count >= old_count)
    400      return true;
    401 
    402    hb_set_t retained_indices;
    403    unsigned new_liga_set_count = 0;
    404    for (unsigned i = 0; i < liga_counts.length; i++)
    405    {
    406      auto liga_set = c.graph.as_table<LigatureSet>(this_index, &ligatureSet[i]);
    407      if (!liga_set.table) {
    408        return false;
    409      }
    410 
    411      // We need the virtual links to coverage removed from all descendants on this liga subst.
    412      // If any are left when we try to mutate the coverage table later it will be unnessecarily
    413      // duplicated. Code later on will re-add the virtual links as needed (via retained_indices).
    414      clear_virtual_links(c, liga_set.index);
    415      retained_indices.add(liga_set.index);
    416 
    417      auto index_to_id = ligature_index_to_object_id(liga_set);
    418      if (index_to_id.in_error()) return false;
    419 
    420      for (unsigned i = 0; i < liga_set.table->ligature.len; i++) {
    421        unsigned liga_index = index_to_id[i];
    422        if (liga_index != (unsigned) -1) {
    423          clear_virtual_links(c, liga_index);
    424          retained_indices.add(liga_index);
    425        }
    426      }
    427 
    428      unsigned num_ligas = liga_counts[i];
    429      if (num_ligas >= count) {
    430        // drop the trailing liga's from this set and all subsequent liga sets
    431        unsigned num_ligas_to_remove = num_ligas - count;
    432        new_liga_set_count = i + 1;
    433        c.graph.vertices_[liga_set.index].obj.tail -= num_ligas_to_remove * SmallTypes::size;
    434        liga_set.table->ligature.len = count;
    435        break;
    436      } else {
    437        count -= num_ligas;
    438      }
    439    }
    440 
    441    // Adjust liga set array
    442    auto& this_vertex = c.graph.vertices_[this_index];
    443    this_vertex.obj.tail -= (ligatureSet.len - new_liga_set_count) * SmallTypes::size;
    444    ligatureSet.len = new_liga_set_count;
    445 
    446    // Coverage matches the number of liga sets so rebuild as needed
    447    unsigned coverage_idx = c.graph.index_for_offset (this_index, &this->coverage);
    448    if (coverage_idx == (unsigned) -1) return false;
    449 
    450    auto& coverage_v = c.graph.vertices_[coverage_idx];
    451    if (coverage_v.is_shared ())
    452    {
    453      coverage_idx = c.graph.remap_child (this_index, coverage_idx);
    454      if (coverage_idx == (unsigned) -1) return false;
    455    }
    456 
    457    for (unsigned i : retained_indices.iter())
    458      add_virtual_link(c, i, coverage_idx);
    459 
    460    unsigned coverage_size = coverage_v.table_size ();
    461    Coverage* coverage_table = (Coverage*) coverage_v.obj.head;
    462    auto new_coverage =
    463        + hb_zip (coverage_table->iter (), hb_range ())
    464        | hb_filter ([&] (hb_pair_t<unsigned, unsigned> p) {
    465          return p.second < new_liga_set_count;
    466        })
    467        | hb_map_retains_sorting (hb_first)
    468        ;
    469 
    470    return Coverage::make_coverage (c, new_coverage, coverage_idx, coverage_size);
    471  }
    472 };
    473 
    474 struct LigatureSubst : public OT::Layout::GSUB_impl::LigatureSubst
    475 {
    476 
    477  hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c,
    478                                         unsigned this_index)
    479  {
    480    switch (u.format.v) {
    481    case 1:
    482      return ((LigatureSubstFormat1*)(&u.format1))->split_subtables (c, this_index);
    483 #ifndef HB_NO_BEYOND_64K
    484    case 2: HB_FALLTHROUGH;
    485      // Don't split 24bit Ligature Subs
    486 #endif
    487    default:
    488      return hb_vector_t<unsigned> ();
    489    }
    490  }
    491 
    492  bool sanitize (graph_t::vertex_t& vertex) const
    493  {
    494    int64_t vertex_len = vertex.obj.tail - vertex.obj.head;
    495    if (vertex_len < u.format.v.get_size ()) return false;
    496    hb_barrier ();
    497 
    498    switch (u.format.v) {
    499    case 1:
    500      return ((LigatureSubstFormat1*)(&u.format1))->sanitize (vertex);
    501 #ifndef HB_NO_BEYOND_64K
    502    case 2:  HB_FALLTHROUGH;
    503 #endif
    504    default:
    505      // We don't handle format 2 here.
    506      return false;
    507    }
    508  }
    509 };
    510 
    511 }
    512 
    513 #endif  // GRAPH_LIGATURE_GRAPH_HH