tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hb-ot-shaper-khmer.cc (10865B)


      1 /*
      2 * Copyright © 2011,2012  Google, Inc.
      3 *
      4 *  This is part of HarfBuzz, a text shaping library.
      5 *
      6 * Permission is hereby granted, without written agreement and without
      7 * license or royalty fees, to use, copy, modify, and distribute this
      8 * software and its documentation for any purpose, provided that the
      9 * above copyright notice and the following two paragraphs appear in
     10 * all copies of this software.
     11 *
     12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16 * DAMAGE.
     17 *
     18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23 *
     24 * Google Author(s): Behdad Esfahbod
     25 */
     26 
     27 #include "hb.hh"
     28 
     29 #ifndef HB_NO_OT_SHAPE
     30 
     31 #include "hb-ot-shaper-khmer-machine.hh"
     32 #include "hb-ot-shaper-indic.hh"
     33 #include "hb-ot-layout.hh"
     34 
     35 
     36 /*
     37 * Khmer shaper.
     38 */
     39 
     40 
     41 static const hb_ot_map_feature_t
     42 khmer_features[] =
     43 {
     44  /*
     45   * Basic features.
     46   * These features are applied all at once, before reordering, constrained
     47   * to the syllable.
     48   */
     49  {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
     50  {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
     51  {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
     52  {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
     53  {HB_TAG('c','f','a','r'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
     54  /*
     55   * Other features.
     56   * These features are applied all at once after clearing syllables.
     57   */
     58  {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
     59  {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
     60  {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
     61  {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
     62 };
     63 
     64 /*
     65 * Must be in the same order as the khmer_features array.
     66 */
     67 enum {
     68  KHMER_PREF,
     69  KHMER_BLWF,
     70  KHMER_ABVF,
     71  KHMER_PSTF,
     72  KHMER_CFAR,
     73 
     74  _KHMER_PRES,
     75  _KHMER_ABVS,
     76  _KHMER_BLWS,
     77  _KHMER_PSTS,
     78 
     79  KHMER_NUM_FEATURES,
     80  KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */
     81 };
     82 
     83 static inline void
     84 set_khmer_properties (hb_glyph_info_t &info)
     85 {
     86  hb_codepoint_t u = info.codepoint;
     87  unsigned int type = hb_indic_get_categories (u);
     88 
     89  info.khmer_category() = (khmer_category_t) (type & 0xFFu);
     90 }
     91 
     92 static bool
     93 setup_syllables_khmer (const hb_ot_shape_plan_t *plan,
     94 	       hb_font_t *font,
     95 	       hb_buffer_t *buffer);
     96 static bool
     97 reorder_khmer (const hb_ot_shape_plan_t *plan,
     98        hb_font_t *font,
     99        hb_buffer_t *buffer);
    100 
    101 static void
    102 collect_features_khmer (hb_ot_shape_planner_t *plan)
    103 {
    104  hb_ot_map_builder_t *map = &plan->map;
    105 
    106  /* Do this before any lookups have been applied. */
    107  map->add_gsub_pause (setup_syllables_khmer);
    108  map->add_gsub_pause (reorder_khmer);
    109 
    110  /* Testing suggests that Uniscribe does NOT pause between basic
    111   * features.  Test with KhmerUI.ttf and the following three
    112   * sequences:
    113   *
    114   *   U+1789,U+17BC
    115   *   U+1789,U+17D2,U+1789
    116   *   U+1789,U+17D2,U+1789,U+17BC
    117   *
    118   * https://github.com/harfbuzz/harfbuzz/issues/974
    119   */
    120  map->enable_feature (HB_TAG('l','o','c','l'), F_PER_SYLLABLE);
    121  map->enable_feature (HB_TAG('c','c','m','p'), F_PER_SYLLABLE);
    122 
    123  unsigned int i = 0;
    124  for (; i < KHMER_BASIC_FEATURES; i++)
    125    map->add_feature (khmer_features[i]);
    126 
    127  /* https://github.com/harfbuzz/harfbuzz/issues/3531 */
    128  map->add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var
    129 
    130  for (; i < KHMER_NUM_FEATURES; i++)
    131    map->add_feature (khmer_features[i]);
    132 }
    133 
    134 static void
    135 override_features_khmer (hb_ot_shape_planner_t *plan)
    136 {
    137  hb_ot_map_builder_t *map = &plan->map;
    138 
    139  /* Khmer spec has 'clig' as part of required shaping features:
    140   * "Apply feature 'clig' to form ligatures that are desired for
    141   * typographical correctness.", hence in overrides... */
    142  map->enable_feature (HB_TAG('c','l','i','g'));
    143 
    144  map->disable_feature (HB_TAG('l','i','g','a'));
    145 }
    146 
    147 
    148 struct khmer_shape_plan_t
    149 {
    150  hb_mask_t mask_array[KHMER_NUM_FEATURES];
    151 };
    152 
    153 static void *
    154 data_create_khmer (const hb_ot_shape_plan_t *plan)
    155 {
    156  khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) hb_calloc (1, sizeof (khmer_shape_plan_t));
    157  if (unlikely (!khmer_plan))
    158    return nullptr;
    159 
    160  for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++)
    161    khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ?
    162 			 0 : plan->map.get_1_mask (khmer_features[i].tag);
    163 
    164  return khmer_plan;
    165 }
    166 
    167 static void
    168 data_destroy_khmer (void *data)
    169 {
    170  hb_free (data);
    171 }
    172 
    173 static void
    174 setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
    175 	   hb_buffer_t              *buffer,
    176 	   hb_font_t                *font HB_UNUSED)
    177 {
    178  HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
    179 
    180  /* We cannot setup masks here.  We save information about characters
    181   * and setup masks later on in a pause-callback. */
    182 
    183  unsigned int count = buffer->len;
    184  hb_glyph_info_t *info = buffer->info;
    185  for (unsigned int i = 0; i < count; i++)
    186    set_khmer_properties (info[i]);
    187 }
    188 
    189 static bool
    190 setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
    191 	       hb_font_t *font HB_UNUSED,
    192 	       hb_buffer_t *buffer)
    193 {
    194  HB_BUFFER_ALLOCATE_VAR (buffer, syllable);
    195  find_syllables_khmer (buffer);
    196  foreach_syllable (buffer, start, end)
    197    buffer->unsafe_to_break (start, end);
    198  return false;
    199 }
    200 
    201 
    202 /* Rules from:
    203 * https://docs.microsoft.com/en-us/typography/script-development/khmer */
    204 
    205 static void
    206 reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
    207 		    hb_face_t *face HB_UNUSED,
    208 		    hb_buffer_t *buffer,
    209 		    unsigned int start, unsigned int end)
    210 {
    211  const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
    212  hb_glyph_info_t *info = buffer->info;
    213 
    214  /* Setup masks. */
    215  {
    216    /* Post-base */
    217    hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] |
    218 	     khmer_plan->mask_array[KHMER_ABVF] |
    219 	     khmer_plan->mask_array[KHMER_PSTF];
    220    for (unsigned int i = start + 1; i < end; i++)
    221      info[i].mask  |= mask;
    222  }
    223 
    224  unsigned int num_coengs = 0;
    225  for (unsigned int i = start + 1; i < end; i++)
    226  {
    227    /* """
    228     * When a COENG + (Cons | IndV) combination are found (and subscript count
    229     * is less than two) the character combination is handled according to the
    230     * subscript type of the character following the COENG.
    231     *
    232     * ...
    233     *
    234     * Subscript Type 2 - The COENG + RO characters are reordered to immediately
    235     * before the base glyph. Then the COENG + RO characters are assigned to have
    236     * the 'pref' OpenType feature applied to them.
    237     * """
    238     */
    239    if (info[i].khmer_category() == K_Cat(H) && num_coengs <= 2 && i + 1 < end)
    240    {
    241      num_coengs++;
    242 
    243      if (info[i + 1].khmer_category() == K_Cat(Ra))
    244      {
    245 for (unsigned int j = 0; j < 2; j++)
    246   info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF];
    247 
    248 /* Move the Coeng,Ro sequence to the start. */
    249 buffer->merge_clusters (start, i + 2);
    250 hb_glyph_info_t t0 = info[i];
    251 hb_glyph_info_t t1 = info[i + 1];
    252 memmove (&info[start + 2], &info[start], (i - start) * sizeof (info[0]));
    253 info[start] = t0;
    254 info[start + 1] = t1;
    255 
    256 /* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
    257  * Read the feature spec.
    258  * This allows distinguishing the following cases with MS Khmer fonts:
    259  * U+1784,U+17D2,U+179A,U+17D2,U+1782
    260  * U+1784,U+17D2,U+1782,U+17D2,U+179A
    261  */
    262 if (khmer_plan->mask_array[KHMER_CFAR])
    263   for (unsigned int j = i + 2; j < end; j++)
    264     info[j].mask |= khmer_plan->mask_array[KHMER_CFAR];
    265 
    266 num_coengs = 2; /* Done. */
    267      }
    268    }
    269 
    270    /* Reorder left matra piece. */
    271    else if (info[i].khmer_category() == K_Cat(VPre))
    272    {
    273      /* Move to the start. */
    274      buffer->merge_clusters (start, i + 1);
    275      hb_glyph_info_t t = info[i];
    276      memmove (&info[start + 1], &info[start], (i - start) * sizeof (info[0]));
    277      info[start] = t;
    278    }
    279  }
    280 }
    281 
    282 static void
    283 reorder_syllable_khmer (const hb_ot_shape_plan_t *plan,
    284 		hb_face_t *face,
    285 		hb_buffer_t *buffer,
    286 		unsigned int start, unsigned int end)
    287 {
    288  khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (buffer->info[start].syllable() & 0x0F);
    289  switch (syllable_type)
    290  {
    291    case khmer_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
    292    case khmer_consonant_syllable:
    293     reorder_consonant_syllable (plan, face, buffer, start, end);
    294     break;
    295 
    296    case khmer_non_khmer_cluster:
    297      break;
    298  }
    299 }
    300 
    301 static bool
    302 reorder_khmer (const hb_ot_shape_plan_t *plan,
    303        hb_font_t *font,
    304        hb_buffer_t *buffer)
    305 {
    306  bool ret = false;
    307  if (buffer->message (font, "start reordering khmer"))
    308  {
    309    if (hb_syllabic_insert_dotted_circles (font, buffer,
    310 				   khmer_broken_cluster,
    311 				   K_Cat(DOTTEDCIRCLE),
    312 				   (unsigned) -1))
    313      ret = true;
    314 
    315    foreach_syllable (buffer, start, end)
    316      reorder_syllable_khmer (plan, font->face, buffer, start, end);
    317    (void) buffer->message (font, "end reordering khmer");
    318  }
    319  HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
    320 
    321  return ret;
    322 }
    323 
    324 
    325 static bool
    326 decompose_khmer (const hb_ot_shape_normalize_context_t *c,
    327 	 hb_codepoint_t  ab,
    328 	 hb_codepoint_t *a,
    329 	 hb_codepoint_t *b)
    330 {
    331  switch (ab)
    332  {
    333    /*
    334     * Decompose split matras that don't have Unicode decompositions.
    335     */
    336 
    337    /* Khmer */
    338    case 0x17BEu  : *a = 0x17C1u; *b= 0x17BEu; return true;
    339    case 0x17BFu  : *a = 0x17C1u; *b= 0x17BFu; return true;
    340    case 0x17C0u  : *a = 0x17C1u; *b= 0x17C0u; return true;
    341    case 0x17C4u  : *a = 0x17C1u; *b= 0x17C4u; return true;
    342    case 0x17C5u  : *a = 0x17C1u; *b= 0x17C5u; return true;
    343  }
    344 
    345  return (bool) c->unicode->decompose (ab, a, b);
    346 }
    347 
    348 static bool
    349 compose_khmer (const hb_ot_shape_normalize_context_t *c,
    350        hb_codepoint_t  a,
    351        hb_codepoint_t  b,
    352        hb_codepoint_t *ab)
    353 {
    354  /* Avoid recomposing split matras. */
    355  if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
    356    return false;
    357 
    358  return (bool) c->unicode->compose (a, b, ab);
    359 }
    360 
    361 
    362 const hb_ot_shaper_t _hb_ot_shaper_khmer =
    363 {
    364  collect_features_khmer,
    365  override_features_khmer,
    366  data_create_khmer,
    367  data_destroy_khmer,
    368  nullptr, /* preprocess_text */
    369  nullptr, /* postprocess_glyphs */
    370  decompose_khmer,
    371  compose_khmer,
    372  setup_masks_khmer,
    373  nullptr, /* reorder_marks */
    374  HB_TAG_NONE, /* gpos_tag */
    375  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
    376  HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
    377  false, /* fallback_position */
    378 };
    379 
    380 
    381 #endif