tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hb-ot-shaper-indic.cc (51073B)


      1 /*
      2 * Copyright © 2011,2012  Google, Inc.
      3 *
      4 *  This is part of HarfBuzz, a text shaping library.
      5 *
      6 * Permission is hereby granted, without written agreement and without
      7 * license or royalty fees, to use, copy, modify, and distribute this
      8 * software and its documentation for any purpose, provided that the
      9 * above copyright notice and the following two paragraphs appear in
     10 * all copies of this software.
     11 *
     12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16 * DAMAGE.
     17 *
     18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23 *
     24 * Google Author(s): Behdad Esfahbod
     25 */
     26 
     27 #include "hb.hh"
     28 
     29 #ifndef HB_NO_OT_SHAPE
     30 
     31 #include "hb-ot-shaper-indic.hh"
     32 #include "hb-ot-shaper-indic-machine.hh"
     33 #include "hb-ot-shaper-vowel-constraints.hh"
     34 #include "hb-ot-layout.hh"
     35 
     36 
     37 /*
     38 * Indic shaper.
     39 */
     40 
     41 
     42 static inline void
     43 set_indic_properties (hb_glyph_info_t &info)
     44 {
     45  hb_codepoint_t u = info.codepoint;
     46  unsigned int type = hb_indic_get_categories (u);
     47 
     48  info.indic_category() = (indic_category_t) (type & 0xFFu);
     49  info.indic_position() = (indic_position_t) (type >> 8);
     50 }
     51 
     52 
     53 static inline bool
     54 is_one_of (const hb_glyph_info_t &info, unsigned int flags)
     55 {
     56  /* If it ligated, all bets are off. */
     57  if (_hb_glyph_info_ligated (&info)) return false;
     58  return !!(FLAG_UNSAFE (info.indic_category()) & flags);
     59 }
     60 
     61 /* Note:
     62 *
     63 * We treat Vowels and placeholders as if they were consonants.  This is safe because Vowels
     64 * cannot happen in a consonant syllable.  The plus side however is, we can call the
     65 * consonant syllable logic from the vowel syllable function and get it all right!
     66 *
     67 * Keep in sync with consonant_categories in the generator. */
     68 #define CONSONANT_FLAGS_INDIC (FLAG (I_Cat(C)) | FLAG (I_Cat(CS)) | FLAG (I_Cat(Ra)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(V)) | FLAG (I_Cat(PLACEHOLDER)) | FLAG (I_Cat(DOTTEDCIRCLE)))
     69 
     70 static inline bool
     71 is_consonant (const hb_glyph_info_t &info)
     72 {
     73  return is_one_of (info, CONSONANT_FLAGS_INDIC);
     74 }
     75 
     76 #define JOINER_FLAGS (FLAG (I_Cat(ZWJ)) | FLAG (I_Cat(ZWNJ)))
     77 
     78 static inline bool
     79 is_joiner (const hb_glyph_info_t &info)
     80 {
     81  return is_one_of (info, JOINER_FLAGS);
     82 }
     83 
     84 static inline bool
     85 is_halant (const hb_glyph_info_t &info)
     86 {
     87  return is_one_of (info, FLAG (I_Cat(H)));
     88 }
     89 
     90 struct hb_indic_would_substitute_feature_t
     91 {
     92  void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
     93  {
     94    zero_context = zero_context_;
     95    lookups = map->get_stage_lookups (0/*GSUB*/,
     96 			      map->get_feature_stage (0/*GSUB*/, feature_tag));
     97  }
     98 
     99  bool would_substitute (const hb_codepoint_t *glyphs,
    100 		 unsigned int          glyphs_count,
    101 		 hb_face_t            *face) const
    102  {
    103    for (const auto &lookup : lookups)
    104      if (hb_ot_layout_lookup_would_substitute (face, lookup.index, glyphs, glyphs_count, zero_context))
    105 return true;
    106    return false;
    107  }
    108 
    109  private:
    110  hb_array_t<const hb_ot_map_t::lookup_map_t> lookups;
    111  bool zero_context;
    112 };
    113 
    114 
    115 /*
    116 * Indic configurations.  Note that we do not want to keep every single script-specific
    117 * behavior in these tables necessarily.  This should mainly be used for per-script
    118 * properties that are cheaper keeping here, than in the code.  Ie. if, say, one and
    119 * only one script has an exception, that one script can be if'ed directly in the code,
    120 * instead of adding a new flag in these structs.
    121 */
    122 
        /* Reph placement choices used by indic_config_t::reph_pos.  Each
         * enumerator is defined to equal the POS_* value of the same name. */
    123 enum reph_position_t {
    124  REPH_POS_AFTER_MAIN  = POS_AFTER_MAIN,
    125  REPH_POS_BEFORE_SUB  = POS_BEFORE_SUB,
    126  REPH_POS_AFTER_SUB   = POS_AFTER_SUB,
    127  REPH_POS_BEFORE_POST = POS_BEFORE_POST,
    128  REPH_POS_AFTER_POST  = POS_AFTER_POST
    129 };
        /* How a script encodes Reph; stored per script in indic_config_t::reph_mode. */
    130 enum reph_mode_t {
    131  REPH_MODE_IMPLICIT,  /* Reph formed out of initial Ra,H sequence. */
    132  REPH_MODE_EXPLICIT,  /* Reph formed out of initial Ra,H,ZWJ sequence. */
    133  REPH_MODE_LOG_REPHA  /* Encoded Repha character, needs reordering. */
    134 };
        /* Scope of the 'blwf' feature; stored per script in indic_config_t::blwf_mode. */
    135 enum blwf_mode_t {
    136  BLWF_MODE_PRE_AND_POST, /* Below-forms feature applied to pre-base and post-base. */
    137  BLWF_MODE_POST_ONLY     /* Below-forms feature applied to post-base only. */
    138 };
        /* One row of per-script shaping configuration (see indic_configs). */
    139 struct indic_config_t
    140 {
    141  hb_script_t     script;       /* Script the row applies to; compared with plan->props.script. */
    142  bool            has_old_spec; /* Input to is_old_spec, together with the chosen script tag. */
    143  hb_codepoint_t  virama;       /* Virama codepoint; 0 means none (load_virama_glyph then yields glyph 0). */
    144  reph_position_t reph_pos;     /* Reph placement for this script. */
    145  reph_mode_t     reph_mode;    /* How Reph is encoded in this script. */
    146  blwf_mode_t     blwf_mode;    /* Scope of 'blwf' for this script. */
    147 };
    148 
        /* Per-script configuration table.  data_create_indic() starts from entry 0
         * and scans entries 1.. for a row whose script equals the plan's script, so
         * entry 0 is the fallback.  Columns follow indic_config_t's field order:
         * script, has_old_spec, virama, reph_pos, reph_mode, blwf_mode. */
    149 static const indic_config_t indic_configs[] =
    150 {
    151  /* Default.  Should be first. */
    152  {HB_SCRIPT_INVALID,	false,      0,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
    153  {HB_SCRIPT_DEVANAGARI,true, 0x094Du,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
    154  {HB_SCRIPT_BENGALI,	true, 0x09CDu,REPH_POS_AFTER_SUB,  REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
    155  {HB_SCRIPT_GURMUKHI,	true, 0x0A4Du,REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
    156  {HB_SCRIPT_GUJARATI,	true, 0x0ACDu,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
    157  {HB_SCRIPT_ORIYA,	true, 0x0B4Du,REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
    158  {HB_SCRIPT_TAMIL,	true, 0x0BCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
    159  {HB_SCRIPT_TELUGU,	true, 0x0C4Du,REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY},
    160  {HB_SCRIPT_KANNADA,	true, 0x0CCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY},
    161  {HB_SCRIPT_MALAYALAM,	true, 0x0D4Du,REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST},
    162 };
    163 
    164 
        /* GSUB features added by collect_features_indic(), in application order.
         * The first INDIC_BASIC_FEATURES entries are each added followed by a
         * pause; the remaining entries are added together after the
         * final_reordering_indic pause.  data_create_indic() indexes this array
         * in parallel with mask_array, giving F_GLOBAL entries a mask of 0. */
    165 static const hb_ot_map_feature_t
    166 indic_features[] =
    167 {
    168  /*
    169   * Basic features.
    170   * These features are applied in order, one at a time, after initial_reordering,
    171   * constrained to the syllable.
    172   */
    173  {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    174  {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    175  {HB_TAG('r','p','h','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
    176  {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    177  {HB_TAG('p','r','e','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
    178  {HB_TAG('b','l','w','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
    179  {HB_TAG('a','b','v','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
    180  {HB_TAG('h','a','l','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
    181  {HB_TAG('p','s','t','f'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
    182  {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    183  {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    184  /*
    185   * Other features.
    186   * These features are applied all at once, after final_reordering, constrained
    187   * to the syllable.
    188   * Default Bengali font in Windows for example has intermixed
    189   * lookups for init,pres,abvs,blws features.
    190   */
    191  {HB_TAG('i','n','i','t'),        F_MANUAL_JOINERS | F_PER_SYLLABLE},
    192  {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    193  {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    194  {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    195  {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    196  {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE},
    197 };
    198 
    199 /*
    200 * Must be in the same order as the indic_features array.
    201 */
        /* Indices into indic_features[] and indic_shape_plan_t::mask_array.
         * NOTE(review): the underscore-prefixed enumerators are not referenced in
         * the visible part of this file and appear to exist only to keep the
         * indices aligned with the table — confirm before removing any. */
    202 enum {
    203  _INDIC_NUKT,
    204  _INDIC_AKHN,
    205  INDIC_RPHF,
    206  _INDIC_RKRF,
    207  INDIC_PREF,
    208  INDIC_BLWF,
    209  INDIC_ABVF,
    210  INDIC_HALF,
    211  INDIC_PSTF,
    212  _INDIC_VATU,
    213  _INDIC_CJCT,
    214 
    215  INDIC_INIT,
    216  _INDIC_PRES,
    217  _INDIC_ABVS,
    218  _INDIC_BLWS,
    219  _INDIC_PSTS,
    220  _INDIC_HALN,
    221 
         /* Number of entries above; also the length of mask_array. */
    222  INDIC_NUM_FEATURES,
         /* Index of the first non-basic feature, i.e. the count of basic features
          * that collect_features_indic() adds one at a time. */
    223  INDIC_BASIC_FEATURES = INDIC_INIT, /* Don't forget to update this! */
    224 };
    225 
        /* Forward declarations of the callbacks that collect_features_indic()
         * registers through add_gsub_pause(); they are defined later in this file. */
    226 static bool
    227 setup_syllables_indic (const hb_ot_shape_plan_t *plan,
    228 	       hb_font_t *font,
    229 	       hb_buffer_t *buffer);
    230 static bool
    231 initial_reordering_indic (const hb_ot_shape_plan_t *plan,
    232 		  hb_font_t *font,
    233 		  hb_buffer_t *buffer);
    234 static bool
    235 final_reordering_indic (const hb_ot_shape_plan_t *plan,
    236 		hb_font_t *font,
    237 		hb_buffer_t *buffer);
    238 
    239 static void
    240 collect_features_indic (hb_ot_shape_planner_t *plan)
    241 {
    242  hb_ot_map_builder_t *map = &plan->map;
    243 
    244  /* Do this before any lookups have been applied. */
    245  map->add_gsub_pause (setup_syllables_indic);
    246 
    247  map->enable_feature (HB_TAG('l','o','c','l'), F_PER_SYLLABLE);
    248  /* The Indic specs do not require ccmp, but we apply it here since if
    249   * there is a use of it, it's typically at the beginning. */
    250  map->enable_feature (HB_TAG('c','c','m','p'), F_PER_SYLLABLE);
    251 
    252 
    253  unsigned int i = 0;
    254  map->add_gsub_pause (initial_reordering_indic);
    255 
    256  for (; i < INDIC_BASIC_FEATURES; i++) {
    257    map->add_feature (indic_features[i]);
    258    map->add_gsub_pause (nullptr);
    259  }
    260 
    261  map->add_gsub_pause (final_reordering_indic);
    262 
    263  for (; i < INDIC_NUM_FEATURES; i++)
    264    map->add_feature (indic_features[i]);
    265 }
    266 
    267 static void
    268 override_features_indic (hb_ot_shape_planner_t *plan)
    269 {
    270  plan->map.disable_feature (HB_TAG('l','i','g','a'));
    271  plan->map.add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var
    272 }
    273 
    274 
    275 struct indic_shape_plan_t
    276 {
    277  bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
    278  {
    279    hb_codepoint_t glyph = virama_glyph;
    280    if (unlikely (glyph == (hb_codepoint_t) -1))
    281    {
    282      if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph))
    283 glyph = 0;
    284      /* Technically speaking, the spec says we should apply 'locl' to virama too.
    285       * Maybe one day... */
    286 
    287      /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
    288       * during shape planning...  Instead, overwrite it here. */
    289      virama_glyph = (int) glyph;
    290    }
    291 
    292    *pglyph = glyph;
    293    return glyph != 0;
    294  }
    295 
    296  const indic_config_t *config;
    297 
    298  bool is_old_spec;
    299  mutable hb_atomic_t<hb_codepoint_t> virama_glyph;
    300 
    301  hb_indic_would_substitute_feature_t rphf;
    302  hb_indic_would_substitute_feature_t pref;
    303  hb_indic_would_substitute_feature_t blwf;
    304  hb_indic_would_substitute_feature_t pstf;
    305  hb_indic_would_substitute_feature_t vatu;
    306 
    307  hb_mask_t mask_array[INDIC_NUM_FEATURES];
    308 };
    309 
    310 static void *
    311 data_create_indic (const hb_ot_shape_plan_t *plan)
    312 {
    313  indic_shape_plan_t *indic_plan = (indic_shape_plan_t *) hb_calloc (1, sizeof (indic_shape_plan_t));
    314  if (unlikely (!indic_plan))
    315    return nullptr;
    316 
    317  indic_plan->config = &indic_configs[0];
    318  for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++)
    319    if (plan->props.script == indic_configs[i].script) {
    320      indic_plan->config = &indic_configs[i];
    321      break;
    322    }
    323 
    324  indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2');
    325  indic_plan->virama_glyph = -1;
    326 
    327  /* Use zero-context would_substitute() matching for new-spec of the main
    328   * Indic scripts, and scripts with one spec only, but not for old-specs.
    329   * The new-spec for all dual-spec scripts says zero-context matching happens.
    330   *
    331   * However, testing with Malayalam shows that old and new spec both allow
    332   * context.  Testing with Bengali new-spec however shows that it doesn't.
    333   * So, the heuristic here is the way it is.  It should *only* be changed,
    334   * as we discover more cases of what Windows does.  DON'T TOUCH OTHERWISE.
    335   */
    336  bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM;
    337  indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context);
    338  indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context);
    339  indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context);
    340  indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context);
    341  indic_plan->vatu.init (&plan->map, HB_TAG('v','a','t','u'), zero_context);
    342 
    343  for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++)
    344    indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ?
    345 			 0 : plan->map.get_1_mask (indic_features[i].tag);
    346 
    347  return indic_plan;
    348 }
    349 
    350 static void
    351 data_destroy_indic (void *data)
    352 {
    353  hb_free (data);
    354 }
    355 
    356 static indic_position_t
    357 consonant_position_from_face (const indic_shape_plan_t *indic_plan,
    358 		      const hb_codepoint_t consonant,
    359 		      const hb_codepoint_t virama,
    360 		      hb_face_t *face)
    361 {
    362  /* For old-spec, the order of glyphs is Consonant,Virama,
    363   * whereas for new-spec, it's Virama,Consonant.  However,
    364   * some broken fonts (like Free Sans) simply copied lookups
    365   * from old-spec to new-spec without modification.
    366   * And oddly enough, Uniscribe seems to respect those lookups.
    367   * Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
    368   * base at 0.  The font however, only has lookups matching
    369   * 930,94D in 'blwf', not the expected 94D,930 (with new-spec
    370   * table).  As such, we simply match both sequences.  Seems
    371   * to work.
    372   *
    373   * Vatu is done as well, for:
    374   * https://github.com/harfbuzz/harfbuzz/issues/1587
    375   */
    376  hb_codepoint_t glyphs[3] = {virama, consonant, virama};
    377  if (indic_plan->blwf.would_substitute (glyphs  , 2, face) ||
    378      indic_plan->blwf.would_substitute (glyphs+1, 2, face) ||
    379      indic_plan->vatu.would_substitute (glyphs  , 2, face) ||
    380      indic_plan->vatu.would_substitute (glyphs+1, 2, face))
    381    return POS_BELOW_C;
    382  if (indic_plan->pstf.would_substitute (glyphs  , 2, face) ||
    383      indic_plan->pstf.would_substitute (glyphs+1, 2, face))
    384    return POS_POST_C;
    385  if (indic_plan->pref.would_substitute (glyphs  , 2, face) ||
    386      indic_plan->pref.would_substitute (glyphs+1, 2, face))
    387    return POS_POST_C;
    388  return POS_BASE_C;
    389 }
    390 
    391 static void
    392 setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
    393 	   hb_buffer_t              *buffer,
    394 	   hb_font_t                *font HB_UNUSED)
    395 {
    396  HB_BUFFER_ALLOCATE_VAR (buffer, indic_category);
    397  HB_BUFFER_ALLOCATE_VAR (buffer, indic_position);
    398 
    399  /* We cannot setup masks here.  We save information about characters
    400   * and setup masks later on in a pause-callback. */
    401 
    402  unsigned int count = buffer->len;
    403  hb_glyph_info_t *info = buffer->info;
    404  for (unsigned int i = 0; i < count; i++)
    405    set_indic_properties (info[i]);
    406 }
    407 
    408 static bool
    409 setup_syllables_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
    410 	       hb_font_t *font HB_UNUSED,
    411 	       hb_buffer_t *buffer)
    412 {
    413  HB_BUFFER_ALLOCATE_VAR (buffer, syllable);
    414  find_syllables_indic (buffer);
    415  foreach_syllable (buffer, start, end)
    416    buffer->unsafe_to_break (start, end);
    417  return false;
    418 }
    419 
    420 static int
    421 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
    422 {
    423  int a = pa->indic_position();
    424  int b = pb->indic_position();
    425 
    426  return (int) a - (int) b;
    427 }
    428 
    429 
    430 
    431 static void
    432 update_consonant_positions_indic (const hb_ot_shape_plan_t *plan,
    433 			  hb_font_t         *font,
    434 			  hb_buffer_t       *buffer)
    435 {
    436  const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
    437 
    438  hb_codepoint_t virama;
    439  if (indic_plan->load_virama_glyph (font, &virama))
    440  {
    441    hb_face_t *face = font->face;
    442    unsigned int count = buffer->len;
    443    hb_glyph_info_t *info = buffer->info;
    444    for (unsigned int i = 0; i < count; i++)
    445      if (info[i].indic_position() == POS_BASE_C)
    446      {
    447 hb_codepoint_t consonant = info[i].codepoint;
    448 info[i].indic_position() = consonant_position_from_face (indic_plan, consonant, virama, face);
    449      }
    450  }
    451 }
    452 
    453 
    454 /* Rules from:
    455 * https://docs.microsoft.com/en-us/typography/script-development/devanagari */
    456 
    457 static void
    458 initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
    459 			       hb_face_t *face,
    460 			       hb_buffer_t *buffer,
    461 			       unsigned int start, unsigned int end)
    462 {
    463  const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
    464  hb_glyph_info_t *info = buffer->info;
    465 
    466  /* https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167
    467   * // For compatibility with legacy usage in Kannada,
    468   * // Ra+h+ZWJ must behave like Ra+ZWJ+h...
    469   */
    470  if (buffer->props.script == HB_SCRIPT_KANNADA &&
    471      start + 3 <= end &&
    472      is_one_of (info[start  ], FLAG (I_Cat(Ra))) &&
    473      is_one_of (info[start+1], FLAG (I_Cat(H))) &&
    474      is_one_of (info[start+2], FLAG (I_Cat(ZWJ))))
    475  {
    476    buffer->merge_clusters (start+1, start+3);
    477    hb_swap (info[start+1], info[start+2]);
    478  }
    479 
    480  /* 1. Find base consonant:
    481   *
    482   * The shaping engine finds the base consonant of the syllable, using the
    483   * following algorithm: starting from the end of the syllable, move backwards
    484   * until a consonant is found that does not have a below-base or post-base
    485   * form (post-base forms have to follow below-base forms), or that is not a
    486   * pre-base-reordering Ra, or arrive at the first consonant. The consonant
    487   * stopped at will be the base.
    488   *
    489   *   o If the syllable starts with Ra + Halant (in a script that has Reph)
    490   *     and has more than one consonant, Ra is excluded from candidates for
    491   *     base consonants.
    492   */
    493 
    494  unsigned int base = end;
    495  bool has_reph = false;
    496 
    497  {
    498    /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
    499     *    and has more than one consonant, Ra is excluded from candidates for
    500     *    base consonants. */
    501    unsigned int limit = start;
    502    if (indic_plan->mask_array[INDIC_RPHF] &&
    503 start + 3 <= end &&
    504 (
    505  (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) ||
    506  (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == I_Cat(ZWJ))
    507 ))
    508    {
    509      /* See if it matches the 'rphf' feature. */
    510      hb_codepoint_t glyphs[3] = {info[start].codepoint,
    511 			  info[start + 1].codepoint,
    512 			  indic_plan->config->reph_mode == REPH_MODE_EXPLICIT ?
    513 			    info[start + 2].codepoint : 0};
    514      if (indic_plan->rphf.would_substitute (glyphs, 2, face) ||
    515   (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT &&
    516    indic_plan->rphf.would_substitute (glyphs, 3, face)))
    517      {
    518 limit += 2;
    519 while (limit < end && is_joiner (info[limit]))
    520   limit++;
    521 base = start;
    522 has_reph = true;
    523      }
    524    } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == I_Cat(Repha))
    525    {
    526 limit += 1;
    527 while (limit < end && is_joiner (info[limit]))
    528   limit++;
    529 base = start;
    530 has_reph = true;
    531    }
    532 
    533    {
    534      /* -> starting from the end of the syllable, move backwards */
    535      unsigned int i = end;
    536      bool seen_below = false;
    537      do {
    538 i--;
    539 /* -> until a consonant is found */
    540 if (is_consonant (info[i]))
    541 {
    542   /* -> that does not have a below-base or post-base form
    543    * (post-base forms have to follow below-base forms), */
    544   if (info[i].indic_position() != POS_BELOW_C &&
    545       (info[i].indic_position() != POS_POST_C || seen_below))
    546   {
    547     base = i;
    548     break;
    549   }
    550   if (info[i].indic_position() == POS_BELOW_C)
    551     seen_below = true;
    552 
    553   /* -> or that is not a pre-base-reordering Ra,
    554    *
    555    * IMPLEMENTATION NOTES:
    556    *
    557    * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped
    558    * by the logic above already.
    559    */
    560 
    561   /* -> or arrive at the first consonant. The consonant stopped at will
    562    * be the base. */
    563   base = i;
    564 }
    565 else
    566 {
    567   /* A ZWJ after a Halant stops the base search, and requests an explicit
    568    * half form.
    569    * A ZWJ before a Halant, requests a subjoined form instead, and hence
    570    * search continues.  This is particularly important for Bengali
    571    * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
    572   if (start < i &&
    573       info[i].indic_category() == I_Cat(ZWJ) &&
    574       info[i - 1].indic_category() == I_Cat(H))
    575     break;
    576 }
    577      } while (i > limit);
    578    }
    579 
    580    /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
    581     *    and has more than one consonant, Ra is excluded from candidates for
    582     *    base consonants.
    583     *
    584     *  Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */
    585    if (has_reph && base == start && limit - base <= 2) {
    586      /* Have no other consonant, so Reph is not formed and Ra becomes base. */
    587      has_reph = false;
    588    }
    589  }
    590 
    591 
    592  /* 2. Decompose and reorder Matras:
    593   *
    594   * Each matra and any syllable modifier sign in the syllable are moved to the
    595   * appropriate position relative to the consonant(s) in the syllable. The
    596   * shaping engine decomposes two- or three-part matras into their constituent
    597   * parts before any repositioning. Matra characters are classified by which
    598   * consonant in a conjunct they have affinity for and are reordered to the
    599   * following positions:
    600   *
    601   *   o Before first half form in the syllable
    602   *   o After subjoined consonants
    603   *   o After post-form consonant
    604   *   o After main consonant (for above marks)
    605   *
    606   * IMPLEMENTATION NOTES:
    607   *
    608   * The normalize() routine has already decomposed matras for us, so we don't
    609   * need to worry about that.
    610   */
    611 
    612 
    613  /* 3.  Reorder marks to canonical order:
    614   *
    615   * Adjacent nukta and halant or nukta and vedic sign are always repositioned
    616   * if necessary, so that the nukta is first.
    617   *
    618   * IMPLEMENTATION NOTES:
    619   *
    620   * We don't need to do this: the normalize() routine already did this for us.
    621   */
    622 
    623 
    624  /* Reorder characters */
    625 
    626  for (unsigned int i = start; i < base; i++)
    627    info[i].indic_position() = hb_min (POS_PRE_C, (indic_position_t) info[i].indic_position());
    628 
    629  if (base < end)
    630    info[base].indic_position() = POS_BASE_C;
    631 
    632  /* Handle beginning Ra */
    633  if (has_reph)
    634    info[start].indic_position() = POS_RA_TO_BECOME_REPH;
    635 
    636  /* For old-style Indic script tags, move the first post-base Halant after
    637   * last consonant.
    638   *
    639   * Reports suggest that in some scripts Uniscribe does this only if there
    640   * is *not* a Halant after last consonant already.  We know that is the
    641   * case for Kannada, while it reorders unconditionally in other scripts,
    642   * eg. Malayalam, Bengali, and Devanagari.  We don't currently know about
    643   * other scripts, so we block Kannada.
    644   *
    645   * Kannada test case:
    646   * U+0C9A,U+0CCD,U+0C9A,U+0CCD
    647   * With some versions of Lohit Kannada.
    648   * https://bugs.freedesktop.org/show_bug.cgi?id=59118
    649   *
    650   * Malayalam test case:
    651   * U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
    652   * With lohit-ttf-20121122/Lohit-Malayalam.ttf
    653   *
    654   * Bengali test case:
    655   * U+0998,U+09CD,U+09AF,U+09CD
    656   * With Windows XP vrinda.ttf
    657   * https://github.com/harfbuzz/harfbuzz/issues/1073
    658   *
    659   * Devanagari test case:
    660   * U+091F,U+094D,U+0930,U+094D
    661   * With chandas.ttf
    662   * https://github.com/harfbuzz/harfbuzz/issues/1071
    663   */
    664  if (indic_plan->is_old_spec)
    665  {
    666    bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA;
    667    for (unsigned int i = base + 1; i < end; i++)
    668      if (info[i].indic_category() == I_Cat(H))
    669      {
    670 unsigned int j;
    671 for (j = end - 1; j > i; j--)
    672   if (is_consonant (info[j]) ||
    673       (disallow_double_halants && info[j].indic_category() == I_Cat(H)))
    674     break;
    675 if (info[j].indic_category() != I_Cat(H) && j > i) {
    676   /* Move Halant to after last consonant. */
    677   hb_glyph_info_t t = info[i];
    678   memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0]));
    679   info[j] = t;
    680 }
    681 break;
    682      }
    683  }
    684 
    685  /* Attach misc marks to previous char to move with them. */
    686  {
    687    indic_position_t last_pos = POS_START;
    688    for (unsigned int i = start; i < end; i++)
    689    {
    690      if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (I_Cat(N)) | FLAG (I_Cat(RS)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(H)))))
    691      {
    692 info[i].indic_position() = last_pos;
    693 if (unlikely (info[i].indic_category() == I_Cat(H) &&
    694 	      info[i].indic_position() == POS_PRE_M))
    695 {
    696   /*
    697    * Uniscribe doesn't move the Halant with Left Matra.
    698    * TEST: U+092B,U+093F,U+094D
    699    * We follow.
    700    */
    701   for (unsigned int j = i; j > start; j--)
    702     if (info[j - 1].indic_position() != POS_PRE_M) {
    703       info[i].indic_position() = info[j - 1].indic_position();
    704       break;
    705     }
    706 }
    707      } else if (info[i].indic_position() != POS_SMVD) {
    708 if (info[i].indic_category() == I_Cat(MPst) &&
    709     i > start && info[i - 1].indic_category() == I_Cat(SM))
    710   info[i - 1].indic_position() = info[i].indic_position();
    711 last_pos = (indic_position_t) info[i].indic_position();
    712      }
    713    }
    714  }
    715  /* For post-base consonants let them own anything before them
    716   * since the last consonant or matra. */
    717  {
    718    unsigned int last = base;
    719    for (unsigned int i = base + 1; i < end; i++)
    720      if (is_consonant (info[i]))
    721      {
    722 for (unsigned int j = last + 1; j < i; j++)
    723   if (info[j].indic_position() < POS_SMVD)
    724     info[j].indic_position() = info[i].indic_position();
    725 last = i;
    726      } else if (FLAG_UNSAFE (info[i].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
    727 last = i;
    728  }
    729 
    730 
    731  {
    732    /* Use syllable() for sort accounting temporarily. */
    733    unsigned int syllable = info[start].syllable();
    734    for (unsigned int i = start; i < end; i++)
    735      info[i].syllable() = i - start;
    736 
    737    /* Sit tight, rock 'n roll! */
    738    hb_stable_sort (info + start, end - start, compare_indic_order);
    739 
    740    /* Find base again; also flip left-matra sequence. */
    741    unsigned first_left_matra = end;
    742    unsigned last_left_matra = end;
    743    base = end;
    744    for (unsigned int i = start; i < end; i++)
    745    {
    746      if (info[i].indic_position() == POS_BASE_C)
    747      {
    748 base = i;
    749 break;
    750      }
    751      else if (info[i].indic_position() == POS_PRE_M)
    752      {
    753        if (first_left_matra == end)
    754   first_left_matra = i;
    755 last_left_matra = i;
    756      }
    757    }
    758    /* https://github.com/harfbuzz/harfbuzz/issues/3863 */
    759    if (first_left_matra < last_left_matra)
    760    {
    761      /* No need to merge clusters, handled later. */
    762      buffer->reverse_range (first_left_matra, last_left_matra + 1);
    763      /* Reverse back nuktas, etc. */
    764      unsigned i = first_left_matra;
    765      for (unsigned j = i; j <= last_left_matra; j++)
    766 if (FLAG_UNSAFE (info[j].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
    767 {
    768   buffer->reverse_range (i, j + 1);
    769   i = j + 1;
    770 }
    771    }
    772 
    773    /* Things are out-of-control for post base positions, they may shuffle
    774     * around like crazy.  In old-spec mode, we move halants around, so in
    775     * that case merge all clusters after base.  Otherwise, check the sort
    776     * order and merge as needed.
    777     * For pre-base stuff, we handle cluster issues in final reordering.
    778     *
    779     * We could use buffer->sort() for this, if there was no special
    780     * reordering of pre-base stuff happening later...
    781     * We don't want to merge_clusters all of that, which buffer->sort()
    782     * would.  Here's a concrete example:
    783     *
    784     * Assume there's a pre-base consonant and explicit Halant before base,
    785     * followed by a prebase-reordering (left) Matra:
    786     *
    787     *   C,H,ZWNJ,B,M
    788     *
    789     * At this point in reordering we would have:
    790     *
    791     *   M,C,H,ZWNJ,B
    792     *
    793     * whereas in final reordering we will bring the Matra closer to Base:
    794     *
    795     *   C,H,ZWNJ,M,B
    796     *
    797     * That's why we don't want to merge-clusters anything before the Base
    798     * at this point.  But if something moved from after Base to before it,
    799     * we should merge clusters from base to them.  In final-reordering, we
    800     * only move things around before base, and merge-clusters up to base.
    801     * These two merge-clusters from the two sides of base will interlock
    802     * to merge things correctly.  See:
    803     * https://github.com/harfbuzz/harfbuzz/issues/2272
    804     */
    805    if (indic_plan->is_old_spec || end - start > 127)
    806      buffer->merge_clusters (base, end);
    807    else
    808    {
    809      /* Note!  syllable() is a one-byte field. */
    810      for (unsigned int i = base; i < end; i++)
    811 if (info[i].syllable() != 255)
    812 {
    813   unsigned int min = i;
    814   unsigned int max = i;
    815   unsigned int j = start + info[i].syllable();
    816   while (j != i)
    817   {
    818     min = hb_min (min, j);
    819     max = hb_max (max, j);
    820     unsigned int next = start + info[j].syllable();
    821     info[j].syllable() = 255; /* So we don't process j later again. */
    822     j = next;
    823   }
    824   buffer->merge_clusters (hb_max (base, min), max + 1);
    825 }
    826    }
    827 
    828    /* Put syllable back in. */
    829    for (unsigned int i = start; i < end; i++)
    830      info[i].syllable() = syllable;
    831  }
    832 
    833  /* Setup masks now */
    834 
    835  {
    836    hb_mask_t mask;
    837 
    838    /* Reph */
    839    for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++)
    840      info[i].mask |= indic_plan->mask_array[INDIC_RPHF];
    841 
    842    /* Pre-base */
    843    mask = indic_plan->mask_array[INDIC_HALF];
    844    if (!indic_plan->is_old_spec &&
    845 indic_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST)
    846      mask |= indic_plan->mask_array[INDIC_BLWF];
    847    for (unsigned int i = start; i < base; i++)
    848      info[i].mask  |= mask;
    849    /* Base */
    850    mask = 0;
    851    if (base < end)
    852      info[base].mask |= mask;
    853    /* Post-base */
    854    mask = indic_plan->mask_array[INDIC_BLWF] |
    855    indic_plan->mask_array[INDIC_ABVF] |
    856    indic_plan->mask_array[INDIC_PSTF];
    857    for (unsigned int i = base + 1; i < end; i++)
    858      info[i].mask  |= mask;
    859  }
    860 
    861  if (indic_plan->is_old_spec &&
    862      buffer->props.script == HB_SCRIPT_DEVANAGARI)
    863  {
    864    /* Old-spec eye-lash Ra needs special handling.  From the
    865     * spec:
    866     *
    867     * "The feature 'below-base form' is applied to consonants
    868     * having below-base forms and following the base consonant.
    869     * The exception is vattu, which may appear below half forms
    870     * as well as below the base glyph. The feature 'below-base
    871     * form' will be applied to all such occurrences of Ra as well."
    872     *
    873     * Test case: U+0924,U+094D,U+0930,U+094d,U+0915
    874     * with Sanskrit 2003 font.
    875     *
    876     * However, note that Ra,Halant,ZWJ is the correct way to
    877     * request eyelash form of Ra, so we wouldn't inhibit it
    878     * in that sequence.
    879     *
    880     * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
    881     */
    882    for (unsigned int i = start; i + 1 < base; i++)
    883      if (info[i  ].indic_category() == I_Cat(Ra) &&
    884   info[i+1].indic_category() == I_Cat(H)  &&
    885   (i + 2 == base ||
    886    info[i+2].indic_category() != I_Cat(ZWJ)))
    887      {
    888 info[i  ].mask |= indic_plan->mask_array[INDIC_BLWF];
    889 info[i+1].mask |= indic_plan->mask_array[INDIC_BLWF];
    890      }
    891  }
    892 
    893  unsigned int pref_len = 2;
    894  if (indic_plan->mask_array[INDIC_PREF] && base + pref_len < end)
    895  {
    896    /* Find a Halant,Ra sequence and mark it for pre-base-reordering processing. */
    897    for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) {
    898      hb_codepoint_t glyphs[2];
    899      for (unsigned int j = 0; j < pref_len; j++)
    900 glyphs[j] = info[i + j].codepoint;
    901      if (indic_plan->pref.would_substitute (glyphs, pref_len, face))
    902      {
    903 for (unsigned int j = 0; j < pref_len; j++)
    904   info[i++].mask |= indic_plan->mask_array[INDIC_PREF];
    905 break;
    906      }
    907    }
    908  }
    909 
    910  /* Apply ZWJ/ZWNJ effects */
    911  for (unsigned int i = start + 1; i < end; i++)
    912    if (is_joiner (info[i])) {
    913      bool non_joiner = info[i].indic_category() == I_Cat(ZWNJ);
    914      unsigned int j = i;
    915 
    916      do {
    917 j--;
    918 
    919 /* ZWJ/ZWNJ should disable CJCT.  They do that by simply
    920  * being there, since we don't skip them for the CJCT
    921  * feature (ie. F_MANUAL_ZWJ) */
    922 
    923 /* A ZWNJ disables HALF. */
    924 if (non_joiner)
    925   info[j].mask &= ~indic_plan->mask_array[INDIC_HALF];
    926 
    927      } while (j > start && !is_consonant (info[j]));
    928    }
    929 }
    930 
    931 static void
    932 initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
    933 			       hb_face_t *face,
    934 			       hb_buffer_t *buffer,
    935 			       unsigned int start, unsigned int end)
    936 {
    937  /* We treat placeholder/dotted-circle as if they are consonants, so we
    938   * should just chain... */
    939 
    940  initial_reordering_consonant_syllable (plan, face, buffer, start, end);
    941 }
    942 
    943 static void
    944 initial_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
    945 			   hb_face_t *face,
    946 			   hb_buffer_t *buffer,
    947 			   unsigned int start, unsigned int end)
    948 {
    949  indic_syllable_type_t syllable_type = (indic_syllable_type_t) (buffer->info[start].syllable() & 0x0F);
    950  switch (syllable_type)
    951  {
    952    case indic_vowel_syllable: /* We made the vowels look like consonants.  So let's call the consonant logic! */
    953    case indic_consonant_syllable:
    954     initial_reordering_consonant_syllable (plan, face, buffer, start, end);
    955     break;
    956 
    957    case indic_broken_cluster: /* We already inserted dotted-circles, so just call the standalone_cluster. */
    958    case indic_standalone_cluster:
    959     initial_reordering_standalone_cluster (plan, face, buffer, start, end);
    960     break;
    961 
    962    case indic_symbol_cluster:
    963    case indic_non_indic_cluster:
    964      break;
    965  }
    966 }
    967 
    968 static bool
    969 initial_reordering_indic (const hb_ot_shape_plan_t *plan,
    970 		  hb_font_t *font,
    971 		  hb_buffer_t *buffer)
    972 {
    973  bool ret = false;
    974  if (!buffer->message (font, "start reordering indic initial"))
    975    return ret;
    976 
    977  update_consonant_positions_indic (plan, font, buffer);
    978  if (hb_syllabic_insert_dotted_circles (font, buffer,
    979 				 indic_broken_cluster,
    980 				 I_Cat(DOTTEDCIRCLE),
    981 				 I_Cat(Repha),
    982 				 POS_END))
    983    ret = true;
    984 
    985  foreach_syllable (buffer, start, end)
    986    initial_reordering_syllable_indic (plan, font->face, buffer, start, end);
    987 
    988  (void) buffer->message (font, "end reordering indic initial");
    989 
    990  return ret;
    991 }
    992 
    993 static void
    994 final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
    995 			 hb_buffer_t *buffer,
    996 			 unsigned int start, unsigned int end)
    997 {
         /* Final reordering pass for the single syllable occupying
          * [start, end) of buffer->info.
          *
          * In order, this function:
          *   1. restores category I_Cat(H) on glyphs that equal the plan's
          *      virama glyph and are flagged both ligated and multiplied;
          *   2. re-locates the base glyph;
          *   3. moves pre-base matras (POS_PRE_M) towards the base;
          *   4. moves the reph, if the syllable starts with one;
          *   5. moves a glyph carrying the PREF mask in front of the base;
          *   6. sets the INIT mask on a syllable-initial pre-base matra.
          *
          * plan:   shape plan; plan->data is read as an indic_shape_plan_t.
          * buffer: glyph buffer; its info[] entries are moved in place and
          *         clusters are merged around every move.
          * start, end: half-open glyph range of the syllable. */
    998  const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
    999  hb_glyph_info_t *info = buffer->info;
   1000 
   1001 
   1002  /* This function relies heavily on halant glyphs.  Lots of ligation
   1003   * and possibly multiple substitutions happened prior to this
   1004   * phase, and that might have messed up our properties.  Recover
   1005   * from a particular case of that where we're fairly sure that a
   1006   * class of I_Cat(H) is desired but has been lost. */
   1007  /* We don't call load_virama_glyph(), since we know it's already
   1008   * loaded. */
   1009  hb_codepoint_t virama_glyph = indic_plan->virama_glyph;
   1010  if (virama_glyph)
   1011  {
   1012    for (unsigned int i = start; i < end; i++)
   1013      if (info[i].codepoint == virama_glyph &&
   1014   _hb_glyph_info_ligated (&info[i]) &&
   1015   _hb_glyph_info_multiplied (&info[i]))
   1016      {
   1017 /* This will make sure that this glyph passes is_halant() test. */
   1018 info[i].indic_category() = I_Cat(H);
   1019 _hb_glyph_info_clear_ligated_and_multiplied (&info[i]);
   1020      }
   1021  }
   1022 
   1023 
   1024  /* 4. Final reordering:
   1025   *
   1026   * After the localized forms and basic shaping forms GSUB features have been
   1027   * applied (see below), the shaping engine performs some final glyph
   1028   * reordering before applying all the remaining font features to the entire
   1029   * syllable.
   1030   */
   1031 
         /* Pre-base-reordering is only attempted when the plan has a
          * non-zero PREF mask. */
   1032  bool try_pref = !!indic_plan->mask_array[INDIC_PREF];
   1033 
   1034  /* Find base again */
         /* The base is the first glyph whose position is >= POS_BASE_C.
          * If a later glyph carries the PREF mask but is not both
          * substituted and ligated-without-multiplying, the base is moved
          * to that glyph (skipping any halants) and try_pref is cleared. */
   1035  unsigned int base;
   1036  for (base = start; base < end; base++)
   1037    if (info[base].indic_position() >= POS_BASE_C)
   1038    {
   1039      if (try_pref && base + 1 < end)
   1040      {
   1041 for (unsigned int i = base + 1; i < end; i++)
   1042   if ((info[i].mask & indic_plan->mask_array[INDIC_PREF]) != 0)
   1043   {
   1044     if (!(_hb_glyph_info_substituted (&info[i]) &&
   1045 	  _hb_glyph_info_ligated_and_didnt_multiply (&info[i])))
   1046     {
   1047       /* Ok, this was a 'pref' candidate but didn't form any.
   1048        * Base is around here... */
   1049       base = i;
   1050       while (base < end && is_halant (info[base]))
   1051 	base++;
   1052       if (base < end)
   1053 	info[base].indic_position() = POS_BASE_C;
   1054 
   1055       try_pref = false;
   1056     }
   1057     break;
   1058   }
         /* Skipping halants ran off the end of the syllable: stop with
          * base == end. */
   1059 if (base == end)
   1060   break;
   1061      }
   1062      /* For Malayalam, skip over unformed below- (but NOT post-) forms. */
   1063      if (buffer->props.script == HB_SCRIPT_MALAYALAM)
   1064      {
   1065 for (unsigned int i = base + 1; i < end; i++)
   1066 {
   1067   while (i < end && is_joiner (info[i]))
   1068     i++;
   1069   if (i == end || !is_halant (info[i]))
   1070     break;
   1071   i++; /* Skip halant. */
   1072   while (i < end && is_joiner (info[i]))
   1073     i++;
   1074   if (i < end && is_consonant (info[i]) && info[i].indic_position() == POS_BELOW_C)
   1075   {
   1076     base = i;
   1077     info[base].indic_position() = POS_BASE_C;
   1078   }
   1079 }
   1080      }
   1081 
         /* If the glyph we stopped on is positioned past POS_BASE_C, step
          * back by one. */
   1082      if (start < base && info[base].indic_position() > POS_BASE_C)
   1083 base--;
   1084      break;
   1085    }
         /* No base was settled on (base == end): if the last glyph of the
          * syllable is a ZWJ, back up onto it. */
   1086  if (base == end && start < base &&
   1087      is_one_of (info[base - 1], FLAG (I_Cat(ZWJ))))
   1088    base--;
         /* Back off while base sits on a glyph of category N or H. */
   1089  if (base < end)
   1090    while (start < base &&
   1091    is_one_of (info[base], (FLAG (I_Cat(N)) | FLAG (I_Cat(H)))))
   1092      base--;
   1093 
   1094 
   1095  /*   o Reorder matras:
   1096   *
   1097   *     If a pre-base matra character had been reordered before applying basic
   1098   *     features, the glyph can be moved closer to the main consonant based on
   1099   *     whether half-forms had been formed. Actual position for the matra is
   1100   *     defined as “after last standalone halant glyph, after initial matra
   1101   *     position and before the main consonant”. If ZWJ or ZWNJ follow this
   1102   *     halant, position is moved after it.
   1103   *
   1104   * IMPLEMENTATION NOTES:
   1105   *
   1106   * It looks like the last sentence is wrong.  Testing, with Windows 7 Uniscribe
   1107   * and Devanagari shows that the behavior is best described as:
   1108   *
   1109   * "If ZWJ follows this halant, matra is NOT repositioned after this halant.
   1110   *  If ZWNJ follows this halant, position is moved after it."
   1111   *
   1112   * Test case, with Adobe Devanagari or Nirmala UI:
   1113   *
   1114   *   U+091F,U+094D,U+200C,U+092F,U+093F
   1115   *   (Matra moves to the middle, after ZWNJ.)
   1116   *
   1117   *   U+091F,U+094D,U+200D,U+092F,U+093F
   1118   *   (Matra does NOT move, stays to the left.)
   1119   *
   1120   * https://github.com/harfbuzz/harfbuzz/issues/1070
   1121   */
   1122 
   1123  if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
   1124  {
   1125    /* If we lost track of base, alas, position before last thingy. */
   1126    unsigned int new_pos = base == end ? base - 2 : base - 1;
   1127 
   1128    /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
   1129     * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
   1130     * We want to position matra after them.
   1131     */
   1132    if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
   1133    {
         /* Walk new_pos backwards to the nearest M, MPst or H glyph.  The
          * 'search' label is re-entered from below when the halant found
          * is followed by a ZWJ. */
   1134    search:
   1135      while (new_pos > start &&
   1136      !(is_one_of (info[new_pos], (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)) | FLAG (I_Cat(H))))))
   1137 new_pos--;
   1138 
   1139      /* If we found no Halant we are done.
   1140       * Otherwise only proceed if the Halant does
   1141       * not belong to the Matra itself! */
   1142      if (is_halant (info[new_pos]) &&
   1143   info[new_pos].indic_position() != POS_PRE_M)
   1144      {
   1145 #if 0 // See comment above
   1146 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
   1147 if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
   1148   new_pos++;
   1149 #endif
   1150 if (new_pos + 1 < end)
   1151 {
   1152   /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */
   1153   if (info[new_pos + 1].indic_category() == I_Cat(ZWJ))
   1154   {
   1155     /* Keep searching. */
   1156     if (new_pos > start)
   1157     {
   1158       new_pos--;
   1159       goto search;
   1160     }
   1161   }
   1162   /* -> If ZWNJ follows this halant, position is moved after it.
   1163    *
   1164    * IMPLEMENTATION NOTES:
   1165    *
   1166    * This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
   1167    * sequence for a consonant syllable; any pre-base matras occurring after it
   1168    * will belong to the subsequent syllable.
   1169    */
   1170 }
   1171      }
   1172      else
   1173 new_pos = start; /* No move. */
   1174    }
   1175 
   1176    if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M)
   1177    {
   1178      /* Now go see if there's actually any matras... */
   1179      for (unsigned int i = new_pos; i > start; i--)
   1180 if (info[i - 1].indic_position () == POS_PRE_M)
   1181 {
   1182   unsigned int old_pos = i - 1;
   1183   if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */
   1184     base--;
   1185 
         /* Rotate [old_pos, new_pos] left by one element so the matra
          * ends up at new_pos. */
   1186   hb_glyph_info_t tmp = info[old_pos];
   1187   memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0]));
   1188   info[new_pos] = tmp;
   1189 
   1190   /* Note: this merge_clusters() is intentionally *after* the reordering.
   1191    * Indic matra reordering is special and tricky... */
   1192   buffer->merge_clusters (new_pos, hb_min (end, base + 1));
   1193 
         /* The next matra found goes just before the one just placed. */
   1194   new_pos--;
   1195 }
   1196    } else {
         /* Nothing is moved.  Still merge clusters from the first
          * pre-base matra through the base (clamped to end). */
   1197      for (unsigned int i = start; i < base; i++)
   1198 if (info[i].indic_position () == POS_PRE_M) {
   1199   buffer->merge_clusters (i, hb_min (end, base + 1));
   1200   break;
   1201 }
   1202    }
   1203  }
   1204 
   1205 
   1206  /*   o Reorder reph:
   1207   *
   1208   *     Reph’s original position is always at the beginning of the syllable,
   1209   *     (i.e. it is not reordered at the character reordering stage). However,
   1210   *     it will be reordered according to the basic-forms shaping results.
   1211   *     Possible positions for reph, depending on the script, are: after main,
   1212   *     before post-base consonant forms, and after post-base consonant forms.
   1213   */
   1214 
   1215  /* Two cases:
   1216   *
   1217   * - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
   1218   *   we should only move it if the sequence ligated to the repha form.
   1219   *
   1220   * - If repha is encoded separately and in the logical position, we should only
   1221   *   move it if it did NOT ligate.  If it ligated, it's probably the font trying
   1222   *   to make it work without the reordering.
   1223   */
         /* The XOR below encodes the two cases above: exactly one of "is
          * category Repha" and "ligated and didn't multiply" must hold. */
   1224  if (start + 1 < end &&
   1225      info[start].indic_position() == POS_RA_TO_BECOME_REPH &&
   1226      ((info[start].indic_category() == I_Cat(Repha)) ^
   1227       _hb_glyph_info_ligated_and_didnt_multiply (&info[start])))
   1228  {
   1229    unsigned int new_reph_pos;
   1230    reph_position_t reph_pos = indic_plan->config->reph_pos;
   1231 
   1232    /*       1. If reph should be positioned after post-base consonant forms,
   1233     *          proceed to step 5.
   1234     */
   1235    if (reph_pos == REPH_POS_AFTER_POST)
   1236    {
   1237      goto reph_step_5;
   1238    }
   1239 
   1240    /*       2. If the reph repositioning class is not after post-base: target
   1241     *          position is after the first explicit halant glyph between the
   1242     *          first post-reph consonant and last main consonant. If ZWJ or ZWNJ
   1243     *          are following this halant, position is moved after it. If such
   1244     *          position is found, this is the target position. Otherwise,
   1245     *          proceed to the next step.
   1246     *
   1247     *          Note: in old-implementation fonts, where classifications were
   1248     *          fixed in shaping engine, there was no case where reph position
   1249     *          will be found on this step.
   1250     */
   1251    {
   1252      new_reph_pos = start + 1;
   1253      while (new_reph_pos < base && !is_halant (info[new_reph_pos]))
   1254 new_reph_pos++;
   1255 
   1256      if (new_reph_pos < base && is_halant (info[new_reph_pos]))
   1257      {
   1258 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
   1259 if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
   1260   new_reph_pos++;
   1261 goto reph_move;
   1262      }
   1263    }
   1264 
   1265    /*       3. If reph should be repositioned after the main consonant: find the
   1266     *          first consonant not ligated with main, or find the first
   1267     *          consonant that is not a potential pre-base-reordering Ra.
   1268     */
   1269    if (reph_pos == REPH_POS_AFTER_MAIN)
   1270    {
   1271      new_reph_pos = base;
   1272      while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN)
   1273 new_reph_pos++;
   1274      if (new_reph_pos < end)
   1275 goto reph_move;
   1276    }
   1277 
   1278    /*       4. If reph should be positioned before post-base consonant, find
   1279     *          first post-base classified consonant not ligated with main. If no
   1280     *          consonant is found, the target position should be before the
   1281     *          first matra, syllable modifier sign or vedic sign.
   1282     */
   1283    /* This is our take on what step 4 is trying to say (and failing, BADLY). */
   1284    if (reph_pos == REPH_POS_AFTER_SUB)
   1285    {
   1286      new_reph_pos = base;
   1287      while (new_reph_pos + 1 < end &&
   1288      !( FLAG_UNSAFE (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD))))
   1289 new_reph_pos++;
   1290      if (new_reph_pos < end)
   1291 goto reph_move;
   1292    }
   1293 
   1294    /*       5. If no consonant is found in steps 3 or 4, move reph to a position
   1295     *          immediately before the first post-base matra, syllable modifier
   1296     *          sign or vedic sign that has a reordering class after the intended
   1297     *          reph position. For example, if the reordering position for reph
   1298     *          is post-main, it will skip above-base matras that also have a
   1299     *          post-main position.
   1300     */
   1301    reph_step_5:
   1302    {
   1303      /* Copied from step 2. */
   1304      new_reph_pos = start + 1;
   1305      while (new_reph_pos < base && !is_halant (info[new_reph_pos]))
   1306 new_reph_pos++;
   1307 
   1308      if (new_reph_pos < base && is_halant (info[new_reph_pos]))
   1309      {
   1310 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
   1311 if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
   1312   new_reph_pos++;
   1313 goto reph_move;
   1314      }
   1315    }
   1316    /* See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654 */
   1317 
   1318    /*       6. Otherwise, reorder reph to the end of the syllable.
   1319     */
   1320    {
         /* Start at the last glyph and back up over glyphs positioned
          * POS_SMVD. */
   1321      new_reph_pos = end - 1;
   1322      while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD)
   1323 new_reph_pos--;
   1324 
   1325      /*
   1326       * If the Reph is to be ending up after a Matra,Halant sequence,
   1327       * position it before that Halant so it can interact with the Matra.
   1328       * However, if it's a plain Consonant,Halant we shouldn't do that.
   1329       * Uniscribe doesn't do this.
   1330       * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
   1331       */
   1332      if (unlikely (is_halant (info[new_reph_pos])))
   1333      {
   1334 for (unsigned int i = base + 1; i < new_reph_pos; i++)
   1335   if (FLAG_UNSAFE (info[i].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
   1336   {
   1337     /* Ok, got it. */
   1338     new_reph_pos--;
   1339   }
   1340      }
   1341 
   1342      goto reph_move;
   1343    }
   1344 
   1345    reph_move:
   1346    {
   1347      /* Move */
         /* Clusters are merged first; then [start, new_reph_pos] is
          * rotated left by one element so the reph lands at new_reph_pos.
          * base is decremented when it lay inside the rotated range. */
   1348      buffer->merge_clusters (start, new_reph_pos + 1);
   1349      hb_glyph_info_t reph = info[start];
   1350      memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0]));
   1351      info[new_reph_pos] = reph;
   1352 
   1353      if (start < base && base <= new_reph_pos)
   1354 base--;
   1355    }
   1356  }
   1357 
   1358 
   1359  /*   o Reorder pre-base-reordering consonants:
   1360   *
   1361   *     If a pre-base-reordering consonant is found, reorder it according to
   1362   *     the following rules:
   1363   */
   1364 
   1365  if (try_pref && base + 1 < end) /* Otherwise there can't be any pre-base-reordering Ra. */
   1366  {
         /* Only the first glyph after base that carries the PREF mask is
          * considered; the loop breaks after it either way. */
   1367    for (unsigned int i = base + 1; i < end; i++)
   1368      if ((info[i].mask & indic_plan->mask_array[INDIC_PREF]) != 0)
   1369      {
   1370 /*       1. Only reorder a glyph produced by substitution during application
   1371  *          of the <pref> feature. (Note that a font may shape a Ra consonant with
   1372  *          the feature generally but block it in certain contexts.)
   1373  */
   1374 /* Note: We just check that something got substituted.  We don't check that
   1375  * the <pref> feature actually did it...
   1376  *
   1377  * Reorder pref only if it ligated. */
   1378 if (_hb_glyph_info_ligated_and_didnt_multiply (&info[i]))
   1379 {
   1380   /*
   1381    *       2. Try to find a target position the same way as for pre-base matra.
   1382    *          If it is found, reorder pre-base consonant glyph.
   1383    *
   1384    *       3. If position is not found, reorder immediately before main
   1385    *          consonant.
   1386    */
   1387 
   1388   unsigned int new_pos = base;
   1389   /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
   1390    * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
   1391    * We want to position matra after them.
   1392    */
   1393   if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
   1394   {
   1395     while (new_pos > start &&
   1396 	   !(is_one_of (info[new_pos - 1], FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)) | FLAG (I_Cat(H)))))
   1397       new_pos--;
   1398   }
   1399 
   1400   if (new_pos > start && is_halant (info[new_pos - 1]))
   1401   {
   1402     /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
   1403     if (new_pos < end && is_joiner (info[new_pos]))
   1404       new_pos++;
   1405   }
   1406 
   1407   {
   1408     unsigned int old_pos = i;
   1409 
         /* Merge clusters, then rotate [new_pos, old_pos] right by one
          * element so the glyph from old_pos lands at new_pos.  base is
          * incremented when it lay inside the shifted range. */
   1410     buffer->merge_clusters (new_pos, old_pos + 1);
   1411     hb_glyph_info_t tmp = info[old_pos];
   1412     memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0]));
   1413     info[new_pos] = tmp;
   1414 
   1415     if (new_pos <= base && base < old_pos)
   1416       base++;
   1417   }
   1418 }
   1419 
   1420 break;
   1421      }
   1422  }
   1423 
   1424 
   1425  /* Apply 'init' to the Left Matra if it's a word start. */
         /* "Word start" here means: the syllable begins the buffer, or the
          * preceding glyph's general category lies outside the range
          * Format..Non_Spacing_Mark.  Otherwise no mask is set and the
          * boundary with the previous glyph is flagged unsafe to break. */
   1426  if (info[start].indic_position () == POS_PRE_M)
   1427  {
   1428    if (!start ||
   1429 !(FLAG_UNSAFE (_hb_glyph_info_get_general_category (&info[start - 1])) &
   1430  FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
   1431      info[start].mask |= indic_plan->mask_array[INDIC_INIT];
   1432    else
   1433      buffer->unsafe_to_break (start - 1, start + 1);
   1434  }
   1435 }
   1436 
   1437 
   1438 static bool
   1439 final_reordering_indic (const hb_ot_shape_plan_t *plan,
   1440 		hb_font_t *font HB_UNUSED,
   1441 		hb_buffer_t *buffer)
   1442 {
   1443  unsigned int count = buffer->len;
   1444  if (unlikely (!count)) return false;
   1445 
   1446  if (buffer->message (font, "start reordering indic final")) {
   1447    foreach_syllable (buffer, start, end)
   1448      final_reordering_syllable_indic (plan, buffer, start, end);
   1449    (void) buffer->message (font, "end reordering indic final");
   1450  }
   1451 
   1452  HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);
   1453  HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);
   1454 
   1455  return false;
   1456 }
   1457 
   1458 
   1459 static void
   1460 preprocess_text_indic (const hb_ot_shape_plan_t *plan,
   1461 	       hb_buffer_t              *buffer,
   1462 	       hb_font_t                *font)
   1463 {
   1464  _hb_preprocess_text_vowel_constraints (plan, buffer, font);
   1465 }
   1466 
   1467 static bool
   1468 decompose_indic (const hb_ot_shape_normalize_context_t *c,
   1469 	 hb_codepoint_t  ab,
   1470 	 hb_codepoint_t *a,
   1471 	 hb_codepoint_t *b)
   1472 {
   1473  switch (ab)
   1474  {
   1475    /* Don't decompose these. */
   1476    case 0x0931u  : return false; /* DEVANAGARI LETTER RRA */
   1477    // https://github.com/harfbuzz/harfbuzz/issues/779
   1478    case 0x09DCu  : return false; /* BENGALI LETTER RRA */
   1479    case 0x09DDu  : return false; /* BENGALI LETTER RHA */
   1480    case 0x0B94u  : return false; /* TAMIL LETTER AU */
   1481 
   1482 
   1483    /*
   1484     * Decompose split matras that don't have Unicode decompositions.
   1485     */
   1486 
   1487 #if 0
   1488    /* Gujarati */
   1489    /* This one has no decomposition in Unicode, but needs no decomposition either. */
   1490    /* case 0x0AC9u  : return false; */
   1491 
   1492    /* Oriya */
   1493    case 0x0B57u  : *a = no decomp, -> RIGHT; return true;
   1494 #endif
   1495  }
   1496 
   1497  return (bool) c->unicode->decompose (ab, a, b);
   1498 }
   1499 
   1500 static bool
   1501 compose_indic (const hb_ot_shape_normalize_context_t *c,
   1502        hb_codepoint_t  a,
   1503        hb_codepoint_t  b,
   1504        hb_codepoint_t *ab)
   1505 {
   1506  /* Avoid recomposing split matras. */
   1507  if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
   1508    return false;
   1509 
   1510  /* Composition-exclusion exceptions that we want to recompose. */
   1511  if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; }
   1512 
   1513  return (bool) c->unicode->compose (a, b, ab);
   1514 }
   1515 
   1516 
   1517 const hb_ot_shaper_t _hb_ot_shaper_indic =
   1518 {
         /* Shaper descriptor for the Indic shaper.
          *
          * The entries are positional, so their order must match the field
          * order of hb_ot_shaper_t (declared elsewhere).  The *_indic
          * entries are this file's callbacks; nullptr marks hooks this
          * shaper does not provide. */
   1519  collect_features_indic,
   1520  override_features_indic,
   1521  data_create_indic,
   1522  data_destroy_indic,
   1523  preprocess_text_indic,
   1524  nullptr, /* postprocess_glyphs */
   1525  decompose_indic,
   1526  compose_indic,
   1527  setup_masks_indic,
   1528  nullptr, /* reorder_marks */
   1529  HB_TAG_NONE, /* gpos_tag */
   1530  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
   1531  HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
   1532  false, /* fallback_position */
   1533 };
   1534 
   1535 
   1536 #endif