hb-ot-shaper-indic.cc (51073B)
1 /* 2 * Copyright © 2011,2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #include "hb.hh" 28 29 #ifndef HB_NO_OT_SHAPE 30 31 #include "hb-ot-shaper-indic.hh" 32 #include "hb-ot-shaper-indic-machine.hh" 33 #include "hb-ot-shaper-vowel-constraints.hh" 34 #include "hb-ot-layout.hh" 35 36 37 /* 38 * Indic shaper. 39 */ 40 41 42 static inline void 43 set_indic_properties (hb_glyph_info_t &info) 44 { 45 hb_codepoint_t u = info.codepoint; 46 unsigned int type = hb_indic_get_categories (u); 47 48 info.indic_category() = (indic_category_t) (type & 0xFFu); 49 info.indic_position() = (indic_position_t) (type >> 8); 50 } 51 52 53 static inline bool 54 is_one_of (const hb_glyph_info_t &info, unsigned int flags) 55 { 56 /* If it ligated, all bets are off. 
*/ 57 if (_hb_glyph_info_ligated (&info)) return false; 58 return !!(FLAG_UNSAFE (info.indic_category()) & flags); 59 } 60 61 /* Note: 62 * 63 * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels 64 * cannot happen in a consonant syllable. The plus side however is, we can call the 65 * consonant syllable logic from the vowel syllable function and get it all right! 66 * 67 * Keep in sync with consonant_categories in the generator. */ 68 #define CONSONANT_FLAGS_INDIC (FLAG (I_Cat(C)) | FLAG (I_Cat(CS)) | FLAG (I_Cat(Ra)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(V)) | FLAG (I_Cat(PLACEHOLDER)) | FLAG (I_Cat(DOTTEDCIRCLE))) 69 70 static inline bool 71 is_consonant (const hb_glyph_info_t &info) 72 { 73 return is_one_of (info, CONSONANT_FLAGS_INDIC); 74 } 75 76 #define JOINER_FLAGS (FLAG (I_Cat(ZWJ)) | FLAG (I_Cat(ZWNJ))) 77 78 static inline bool 79 is_joiner (const hb_glyph_info_t &info) 80 { 81 return is_one_of (info, JOINER_FLAGS); 82 } 83 84 static inline bool 85 is_halant (const hb_glyph_info_t &info) 86 { 87 return is_one_of (info, FLAG (I_Cat(H))); 88 } 89 90 struct hb_indic_would_substitute_feature_t 91 { 92 void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_) 93 { 94 zero_context = zero_context_; 95 lookups = map->get_stage_lookups (0/*GSUB*/, 96 map->get_feature_stage (0/*GSUB*/, feature_tag)); 97 } 98 99 bool would_substitute (const hb_codepoint_t *glyphs, 100 unsigned int glyphs_count, 101 hb_face_t *face) const 102 { 103 for (const auto &lookup : lookups) 104 if (hb_ot_layout_lookup_would_substitute (face, lookup.index, glyphs, glyphs_count, zero_context)) 105 return true; 106 return false; 107 } 108 109 private: 110 hb_array_t<const hb_ot_map_t::lookup_map_t> lookups; 111 bool zero_context; 112 }; 113 114 115 /* 116 * Indic configurations. Note that we do not want to keep every single script-specific 117 * behavior in these tables necessarily. 
This should mainly be used for per-script 118 * properties that are cheaper keeping here, than in the code. Ie. if, say, one and 119 * only one script has an exception, that one script can be if'ed directly in the code, 120 * instead of adding a new flag in these structs. 121 */ 122 123 enum reph_position_t { 124 REPH_POS_AFTER_MAIN = POS_AFTER_MAIN, 125 REPH_POS_BEFORE_SUB = POS_BEFORE_SUB, 126 REPH_POS_AFTER_SUB = POS_AFTER_SUB, 127 REPH_POS_BEFORE_POST = POS_BEFORE_POST, 128 REPH_POS_AFTER_POST = POS_AFTER_POST 129 }; 130 enum reph_mode_t { 131 REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */ 132 REPH_MODE_EXPLICIT, /* Reph formed out of initial Ra,H,ZWJ sequence. */ 133 REPH_MODE_LOG_REPHA /* Encoded Repha character, needs reordering. */ 134 }; 135 enum blwf_mode_t { 136 BLWF_MODE_PRE_AND_POST, /* Below-forms feature applied to pre-base and post-base. */ 137 BLWF_MODE_POST_ONLY /* Below-forms feature applied to post-base only. */ 138 }; 139 struct indic_config_t 140 { 141 hb_script_t script; 142 bool has_old_spec; 143 hb_codepoint_t virama; 144 reph_position_t reph_pos; 145 reph_mode_t reph_mode; 146 blwf_mode_t blwf_mode; 147 }; 148 149 static const indic_config_t indic_configs[] = 150 { 151 /* Default. Should be first. 
*/ 152 {HB_SCRIPT_INVALID, false, 0,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 153 {HB_SCRIPT_DEVANAGARI,true, 0x094Du,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 154 {HB_SCRIPT_BENGALI, true, 0x09CDu,REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 155 {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 156 {HB_SCRIPT_GUJARATI, true, 0x0ACDu,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 157 {HB_SCRIPT_ORIYA, true, 0x0B4Du,REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 158 {HB_SCRIPT_TAMIL, true, 0x0BCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 159 {HB_SCRIPT_TELUGU, true, 0x0C4Du,REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY}, 160 {HB_SCRIPT_KANNADA, true, 0x0CCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY}, 161 {HB_SCRIPT_MALAYALAM, true, 0x0D4Du,REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST}, 162 }; 163 164 165 static const hb_ot_map_feature_t 166 indic_features[] = 167 { 168 /* 169 * Basic features. 170 * These features are applied in order, one at a time, after initial_reordering, 171 * constrained to the syllable. 
172 */ 173 {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 174 {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 175 {HB_TAG('r','p','h','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, 176 {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 177 {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, 178 {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, 179 {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, 180 {HB_TAG('h','a','l','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, 181 {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, 182 {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 183 {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 184 /* 185 * Other features. 186 * These features are applied all at once, after final_reordering, constrained 187 * to the syllable. 188 * Default Bengali font in Windows for example has intermixed 189 * lookups for init,pres,abvs,blws features. 190 */ 191 {HB_TAG('i','n','i','t'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, 192 {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 193 {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 194 {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 195 {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 196 {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE}, 197 }; 198 199 /* 200 * Must be in the same order as the indic_features array. 201 */ 202 enum { 203 _INDIC_NUKT, 204 _INDIC_AKHN, 205 INDIC_RPHF, 206 _INDIC_RKRF, 207 INDIC_PREF, 208 INDIC_BLWF, 209 INDIC_ABVF, 210 INDIC_HALF, 211 INDIC_PSTF, 212 _INDIC_VATU, 213 _INDIC_CJCT, 214 215 INDIC_INIT, 216 _INDIC_PRES, 217 _INDIC_ABVS, 218 _INDIC_BLWS, 219 _INDIC_PSTS, 220 _INDIC_HALN, 221 222 INDIC_NUM_FEATURES, 223 INDIC_BASIC_FEATURES = INDIC_INIT, /* Don't forget to update this! 
*/ 224 }; 225 226 static bool 227 setup_syllables_indic (const hb_ot_shape_plan_t *plan, 228 hb_font_t *font, 229 hb_buffer_t *buffer); 230 static bool 231 initial_reordering_indic (const hb_ot_shape_plan_t *plan, 232 hb_font_t *font, 233 hb_buffer_t *buffer); 234 static bool 235 final_reordering_indic (const hb_ot_shape_plan_t *plan, 236 hb_font_t *font, 237 hb_buffer_t *buffer); 238 239 static void 240 collect_features_indic (hb_ot_shape_planner_t *plan) 241 { 242 hb_ot_map_builder_t *map = &plan->map; 243 244 /* Do this before any lookups have been applied. */ 245 map->add_gsub_pause (setup_syllables_indic); 246 247 map->enable_feature (HB_TAG('l','o','c','l'), F_PER_SYLLABLE); 248 /* The Indic specs do not require ccmp, but we apply it here since if 249 * there is a use of it, it's typically at the beginning. */ 250 map->enable_feature (HB_TAG('c','c','m','p'), F_PER_SYLLABLE); 251 252 253 unsigned int i = 0; 254 map->add_gsub_pause (initial_reordering_indic); 255 256 for (; i < INDIC_BASIC_FEATURES; i++) { 257 map->add_feature (indic_features[i]); 258 map->add_gsub_pause (nullptr); 259 } 260 261 map->add_gsub_pause (final_reordering_indic); 262 263 for (; i < INDIC_NUM_FEATURES; i++) 264 map->add_feature (indic_features[i]); 265 } 266 267 static void 268 override_features_indic (hb_ot_shape_planner_t *plan) 269 { 270 plan->map.disable_feature (HB_TAG('l','i','g','a')); 271 plan->map.add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var 272 } 273 274 275 struct indic_shape_plan_t 276 { 277 bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const 278 { 279 hb_codepoint_t glyph = virama_glyph; 280 if (unlikely (glyph == (hb_codepoint_t) -1)) 281 { 282 if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph)) 283 glyph = 0; 284 /* Technically speaking, the spec says we should apply 'locl' to virama too. 285 * Maybe one day... 
*/ 286 287 /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph 288 * during shape planning... Instead, overwrite it here. */ 289 virama_glyph = (int) glyph; 290 } 291 292 *pglyph = glyph; 293 return glyph != 0; 294 } 295 296 const indic_config_t *config; 297 298 bool is_old_spec; 299 mutable hb_atomic_t<hb_codepoint_t> virama_glyph; 300 301 hb_indic_would_substitute_feature_t rphf; 302 hb_indic_would_substitute_feature_t pref; 303 hb_indic_would_substitute_feature_t blwf; 304 hb_indic_would_substitute_feature_t pstf; 305 hb_indic_would_substitute_feature_t vatu; 306 307 hb_mask_t mask_array[INDIC_NUM_FEATURES]; 308 }; 309 310 static void * 311 data_create_indic (const hb_ot_shape_plan_t *plan) 312 { 313 indic_shape_plan_t *indic_plan = (indic_shape_plan_t *) hb_calloc (1, sizeof (indic_shape_plan_t)); 314 if (unlikely (!indic_plan)) 315 return nullptr; 316 317 indic_plan->config = &indic_configs[0]; 318 for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++) 319 if (plan->props.script == indic_configs[i].script) { 320 indic_plan->config = &indic_configs[i]; 321 break; 322 } 323 324 indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2'); 325 indic_plan->virama_glyph = -1; 326 327 /* Use zero-context would_substitute() matching for new-spec of the main 328 * Indic scripts, and scripts with one spec only, but not for old-specs. 329 * The new-spec for all dual-spec scripts says zero-context matching happens. 330 * 331 * However, testing with Malayalam shows that old and new spec both allow 332 * context. Testing with Bengali new-spec however shows that it doesn't. 333 * So, the heuristic here is the way it is. It should *only* be changed, 334 * as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE. 
335 */ 336 bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM; 337 indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context); 338 indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context); 339 indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context); 340 indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context); 341 indic_plan->vatu.init (&plan->map, HB_TAG('v','a','t','u'), zero_context); 342 343 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++) 344 indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ? 345 0 : plan->map.get_1_mask (indic_features[i].tag); 346 347 return indic_plan; 348 } 349 350 static void 351 data_destroy_indic (void *data) 352 { 353 hb_free (data); 354 } 355 356 static indic_position_t 357 consonant_position_from_face (const indic_shape_plan_t *indic_plan, 358 const hb_codepoint_t consonant, 359 const hb_codepoint_t virama, 360 hb_face_t *face) 361 { 362 /* For old-spec, the order of glyphs is Consonant,Virama, 363 * whereas for new-spec, it's Virama,Consonant. However, 364 * some broken fonts (like Free Sans) simply copied lookups 365 * from old-spec to new-spec without modification. 366 * And oddly enough, Uniscribe seems to respect those lookups. 367 * Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds 368 * base at 0. The font however, only has lookups matching 369 * 930,94D in 'blwf', not the expected 94D,930 (with new-spec 370 * table). As such, we simply match both sequences. Seems 371 * to work. 
372 * 373 * Vatu is done as well, for: 374 * https://github.com/harfbuzz/harfbuzz/issues/1587 375 */ 376 hb_codepoint_t glyphs[3] = {virama, consonant, virama}; 377 if (indic_plan->blwf.would_substitute (glyphs , 2, face) || 378 indic_plan->blwf.would_substitute (glyphs+1, 2, face) || 379 indic_plan->vatu.would_substitute (glyphs , 2, face) || 380 indic_plan->vatu.would_substitute (glyphs+1, 2, face)) 381 return POS_BELOW_C; 382 if (indic_plan->pstf.would_substitute (glyphs , 2, face) || 383 indic_plan->pstf.would_substitute (glyphs+1, 2, face)) 384 return POS_POST_C; 385 if (indic_plan->pref.would_substitute (glyphs , 2, face) || 386 indic_plan->pref.would_substitute (glyphs+1, 2, face)) 387 return POS_POST_C; 388 return POS_BASE_C; 389 } 390 391 static void 392 setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, 393 hb_buffer_t *buffer, 394 hb_font_t *font HB_UNUSED) 395 { 396 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); 397 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); 398 399 /* We cannot setup masks here. We save information about characters 400 * and setup masks later on in a pause-callback. 
*/ 401 402 unsigned int count = buffer->len; 403 hb_glyph_info_t *info = buffer->info; 404 for (unsigned int i = 0; i < count; i++) 405 set_indic_properties (info[i]); 406 } 407 408 static bool 409 setup_syllables_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, 410 hb_font_t *font HB_UNUSED, 411 hb_buffer_t *buffer) 412 { 413 HB_BUFFER_ALLOCATE_VAR (buffer, syllable); 414 find_syllables_indic (buffer); 415 foreach_syllable (buffer, start, end) 416 buffer->unsafe_to_break (start, end); 417 return false; 418 } 419 420 static int 421 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) 422 { 423 int a = pa->indic_position(); 424 int b = pb->indic_position(); 425 426 return (int) a - (int) b; 427 } 428 429 430 431 static void 432 update_consonant_positions_indic (const hb_ot_shape_plan_t *plan, 433 hb_font_t *font, 434 hb_buffer_t *buffer) 435 { 436 const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; 437 438 hb_codepoint_t virama; 439 if (indic_plan->load_virama_glyph (font, &virama)) 440 { 441 hb_face_t *face = font->face; 442 unsigned int count = buffer->len; 443 hb_glyph_info_t *info = buffer->info; 444 for (unsigned int i = 0; i < count; i++) 445 if (info[i].indic_position() == POS_BASE_C) 446 { 447 hb_codepoint_t consonant = info[i].codepoint; 448 info[i].indic_position() = consonant_position_from_face (indic_plan, consonant, virama, face); 449 } 450 } 451 } 452 453 454 /* Rules from: 455 * https://docs.microsoft.com/en-us/typography/script-development/devanagari */ 456 457 static void 458 initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, 459 hb_face_t *face, 460 hb_buffer_t *buffer, 461 unsigned int start, unsigned int end) 462 { 463 const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; 464 hb_glyph_info_t *info = buffer->info; 465 466 /* https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167 467 * // For compatibility with legacy usage in Kannada, 
468 * // Ra+h+ZWJ must behave like Ra+ZWJ+h... 469 */ 470 if (buffer->props.script == HB_SCRIPT_KANNADA && 471 start + 3 <= end && 472 is_one_of (info[start ], FLAG (I_Cat(Ra))) && 473 is_one_of (info[start+1], FLAG (I_Cat(H))) && 474 is_one_of (info[start+2], FLAG (I_Cat(ZWJ)))) 475 { 476 buffer->merge_clusters (start+1, start+3); 477 hb_swap (info[start+1], info[start+2]); 478 } 479 480 /* 1. Find base consonant: 481 * 482 * The shaping engine finds the base consonant of the syllable, using the 483 * following algorithm: starting from the end of the syllable, move backwards 484 * until a consonant is found that does not have a below-base or post-base 485 * form (post-base forms have to follow below-base forms), or that is not a 486 * pre-base-reordering Ra, or arrive at the first consonant. The consonant 487 * stopped at will be the base. 488 * 489 * o If the syllable starts with Ra + Halant (in a script that has Reph) 490 * and has more than one consonant, Ra is excluded from candidates for 491 * base consonants. 492 */ 493 494 unsigned int base = end; 495 bool has_reph = false; 496 497 { 498 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 499 * and has more than one consonant, Ra is excluded from candidates for 500 * base consonants. */ 501 unsigned int limit = start; 502 if (indic_plan->mask_array[INDIC_RPHF] && 503 start + 3 <= end && 504 ( 505 (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) || 506 (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == I_Cat(ZWJ)) 507 )) 508 { 509 /* See if it matches the 'rphf' feature. */ 510 hb_codepoint_t glyphs[3] = {info[start].codepoint, 511 info[start + 1].codepoint, 512 indic_plan->config->reph_mode == REPH_MODE_EXPLICIT ? 
513 info[start + 2].codepoint : 0}; 514 if (indic_plan->rphf.would_substitute (glyphs, 2, face) || 515 (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && 516 indic_plan->rphf.would_substitute (glyphs, 3, face))) 517 { 518 limit += 2; 519 while (limit < end && is_joiner (info[limit])) 520 limit++; 521 base = start; 522 has_reph = true; 523 } 524 } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == I_Cat(Repha)) 525 { 526 limit += 1; 527 while (limit < end && is_joiner (info[limit])) 528 limit++; 529 base = start; 530 has_reph = true; 531 } 532 533 { 534 /* -> starting from the end of the syllable, move backwards */ 535 unsigned int i = end; 536 bool seen_below = false; 537 do { 538 i--; 539 /* -> until a consonant is found */ 540 if (is_consonant (info[i])) 541 { 542 /* -> that does not have a below-base or post-base form 543 * (post-base forms have to follow below-base forms), */ 544 if (info[i].indic_position() != POS_BELOW_C && 545 (info[i].indic_position() != POS_POST_C || seen_below)) 546 { 547 base = i; 548 break; 549 } 550 if (info[i].indic_position() == POS_BELOW_C) 551 seen_below = true; 552 553 /* -> or that is not a pre-base-reordering Ra, 554 * 555 * IMPLEMENTATION NOTES: 556 * 557 * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped 558 * by the logic above already. 559 */ 560 561 /* -> or arrive at the first consonant. The consonant stopped at will 562 * be the base. */ 563 base = i; 564 } 565 else 566 { 567 /* A ZWJ after a Halant stops the base search, and requests an explicit 568 * half form. 569 * A ZWJ before a Halant, requests a subjoined form instead, and hence 570 * search continues. This is particularly important for Bengali 571 * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. 
*/ 572 if (start < i && 573 info[i].indic_category() == I_Cat(ZWJ) && 574 info[i - 1].indic_category() == I_Cat(H)) 575 break; 576 } 577 } while (i > limit); 578 } 579 580 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 581 * and has more than one consonant, Ra is excluded from candidates for 582 * base consonants. 583 * 584 * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ.) */ 585 if (has_reph && base == start && limit - base <= 2) { 586 /* Have no other consonant, so Reph is not formed and Ra becomes base. */ 587 has_reph = false; 588 } 589 } 590 591 592 /* 2. Decompose and reorder Matras: 593 * 594 * Each matra and any syllable modifier sign in the syllable are moved to the 595 * appropriate position relative to the consonant(s) in the syllable. The 596 * shaping engine decomposes two- or three-part matras into their constituent 597 * parts before any repositioning. Matra characters are classified by which 598 * consonant in a conjunct they have affinity for and are reordered to the 599 * following positions: 600 * 601 * o Before first half form in the syllable 602 * o After subjoined consonants 603 * o After post-form consonant 604 * o After main consonant (for above marks) 605 * 606 * IMPLEMENTATION NOTES: 607 * 608 * The normalize() routine has already decomposed matras for us, so we don't 609 * need to worry about that. 610 */ 611 612 613 /* 3. Reorder marks to canonical order: 614 * 615 * Adjacent nukta and halant or nukta and vedic sign are always repositioned 616 * if necessary, so that the nukta is first. 617 * 618 * IMPLEMENTATION NOTES: 619 * 620 * We don't need to do this: the normalize() routine already did this for us. 
621 */ 622 623 624 /* Reorder characters */ 625 626 for (unsigned int i = start; i < base; i++) 627 info[i].indic_position() = hb_min (POS_PRE_C, (indic_position_t) info[i].indic_position()); 628 629 if (base < end) 630 info[base].indic_position() = POS_BASE_C; 631 632 /* Handle beginning Ra */ 633 if (has_reph) 634 info[start].indic_position() = POS_RA_TO_BECOME_REPH; 635 636 /* For old-style Indic script tags, move the first post-base Halant after 637 * last consonant. 638 * 639 * Reports suggest that in some scripts Uniscribe does this only if there 640 * is *not* a Halant after last consonant already. We know that is the 641 * case for Kannada, while it reorders unconditionally in other scripts, 642 * eg. Malayalam, Bengali, and Devanagari. We don't currently know about 643 * other scripts, so we block Kannada. 644 * 645 * Kannada test case: 646 * U+0C9A,U+0CCD,U+0C9A,U+0CCD 647 * With some versions of Lohit Kannada. 648 * https://bugs.freedesktop.org/show_bug.cgi?id=59118 649 * 650 * Malayalam test case: 651 * U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D 652 * With lohit-ttf-20121122/Lohit-Malayalam.ttf 653 * 654 * Bengali test case: 655 * U+0998,U+09CD,U+09AF,U+09CD 656 * With Windows XP vrinda.ttf 657 * https://github.com/harfbuzz/harfbuzz/issues/1073 658 * 659 * Devanagari test case: 660 * U+091F,U+094D,U+0930,U+094D 661 * With chandas.ttf 662 * https://github.com/harfbuzz/harfbuzz/issues/1071 663 */ 664 if (indic_plan->is_old_spec) 665 { 666 bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA; 667 for (unsigned int i = base + 1; i < end; i++) 668 if (info[i].indic_category() == I_Cat(H)) 669 { 670 unsigned int j; 671 for (j = end - 1; j > i; j--) 672 if (is_consonant (info[j]) || 673 (disallow_double_halants && info[j].indic_category() == I_Cat(H))) 674 break; 675 if (info[j].indic_category() != I_Cat(H) && j > i) { 676 /* Move Halant to after last consonant. 
*/ 677 hb_glyph_info_t t = info[i]; 678 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); 679 info[j] = t; 680 } 681 break; 682 } 683 } 684 685 /* Attach misc marks to previous char to move with them. */ 686 { 687 indic_position_t last_pos = POS_START; 688 for (unsigned int i = start; i < end; i++) 689 { 690 if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (I_Cat(N)) | FLAG (I_Cat(RS)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(H))))) 691 { 692 info[i].indic_position() = last_pos; 693 if (unlikely (info[i].indic_category() == I_Cat(H) && 694 info[i].indic_position() == POS_PRE_M)) 695 { 696 /* 697 * Uniscribe doesn't move the Halant with Left Matra. 698 * TEST: U+092B,U+093F,U+094D 699 * We follow. 700 */ 701 for (unsigned int j = i; j > start; j--) 702 if (info[j - 1].indic_position() != POS_PRE_M) { 703 info[i].indic_position() = info[j - 1].indic_position(); 704 break; 705 } 706 } 707 } else if (info[i].indic_position() != POS_SMVD) { 708 if (info[i].indic_category() == I_Cat(MPst) && 709 i > start && info[i - 1].indic_category() == I_Cat(SM)) 710 info[i - 1].indic_position() = info[i].indic_position(); 711 last_pos = (indic_position_t) info[i].indic_position(); 712 } 713 } 714 } 715 /* For post-base consonants let them own anything before them 716 * since the last consonant or matra. */ 717 { 718 unsigned int last = base; 719 for (unsigned int i = base + 1; i < end; i++) 720 if (is_consonant (info[i])) 721 { 722 for (unsigned int j = last + 1; j < i; j++) 723 if (info[j].indic_position() < POS_SMVD) 724 info[j].indic_position() = info[i].indic_position(); 725 last = i; 726 } else if (FLAG_UNSAFE (info[i].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)))) 727 last = i; 728 } 729 730 731 { 732 /* Use syllable() for sort accounting temporarily. */ 733 unsigned int syllable = info[start].syllable(); 734 for (unsigned int i = start; i < end; i++) 735 info[i].syllable() = i - start; 736 737 /* Sit tight, rock 'n roll! 
*/ 738 hb_stable_sort (info + start, end - start, compare_indic_order); 739 740 /* Find base again; also flip left-matra sequence. */ 741 unsigned first_left_matra = end; 742 unsigned last_left_matra = end; 743 base = end; 744 for (unsigned int i = start; i < end; i++) 745 { 746 if (info[i].indic_position() == POS_BASE_C) 747 { 748 base = i; 749 break; 750 } 751 else if (info[i].indic_position() == POS_PRE_M) 752 { 753 if (first_left_matra == end) 754 first_left_matra = i; 755 last_left_matra = i; 756 } 757 } 758 /* https://github.com/harfbuzz/harfbuzz/issues/3863 */ 759 if (first_left_matra < last_left_matra) 760 { 761 /* No need to merge clusters, handled later. */ 762 buffer->reverse_range (first_left_matra, last_left_matra + 1); 763 /* Reverse back nuktas, etc. */ 764 unsigned i = first_left_matra; 765 for (unsigned j = i; j <= last_left_matra; j++) 766 if (FLAG_UNSAFE (info[j].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)))) 767 { 768 buffer->reverse_range (i, j + 1); 769 i = j + 1; 770 } 771 } 772 773 /* Things are out-of-control for post base positions, they may shuffle 774 * around like crazy. In old-spec mode, we move halants around, so in 775 * that case merge all clusters after base. Otherwise, check the sort 776 * order and merge as needed. 777 * For pre-base stuff, we handle cluster issues in final reordering. 778 * 779 * We could use buffer->sort() for this, if there was no special 780 * reordering of pre-base stuff happening later... 781 * We don't want to merge_clusters all of that, which buffer->sort() 782 * would. 
Here's a concrete example: 783 * 784 * Assume there's a pre-base consonant and explicit Halant before base, 785 * followed by a prebase-reordering (left) Matra: 786 * 787 * C,H,ZWNJ,B,M 788 * 789 * At this point in reordering we would have: 790 * 791 * M,C,H,ZWNJ,B 792 * 793 * whereas in final reordering we will bring the Matra closer to Base: 794 * 795 * C,H,ZWNJ,M,B 796 * 797 * That's why we don't want to merge-clusters anything before the Base 798 * at this point. But if something moved from after Base to before it, 799 * we should merge clusters from base to them. In final-reordering, we 800 * only move things around before base, and merge-clusters up to base. 801 * These two merge-clusters from the two sides of base will interlock 802 * to merge things correctly. See: 803 * https://github.com/harfbuzz/harfbuzz/issues/2272 804 */ 805 if (indic_plan->is_old_spec || end - start > 127) 806 buffer->merge_clusters (base, end); 807 else 808 { 809 /* Note! syllable() is a one-byte field. */ 810 for (unsigned int i = base; i < end; i++) 811 if (info[i].syllable() != 255) 812 { 813 unsigned int min = i; 814 unsigned int max = i; 815 unsigned int j = start + info[i].syllable(); 816 while (j != i) 817 { 818 min = hb_min (min, j); 819 max = hb_max (max, j); 820 unsigned int next = start + info[j].syllable(); 821 info[j].syllable() = 255; /* So we don't process j later again. */ 822 j = next; 823 } 824 buffer->merge_clusters (hb_max (base, min), max + 1); 825 } 826 } 827 828 /* Put syllable back in. 
*/ 829 for (unsigned int i = start; i < end; i++) 830 info[i].syllable() = syllable; 831 } 832 833 /* Setup masks now */ 834 835 { 836 hb_mask_t mask; 837 838 /* Reph */ 839 for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++) 840 info[i].mask |= indic_plan->mask_array[INDIC_RPHF]; 841 842 /* Pre-base */ 843 mask = indic_plan->mask_array[INDIC_HALF]; 844 if (!indic_plan->is_old_spec && 845 indic_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST) 846 mask |= indic_plan->mask_array[INDIC_BLWF]; 847 for (unsigned int i = start; i < base; i++) 848 info[i].mask |= mask; 849 /* Base */ 850 mask = 0; 851 if (base < end) 852 info[base].mask |= mask; 853 /* Post-base */ 854 mask = indic_plan->mask_array[INDIC_BLWF] | 855 indic_plan->mask_array[INDIC_ABVF] | 856 indic_plan->mask_array[INDIC_PSTF]; 857 for (unsigned int i = base + 1; i < end; i++) 858 info[i].mask |= mask; 859 } 860 861 if (indic_plan->is_old_spec && 862 buffer->props.script == HB_SCRIPT_DEVANAGARI) 863 { 864 /* Old-spec eye-lash Ra needs special handling. From the 865 * spec: 866 * 867 * "The feature 'below-base form' is applied to consonants 868 * having below-base forms and following the base consonant. 869 * The exception is vattu, which may appear below half forms 870 * as well as below the base glyph. The feature 'below-base 871 * form' will be applied to all such occurrences of Ra as well." 872 * 873 * Test case: U+0924,U+094D,U+0930,U+094d,U+0915 874 * with Sanskrit 2003 font. 875 * 876 * However, note that Ra,Halant,ZWJ is the correct way to 877 * request eyelash form of Ra, so we wouldn't inhibit it 878 * in that sequence. 
     *
     * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
     */
    for (unsigned int i = start; i + 1 < base; i++)
      if (info[i  ].indic_category() == I_Cat(Ra) &&
          info[i+1].indic_category() == I_Cat(H)  &&
          (i + 2 == base ||
           info[i+2].indic_category() != I_Cat(ZWJ)))
      {
        info[i  ].mask |= indic_plan->mask_array[INDIC_BLWF];
        info[i+1].mask |= indic_plan->mask_array[INDIC_BLWF];
      }
  }

  unsigned int pref_len = 2;
  if (indic_plan->mask_array[INDIC_PREF] && base + pref_len < end)
  {
    /* Find a Halant,Ra sequence and mark it for pre-base-reordering processing. */
    for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) {
      hb_codepoint_t glyphs[2];
      for (unsigned int j = 0; j < pref_len; j++)
        glyphs[j] = info[i + j].codepoint;
      if (indic_plan->pref.would_substitute (glyphs, pref_len, face))
      {
        /* Only the first matching pair is marked; the search stops here. */
        for (unsigned int j = 0; j < pref_len; j++)
          info[i++].mask |= indic_plan->mask_array[INDIC_PREF];
        break;
      }
    }
  }

  /* Apply ZWJ/ZWNJ effects */
  for (unsigned int i = start + 1; i < end; i++)
    if (is_joiner (info[i])) {
      bool non_joiner = info[i].indic_category() == I_Cat(ZWNJ);
      unsigned int j = i;

      /* Walk backwards from the joiner until a consonant (or the syllable
       * start) has been visited. */
      do {
        j--;

        /* ZWJ/ZWNJ should disable CJCT. They do that by simply
         * being there, since we don't skip them for the CJCT
         * feature (ie. F_MANUAL_ZWJ) */

        /* A ZWNJ disables HALF. */
        if (non_joiner)
          info[j].mask &= ~indic_plan->mask_array[INDIC_HALF];

      } while (j > start && !is_consonant (info[j]));
    }
}

/* Initial reordering for a standalone cluster in [start, end).
 *
 * Forwards all arguments unchanged to
 * initial_reordering_consonant_syllable(). */
static void
initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
                                       hb_face_t *face,
                                       hb_buffer_t *buffer,
                                       unsigned int start, unsigned int end)
{
  /* We treat placeholder/dotted-circle as if they are consonants, so we
   * should just chain... */

  initial_reordering_consonant_syllable (plan, face, buffer, start, end);
}

/* Dispatch initial reordering for the syllable occupying [start, end).
 *
 * The syllable type is read from the low four bits of
 * buffer->info[start].syllable().  Vowel and consonant syllables go to the
 * consonant-syllable routine, broken and standalone clusters go to the
 * standalone-cluster routine, and symbol / non-Indic clusters are left
 * untouched. */
static void
initial_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
                                   hb_face_t *face,
                                   hb_buffer_t *buffer,
                                   unsigned int start, unsigned int end)
{
  indic_syllable_type_t syllable_type = (indic_syllable_type_t) (buffer->info[start].syllable() & 0x0F);
  switch (syllable_type)
  {
    case indic_vowel_syllable: /* We made the vowels look like consonants. So let's call the consonant logic! */
    case indic_consonant_syllable:
      initial_reordering_consonant_syllable (plan, face, buffer, start, end);
      break;

    case indic_broken_cluster: /* We already inserted dotted-circles, so just call the standalone_cluster. */
    case indic_standalone_cluster:
      initial_reordering_standalone_cluster (plan, face, buffer, start, end);
      break;

    case indic_symbol_cluster:
    case indic_non_indic_cluster:
      break;
  }
}

/* Initial-reordering pass over the whole buffer.
 *
 * Returns false without doing any work if the "start" buffer message call
 * returns false.  Otherwise updates consonant positions, inserts dotted
 * circles for broken clusters, and runs the per-syllable initial reordering
 * on every syllable.
 *
 * The return value is true only when hb_syllabic_insert_dotted_circles()
 * returned true. */
static bool
initial_reordering_indic (const hb_ot_shape_plan_t *plan,
                          hb_font_t *font,
                          hb_buffer_t *buffer)
{
  bool ret = false;
  if (!buffer->message (font, "start reordering indic initial"))
    return ret;

  update_consonant_positions_indic (plan, font, buffer);
  if (hb_syllabic_insert_dotted_circles (font, buffer,
                                         indic_broken_cluster,
                                         I_Cat(DOTTEDCIRCLE),
                                         I_Cat(Repha),
                                         POS_END))
    ret = true;

  foreach_syllable (buffer, start, end)
    initial_reordering_syllable_indic (plan, font->face, buffer, start, end);

  (void) buffer->message (font, "end reordering indic initial");

  return ret;
}

/* Final reordering of the syllable occupying [start, end) in buffer->info.
 *
 * In order, this function:
 *  - restores category H on glyphs that equal the plan's virama glyph and
 *    are flagged both ligated and multiplied;
 *  - locates the base again;
 *  - moves pre-base matras (POS_PRE_M) towards the base;
 *  - moves a reph from the syllable start to its target position;
 *  - moves a ligated, 'pref'-masked glyph to before the base;
 *  - sets the 'init' mask on a leading pre-base matra, or marks the
 *    boundary with the previous glyph unsafe-to-break.
 *
 * Glyphs are rotated in place with memmove() and clusters are merged to
 * match.  `plan->data` is read as an indic_shape_plan_t. */
static void
final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
                                 hb_buffer_t *buffer,
                                 unsigned int start, unsigned int end)
{
  const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
  hb_glyph_info_t *info = buffer->info;


  /* This function relies heavily on halant glyphs. Lots of ligation
   * and possibly multiple substitutions happened prior to this
   * phase, and that might have messed up our properties. Recover
   * from a particular case of that where we're fairly sure that a
   * class of I_Cat(H) is desired but has been lost. */
  /* We don't call load_virama_glyph(), since we know it's already
   * loaded. */
  hb_codepoint_t virama_glyph = indic_plan->virama_glyph;
  if (virama_glyph)
  {
    for (unsigned int i = start; i < end; i++)
      if (info[i].codepoint == virama_glyph &&
          _hb_glyph_info_ligated (&info[i]) &&
          _hb_glyph_info_multiplied (&info[i]))
      {
        /* This will make sure that this glyph passes is_halant() test. */
        info[i].indic_category() = I_Cat(H);
        _hb_glyph_info_clear_ligated_and_multiplied (&info[i]);
      }
  }


  /* 4. Final reordering:
   *
   * After the localized forms and basic shaping forms GSUB features have been
   * applied (see below), the shaping engine performs some final glyph
   * reordering before applying all the remaining font features to the entire
   * syllable.
   */

  /* Cleared below if a 'pref' candidate turns out not to have formed; the
   * pre-base-reordering step near the end is then skipped. */
  bool try_pref = !!indic_plan->mask_array[INDIC_PREF];

  /* Find base again */
  unsigned int base;
  for (base = start; base < end; base++)
    if (info[base].indic_position() >= POS_BASE_C)
    {
      if (try_pref && base + 1 < end)
      {
        /* Only the first PREF-masked glyph after the candidate base is examined. */
        for (unsigned int i = base + 1; i < end; i++)
          if ((info[i].mask & indic_plan->mask_array[INDIC_PREF]) != 0)
          {
            if (!(_hb_glyph_info_substituted (&info[i]) &&
                  _hb_glyph_info_ligated_and_didnt_multiply (&info[i])))
            {
              /* Ok, this was a 'pref' candidate but didn't form any.
               * Base is around here... */
              base = i;
              while (base < end && is_halant (info[base]))
                base++;
              if (base < end)
                info[base].indic_position() = POS_BASE_C;

              try_pref = false;
            }
            break;
          }
        /* The halant skip above may have run off the end of the syllable. */
        if (base == end)
          break;
      }
      /* For Malayalam, skip over unformed below- (but NOT post-) forms. */
      if (buffer->props.script == HB_SCRIPT_MALAYALAM)
      {
        /* Each iteration expects: joiners*, halant, joiners*, then a
         * below-base consonant, which becomes the new base. */
        for (unsigned int i = base + 1; i < end; i++)
        {
          while (i < end && is_joiner (info[i]))
            i++;
          if (i == end || !is_halant (info[i]))
            break;
          i++; /* Skip halant. */
          while (i < end && is_joiner (info[i]))
            i++;
          if (i < end && is_consonant (info[i]) && info[i].indic_position() == POS_BELOW_C)
          {
            base = i;
            info[base].indic_position() = POS_BASE_C;
          }
        }
      }

      if (start < base && info[base].indic_position() > POS_BASE_C)
        base--;
      break;
    }
  /* No base found: step back onto a trailing (non-ligated) ZWJ, if any. */
  if (base == end && start < base &&
      is_one_of (info[base - 1], FLAG (I_Cat(ZWJ))))
    base--;
  /* Back up over nukta / halant glyphs so that base does not rest on one. */
  if (base < end)
    while (start < base &&
           is_one_of (info[base], (FLAG (I_Cat(N)) | FLAG (I_Cat(H)))))
      base--;


  /*   o Reorder matras:
   *
   *     If a pre-base matra character had been reordered before applying basic
   *     features, the glyph can be moved closer to the main consonant based on
   *     whether half-forms had been formed. Actual position for the matra is
   *     defined as “after last standalone halant glyph, after initial matra
   *     position and before the main consonant”. If ZWJ or ZWNJ follow this
   *     halant, position is moved after it.
   *
   * IMPLEMENTATION NOTES:
   *
   * It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
   * and Devanagari shows that the behavior is best described as:
   *
   * "If ZWJ follows this halant, matra is NOT repositioned after this halant.
   *  If ZWNJ follows this halant, position is moved after it."
   *
   * Test case, with Adobe Devanagari or Nirmala UI:
   *
   *   U+091F,U+094D,U+200C,U+092F,U+093F
   *   (Matra moves to the middle, after ZWNJ.)
   *
   *   U+091F,U+094D,U+200D,U+092F,U+093F
   *   (Matra does NOT move, stays to the left.)
   *
   * https://github.com/harfbuzz/harfbuzz/issues/1070
   */

  if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
  {
    /* If we lost track of base, alas, position before last thingy. */
    /* (start + 1 < end keeps base - 2 >= start when base == end;
     *  start < base keeps base - 1 >= start otherwise.) */
    unsigned int new_pos = base == end ? base - 2 : base - 1;

    /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
     * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
     * We want to position matra after them.
     */
    if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
    {
    search:
      /* Scan backwards for the nearest matra or halant (stops at start). */
      while (new_pos > start &&
             !(is_one_of (info[new_pos], (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)) | FLAG (I_Cat(H))))))
        new_pos--;

      /* If we found no Halant we are done.
       * Otherwise only proceed if the Halant does
       * not belong to the Matra itself! */
      if (is_halant (info[new_pos]) &&
          info[new_pos].indic_position() != POS_PRE_M)
      {
#if 0 // See comment above
        /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
        if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
          new_pos++;
#endif
        if (new_pos + 1 < end)
        {
          /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */
          if (info[new_pos + 1].indic_category() == I_Cat(ZWJ))
          {
            /* Keep searching. */
            if (new_pos > start)
            {
              new_pos--;
              goto search;
            }
          }
          /* -> If ZWNJ follows this halant, position is moved after it.
           *
           * IMPLEMENTATION NOTES:
           *
           * This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
           * sequence for a consonant syllable; any pre-base matras occurring after it
           * will belong to the subsequent syllable.
           */
        }
      }
      else
        new_pos = start; /* No move. */
    }

    if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M)
    {
      /* Now go see if there's actually any matras... */
      for (unsigned int i = new_pos; i > start; i--)
        if (info[i - 1].indic_position () == POS_PRE_M)
        {
          unsigned int old_pos = i - 1;
          if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */
            base--;

          /* Rotate info[old_pos..new_pos] left by one: the matra that was at
           * old_pos ends up at new_pos. */
          hb_glyph_info_t tmp = info[old_pos];
          memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0]));
          info[new_pos] = tmp;

          /* Note: this merge_clusters() is intentionally *after* the reordering.
           * Indic matra reordering is special and tricky... */
          buffer->merge_clusters (new_pos, hb_min (end, base + 1));

          /* The next matra found (if any) lands just before this one. */
          new_pos--;
        }
    } else {
      /* Nothing is moved; clusters are still merged from the first pre-base
       * matra up to and including the base. */
      for (unsigned int i = start; i < base; i++)
        if (info[i].indic_position () == POS_PRE_M) {
          buffer->merge_clusters (i, hb_min (end, base + 1));
          break;
        }
    }
  }


  /*   o Reorder reph:
   *
   *     Reph’s original position is always at the beginning of the syllable,
   *     (i.e. it is not reordered at the character reordering stage). However,
   *     it will be reordered according to the basic-forms shaping results.
   *     Possible positions for reph, depending on the script, are; after main,
   *     before post-base consonant forms, and after post-base consonant forms.
   */

  /* Two cases:
   *
   * - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
   *   we should only move it if the sequence ligated to the repha form.
   *
   * - If repha is encoded separately and in the logical position, we should only
   *   move it if it did NOT ligate. If it ligated, it's probably the font trying
   *   to make it work without the reordering.
   */
  if (start + 1 < end &&
      info[start].indic_position() == POS_RA_TO_BECOME_REPH &&
      ((info[start].indic_category() == I_Cat(Repha)) ^
       _hb_glyph_info_ligated_and_didnt_multiply (&info[start])))
  {
    unsigned int new_reph_pos;
    reph_position_t reph_pos = indic_plan->config->reph_pos;

    /*       1. If reph should be positioned after post-base consonant forms,
     *          proceed to step 5.
     */
    if (reph_pos == REPH_POS_AFTER_POST)
    {
      goto reph_step_5;
    }

    /*       2. If the reph repositioning class is not after post-base: target
     *          position is after the first explicit halant glyph between the
     *          first post-reph consonant and last main consonant. If ZWJ or ZWNJ
     *          are following this halant, position is moved after it. If such
     *          position is found, this is the target position. Otherwise,
     *          proceed to the next step.
     *
     *          Note: in old-implementation fonts, where classifications were
     *          fixed in shaping engine, there was no case where reph position
     *          will be found on this step.
     */
    {
      new_reph_pos = start + 1;
      while (new_reph_pos < base && !is_halant (info[new_reph_pos]))
        new_reph_pos++;

      if (new_reph_pos < base && is_halant (info[new_reph_pos]))
      {
        /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
        if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
          new_reph_pos++;
        goto reph_move;
      }
    }

    /*       3. If reph should be repositioned after the main consonant: find the
     *          first consonant not ligated with main, or find the first
     *          consonant that is not a potential pre-base-reordering Ra.
     */
    if (reph_pos == REPH_POS_AFTER_MAIN)
    {
      new_reph_pos = base;
      while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN)
        new_reph_pos++;
      if (new_reph_pos < end)
        goto reph_move;
    }

    /*       4. If reph should be positioned before post-base consonant, find
     *          first post-base classified consonant not ligated with main. If no
     *          consonant is found, the target position should be before the
     *          first matra, syllable modifier sign or vedic sign.
     */
    /* This is our take on what step 4 is trying to say (and failing, BADLY). */
    if (reph_pos == REPH_POS_AFTER_SUB)
    {
      new_reph_pos = base;
      while (new_reph_pos + 1 < end &&
             !( FLAG_UNSAFE (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD))))
        new_reph_pos++;
      if (new_reph_pos < end)
        goto reph_move;
    }

    /*       5. If no consonant is found in steps 3 or 4, move reph to a position
     *          immediately before the first post-base matra, syllable modifier
     *          sign or vedic sign that has a reordering class after the intended
     *          reph position. For example, if the reordering position for reph
     *          is post-main, it will skip above-base matras that also have a
     *          post-main position.
     */
    reph_step_5:
    {
      /* Copied from step 2. */
      new_reph_pos = start + 1;
      while (new_reph_pos < base && !is_halant (info[new_reph_pos]))
        new_reph_pos++;

      if (new_reph_pos < base && is_halant (info[new_reph_pos]))
      {
        /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
        if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
          new_reph_pos++;
        goto reph_move;
      }
    }
    /* See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654 */

    /*       6. Otherwise, reorder reph to the end of the syllable.
     */
    {
      /* Start from the last glyph and back up over trailing POS_SMVD glyphs. */
      new_reph_pos = end - 1;
      while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD)
        new_reph_pos--;

      /*
       * If the Reph is to be ending up after a Matra,Halant sequence,
       * position it before that Halant so it can interact with the Matra.
       * However, if it's a plain Consonant,Halant we shouldn't do that.
       * Uniscribe doesn't do this.
       * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
       */
      if (unlikely (is_halant (info[new_reph_pos])))
      {
        /* NOTE(review): there is no break here, so new_reph_pos is
         * decremented once for every matra seen while i < new_reph_pos. */
        for (unsigned int i = base + 1; i < new_reph_pos; i++)
          if (FLAG_UNSAFE (info[i].indic_category()) & (FLAG (I_Cat(M)) | FLAG (I_Cat(MPst))))
          {
            /* Ok, got it. */
            new_reph_pos--;
          }
      }

      goto reph_move;
    }

    reph_move:
    {
      /* Move */
      /* Rotate info[start..new_reph_pos] left by one: the reph that was at
       * start ends up at new_reph_pos. */
      buffer->merge_clusters (start, new_reph_pos + 1);
      hb_glyph_info_t reph = info[start];
      memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0]));
      info[new_reph_pos] = reph;

      /* The base shifted one slot left if it was inside the rotated range. */
      if (start < base && base <= new_reph_pos)
        base--;
    }
  }


  /*   o Reorder pre-base-reordering consonants:
   *
   *     If a pre-base-reordering consonant is found, reorder it according to
   *     the following rules:
   */

  if (try_pref && base + 1 < end) /* Otherwise there can't be any pre-base-reordering Ra. */
  {
    /* Only the first PREF-masked glyph after the base is considered. */
    for (unsigned int i = base + 1; i < end; i++)
      if ((info[i].mask & indic_plan->mask_array[INDIC_PREF]) != 0)
      {
        /*       1. Only reorder a glyph produced by substitution during application
         *          of the <pref> feature. (Note that a font may shape a Ra consonant with
         *          the feature generally but block it in certain contexts.)
         */
        /* Note: We just check that something got substituted. We don't check that
         * the <pref> feature actually did it...
         *
         * Reorder pref only if it ligated. */
        if (_hb_glyph_info_ligated_and_didnt_multiply (&info[i]))
        {
          /*
           *       2. Try to find a target position the same way as for pre-base matra.
           *          If it is found, reorder pre-base consonant glyph.
           *
           *       3. If position is not found, reorder immediately before main
           *          consonant.
           */

          unsigned int new_pos = base;
          /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
           * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
           * We want to position the pre-base-reordering consonant after them.
           */
          if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
          {
            while (new_pos > start &&
                   !(is_one_of (info[new_pos - 1], FLAG (I_Cat(M)) | FLAG (I_Cat(MPst)) | FLAG (I_Cat(H)))))
              new_pos--;
          }

          if (new_pos > start && is_halant (info[new_pos - 1]))
          {
            /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
            if (new_pos < end && is_joiner (info[new_pos]))
              new_pos++;
          }

          {
            unsigned int old_pos = i;

            /* Rotate info[new_pos..old_pos] right by one: the glyph that was
             * at old_pos ends up at new_pos. */
            buffer->merge_clusters (new_pos, old_pos + 1);
            hb_glyph_info_t tmp = info[old_pos];
            memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0]));
            info[new_pos] = tmp;

            /* The base shifted one slot right if it was inside the rotated range. */
            if (new_pos <= base && base < old_pos)
              base++;
          }
        }

        break;
      }
  }


  /* Apply 'init' to the Left Matra if it's a word start. */
  if (info[start].indic_position () == POS_PRE_M)
  {
    /* "Word start" here means: first glyph of the buffer, or the previous
     * glyph's general category lies outside the FORMAT..NON_SPACING_MARK
     * range. */
    if (!start ||
        !(FLAG_UNSAFE (_hb_glyph_info_get_general_category (&info[start - 1])) &
         FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
      info[start].mask |= indic_plan->mask_array[INDIC_INIT];
    else
      buffer->unsafe_to_break (start - 1, start + 1);
  }
}


/* Final-reordering pass over the whole buffer.
 *
 * Returns false in every case.  An empty buffer returns immediately, before
 * the per-glyph variables are deallocated.  Otherwise the per-syllable final
 * reordering runs only if the "start" buffer message call returns true; the
 * indic_category and indic_position buffer variables are deallocated either
 * way.
 *
 * NOTE(review): `font` carries HB_UNUSED yet is passed to buffer->message()
 * below. */
static bool
final_reordering_indic (const hb_ot_shape_plan_t *plan,
                        hb_font_t *font HB_UNUSED,
                        hb_buffer_t *buffer)
{
  unsigned int count = buffer->len;
  if (unlikely (!count)) return false;

  if (buffer->message (font, "start reordering indic final")) {
    foreach_syllable (buffer, start, end)
      final_reordering_syllable_indic (plan, buffer, start, end);
    (void) buffer->message (font, "end reordering indic final");
  }

  HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);
  HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);

  return false;
}


/* Text preprocessing hook: forwards to
 * _hb_preprocess_text_vowel_constraints(). */
static void
preprocess_text_indic (const hb_ot_shape_plan_t *plan,
                       hb_buffer_t *buffer,
                       hb_font_t *font)
{
  _hb_preprocess_text_vowel_constraints (plan, buffer, font);
}

/* Decomposition hook.
 *
 * Returns false (no decomposition; *a and *b are not written) for the four
 * code points listed below.  Every other code point is passed to
 * c->unicode->decompose(), whose result is returned as a bool. */
static bool
decompose_indic (const hb_ot_shape_normalize_context_t *c,
                 hb_codepoint_t  ab,
                 hb_codepoint_t *a,
                 hb_codepoint_t *b)
{
  switch (ab)
  {
    /* Don't decompose these. */
    case 0x0931u  : return false; /* DEVANAGARI LETTER RRA */
    // https://github.com/harfbuzz/harfbuzz/issues/779
    case 0x09DCu  : return false; /* BENGALI LETTER RRA */
    case 0x09DDu  : return false; /* BENGALI LETTER RHA */
    case 0x0B94u  : return false; /* TAMIL LETTER AU */


    /*
     * Decompose split matras that don't have Unicode decompositions.
     */

#if 0
    /* Gujarati */
    /* This one has no decomposition in Unicode, but needs no decomposition either. */
    /* case 0x0AC9u  : return false; */

    /* Oriya */
    case 0x0B57u  : *a = no decomp, -> RIGHT; return true;
#endif
  }

  return (bool) c->unicode->decompose (ab, a, b);
}

/* Composition hook.
 *
 * Returns false when the general category of `a` is a mark.  The pair
 * U+09AF, U+09BC is composed to U+09DF here.  Any other pair is passed to
 * c->unicode->compose(), whose result is returned as a bool. */
static bool
compose_indic (const hb_ot_shape_normalize_context_t *c,
               hb_codepoint_t  a,
               hb_codepoint_t  b,
               hb_codepoint_t *ab)
{
  /* Avoid recomposing split matras. */
  if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
    return false;

  /* Composition-exclusion exceptions that we want to recompose. */
  if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; }

  return (bool) c->unicode->compose (a, b, ab);
}


/* Shaper descriptor for the Indic shaper: the callbacks and settings below
 * are listed positionally. */
const hb_ot_shaper_t _hb_ot_shaper_indic =
{
  collect_features_indic,
  override_features_indic,
  data_create_indic,
  data_destroy_indic,
  preprocess_text_indic,
  nullptr, /* postprocess_glyphs */
  decompose_indic,
  compose_indic,
  setup_masks_indic,
  nullptr, /* reorder_marks */
  HB_TAG_NONE, /* gpos_tag */
  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
  false, /* fallback_position */
};


#endif