hb-ot-shaper-use-machine.rl (9115B)
1 /* 2 * Copyright © 2015 Mozilla Foundation. 3 * Copyright © 2015 Google, Inc. 4 * 5 * This is part of HarfBuzz, a text shaping library. 6 * 7 * Permission is hereby granted, without written agreement and without 8 * license or royalty fees, to use, copy, modify, and distribute this 9 * software and its documentation for any purpose, provided that the 10 * above copyright notice and the following two paragraphs appear in 11 * all copies of this software. 12 * 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 17 * DAMAGE. 18 * 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 24 * 25 * Mozilla Author(s): Jonathan Kew 26 * Google Author(s): Behdad Esfahbod 27 */ 28 29 #ifndef HB_OT_SHAPER_USE_MACHINE_HH 30 #define HB_OT_SHAPER_USE_MACHINE_HH 31 32 #include "hb.hh" 33 34 #include "hb-ot-shaper-syllabic.hh" 35 36 /* buffer var allocations */ 37 #define use_category() ot_shaper_var_u8_category() 38 39 #define USE(Cat) use_syllable_machine_ex_##Cat 40 41 enum use_syllable_type_t { 42 use_virama_terminated_cluster, 43 use_sakot_terminated_cluster, 44 use_standard_cluster, 45 use_number_joiner_terminated_cluster, 46 use_numeral_cluster, 47 use_symbol_cluster, 48 use_hieroglyph_cluster, 49 use_broken_cluster, 50 use_non_cluster, 51 }; 52 53 %%{ 54 machine use_syllable_machine; 55 alphtype unsigned char; 56 write exports; 57 write data; 58 }%% 59 60 %%{ 61 62 # Categories used in the Universal Shaping Engine spec: 63 # https://docs.microsoft.com/en-us/typography/script-development/use 64 65 export O = 0; # OTHER 66 67 export B = 1; # BASE 68 export N = 4; # BASE_NUM 69 export GB = 5; # BASE_OTHER 70 export CGJ = 6; # CGJ 71 export SUB = 11; # CONS_SUB 72 export H = 12; # HALANT 73 74 export HN = 13; # HALANT_NUM 75 export ZWNJ = 14; # Zero width non-joiner 76 export WJ = 16; # Word joiner 77 export R = 18; # REPHA 78 export CS = 43; # CONS_WITH_STACKER 79 export IS = 44; # INVISIBLE_STACKER 80 export Sk = 48; # SAKOT 81 export G = 49; # HIEROGLYPH 82 export J = 50; # HIEROGLYPH_JOINER 83 export SB = 51; # HIEROGLYPH_SEGMENT_BEGIN 84 export SE = 52; # HIEROGLYPH_SEGMENT_END 85 export HVM = 53; # HALANT_OR_VOWEL_MODIFIER 86 export HM = 54; # HIEROGLYPH_MOD 87 export HR = 55; # HIEROGLYPH_MIRROR 88 export RK = 56; # REORDERING_KILLER 89 90 export FAbv = 24; # CONS_FINAL_ABOVE 91 export FBlw = 25; # CONS_FINAL_BELOW 92 export FPst = 26; # CONS_FINAL_POST 93 export MAbv = 27; # CONS_MED_ABOVE 94 export MBlw = 28; # CONS_MED_BELOW 95 export MPst = 29; # CONS_MED_POST 96 export MPre = 30; # CONS_MED_PRE 97 export CMAbv = 31; # CONS_MOD_ABOVE 98 export CMBlw = 32; # CONS_MOD_BELOW 99 export VAbv = 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST 100 export VBlw = 34; # VOWEL_BELOW / VOWEL_BELOW_POST 101 export VPst = 35; # VOWEL_POST UIPC = Right 102 export VPre = 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST 103 export VMAbv = 37; # VOWEL_MOD_ABOVE 104 export VMBlw = 38; # VOWEL_MOD_BELOW 105 export VMPst = 39; # VOWEL_MOD_POST 106 export VMPre = 23; # VOWEL_MOD_PRE 107 export SMAbv = 41; # SYM_MOD_ABOVE 108 export SMBlw = 42; # SYM_MOD_BELOW 109 export FMAbv = 45; # CONS_FINAL_MOD UIPC = Top 110 export FMBlw = 46; # CONS_FINAL_MOD UIPC = Bottom 111 export FMPst = 47; # CONS_FINAL_MOD UIPC = Not_Applicable 112 113 114 h = H | HVM | IS | Sk; 115 116 consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv* CMBlw*)*; 117 medial_consonants = MPre? MAbv? MBlw? MPst?; 118 dependent_vowels = VPre* VAbv* VBlw* VPst* | H; 119 vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*; 120 final_consonants = FAbv* FBlw* FPst*; 121 final_modifiers = FMAbv* FMBlw* | FMPst?; 122 123 complex_syllable_start = (R | CS)? (B | GB); 124 complex_syllable_middle = 125 consonant_modifiers 126 medial_consonants 127 dependent_vowels 128 vowel_modifiers 129 (Sk B)* 130 ; 131 complex_syllable_tail = 132 complex_syllable_middle 133 final_consonants 134 final_modifiers 135 ; 136 number_joiner_terminated_cluster_tail = (HN N)* HN; 137 numeral_cluster_tail = (HN N)+; 138 symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+; 139 140 virama_terminated_cluster_tail = 141 consonant_modifiers 142 (IS | RK) 143 ; 144 virama_terminated_cluster = 145 complex_syllable_start 146 virama_terminated_cluster_tail 147 ; 148 sakot_terminated_cluster_tail = 149 complex_syllable_middle 150 Sk 151 ; 152 sakot_terminated_cluster = 153 complex_syllable_start 154 sakot_terminated_cluster_tail 155 ; 156 standard_cluster = 157 complex_syllable_start 158 complex_syllable_tail 159 ; 160 tail = complex_syllable_tail | sakot_terminated_cluster_tail | symbol_cluster_tail | virama_terminated_cluster_tail; 161 broken_cluster = 162 R? 163 (tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail) 164 ; 165 166 number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail; 167 numeral_cluster = N numeral_cluster_tail?; 168 symbol_cluster = (O | GB | SB) tail?; 169 hieroglyph_cluster = SB* G HR? HM? SE* (J SB* (G HR? HM? SE*)?)*; 170 other = any; 171 172 main := |* 173 virama_terminated_cluster ZWNJ? => { found_syllable (use_virama_terminated_cluster); }; 174 sakot_terminated_cluster ZWNJ? => { found_syllable (use_sakot_terminated_cluster); }; 175 standard_cluster ZWNJ? => { found_syllable (use_standard_cluster); }; 176 number_joiner_terminated_cluster ZWNJ? => { found_syllable (use_number_joiner_terminated_cluster); }; 177 numeral_cluster ZWNJ? => { found_syllable (use_numeral_cluster); }; 178 symbol_cluster ZWNJ? => { found_syllable (use_symbol_cluster); }; 179 hieroglyph_cluster ZWNJ? => { found_syllable (use_hieroglyph_cluster); }; 180 FMPst => { found_syllable (use_non_cluster); }; 181 broken_cluster ZWNJ? => { found_syllable (use_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }; 182 other => { found_syllable (use_non_cluster); }; 183 *|; 184 185 186 }%% 187 188 #define found_syllable(syllable_type) \ 189 HB_STMT_START { \ 190 if (0) fprintf (stderr, "syllable %u..%u %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \ 191 for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \ 192 info[i].syllable() = (syllable_serial << 4) | syllable_type; \ 193 syllable_serial++; \ 194 if (syllable_serial == 16) syllable_serial = 1; \ 195 } HB_STMT_END 196 197 198 template <typename Iter> 199 struct machine_index_t : 200 hb_iter_with_fallback_t<machine_index_t<Iter>, 201 typename Iter::item_t> 202 { 203 machine_index_t (const Iter& it) : it (it) {} 204 machine_index_t (const machine_index_t& o) : hb_iter_with_fallback_t<machine_index_t<Iter>, 205 typename Iter::item_t> (), 206 it (o.it), is_null (o.is_null) {} 207 208 static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator; 209 static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator; 210 211 typename Iter::item_t __item__ () const { return *it; } 212 typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; } 213 unsigned __len__ () const { return it.len (); } 214 void __next__ () { ++it; } 215 void __forward__ (unsigned n) { it += n; } 216 void __prev__ () { --it; } 217 void __rewind__ (unsigned n) { it -= n; } 218 219 void operator = (unsigned n) 220 { 221 assert (n == 0); 222 is_null = true; 223 } 224 explicit operator bool () { return !is_null; } 225 226 void operator = (const machine_index_t& o) 227 { 228 is_null = o.is_null; 229 unsigned index = (*it).first; 230 unsigned n = (*o.it).first; 231 if (index < n) it += n - index; else if (index > n) it -= index - n; 232 } 233 bool operator == (const machine_index_t& o) const 234 { return is_null ? o.is_null : !o.is_null && (*it).first == (*o.it).first; } 235 bool operator != (const machine_index_t& o) const { return !(*this == o); } 236 237 private: 238 Iter it; 239 bool is_null = false; 240 }; 241 struct 242 { 243 template <typename Iter, 244 hb_requires (hb_is_iterable (Iter))> 245 machine_index_t<hb_iter_type<Iter>> 246 operator () (Iter&& it) const 247 { return machine_index_t<hb_iter_type<Iter>> (hb_iter (it)); } 248 } 249 HB_FUNCOBJ (machine_index); 250 251 252 253 static bool 254 not_ccs_default_ignorable (const hb_glyph_info_t &i) 255 { return i.use_category() != USE(CGJ); } 256 257 static inline void 258 find_syllables_use (hb_buffer_t *buffer) 259 { 260 hb_glyph_info_t *info = buffer->info; 261 auto p = 262 + hb_iter (info, buffer->len) 263 | hb_enumerate 264 | hb_filter ([] (const hb_glyph_info_t &i) { return not_ccs_default_ignorable (i); }, 265 hb_second) 266 | hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p) 267 { 268 if (p.second.use_category() == USE(ZWNJ)) 269 for (unsigned i = p.first + 1; i < buffer->len; ++i) 270 if (not_ccs_default_ignorable (info[i])) 271 return !_hb_glyph_info_is_unicode_mark (&info[i]); 272 return true; 273 }) 274 | hb_enumerate 275 | machine_index 276 ; 277 auto pe = p + p.len (); 278 auto eof = +pe; 279 auto ts = +p; 280 auto te = +p; 281 unsigned int act HB_UNUSED; 282 int cs; 283 %%{ 284 write init; 285 getkey (*p).second.second.use_category(); 286 }%% 287 288 unsigned int syllable_serial = 1; 289 %%{ 290 write exec; 291 }%% 292 } 293 294 #undef found_syllable 295 296 #endif /* HB_OT_SHAPER_USE_MACHINE_HH */