tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hb-ot-shaper-use-machine.rl (9115B)


      1 /*
      2 * Copyright © 2015  Mozilla Foundation.
      3 * Copyright © 2015  Google, Inc.
      4 *
      5 *  This is part of HarfBuzz, a text shaping library.
      6 *
      7 * Permission is hereby granted, without written agreement and without
      8 * license or royalty fees, to use, copy, modify, and distribute this
      9 * software and its documentation for any purpose, provided that the
     10 * above copyright notice and the following two paragraphs appear in
     11 * all copies of this software.
     12 *
     13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     17 * DAMAGE.
     18 *
     19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     24 *
     25 * Mozilla Author(s): Jonathan Kew
     26 * Google Author(s): Behdad Esfahbod
     27 */
     28 
     29 #ifndef HB_OT_SHAPER_USE_MACHINE_HH
     30 #define HB_OT_SHAPER_USE_MACHINE_HH
     31 
     32 #include "hb.hh"
     33 
     34 #include "hb-ot-shaper-syllabic.hh"
     35 
     36 /* buffer var allocations */
        /* use_category() expands to the ot_shaper_var_u8_category() accessor call. */
     37 #define use_category() ot_shaper_var_u8_category()
     38 
        /* USE(Cat) token-pastes Cat onto the use_syllable_machine_ex_ prefix,
         * e.g. USE(CGJ) becomes use_syllable_machine_ex_CGJ. */
     39 #define USE(Cat) use_syllable_machine_ex_##Cat
     40 
        /* Syllable kinds reported by the scanner actions in this file.
         * found_syllable() ORs the enumerator into (syllable_serial << 4), so the
         * values have to fit in the low 4 bits. Enumerators are implicitly
         * numbered 0..8 in declaration order. */
     41 enum use_syllable_type_t {
     42  use_virama_terminated_cluster,
     43  use_sakot_terminated_cluster,
     44  use_standard_cluster,
     45  use_number_joiner_terminated_cluster,
     46  use_numeral_cluster,
     47  use_symbol_cluster,
     48  use_hieroglyph_cluster,
     49  use_broken_cluster,
     50  use_non_cluster,
     51 };
     52 
     53 %%{
         # Name the machine, declare that input symbols are unsigned char, and
         # have Ragel emit the exported constants and the machine's static data
         # at this point of the generated file.
     54  machine use_syllable_machine;
     55  alphtype unsigned char;
     56  write exports;
     57  write data;
     58 }%%
     59 
     60 %%{
     61 
     62 # Categories used in the Universal Shaping Engine spec:
     63 # https://docs.microsoft.com/en-us/typography/script-development/use
        #
        # Each `export` below is also made available to the host code as
        # use_syllable_machine_ex_<Name>; the USE(<Name>) macro refers to those.
        # The numeric values are the input symbols the machine matches on.
     64 
     65 export O	= 0; # OTHER
     66 
     67 export B	= 1; # BASE
     68 export N	= 4; # BASE_NUM
     69 export GB	= 5; # BASE_OTHER
     70 export CGJ	= 6; # CGJ
     71 export SUB	= 11; # CONS_SUB
     72 export H	= 12; # HALANT
     73 
     74 export HN	= 13; # HALANT_NUM
     75 export ZWNJ	= 14; # Zero width non-joiner
     76 export WJ	= 16; # Word joiner
     77 export R	= 18; # REPHA
     78 export CS	= 43; # CONS_WITH_STACKER
     79 export IS	= 44; # INVISIBLE_STACKER
     80 export Sk	= 48; # SAKOT
     81 export G	= 49; # HIEROGLYPH
     82 export J	= 50; # HIEROGLYPH_JOINER
     83 export SB	= 51; # HIEROGLYPH_SEGMENT_BEGIN
     84 export SE	= 52; # HIEROGLYPH_SEGMENT_END
     85 export HVM	= 53; # HALANT_OR_VOWEL_MODIFIER
     86 export HM	= 54; # HIEROGLYPH_MOD
     87 export HR	= 55; # HIEROGLYPH_MIRROR
     88 export RK	= 56; # REORDERING_KILLER
     89 
        # Positional categories. Note that VPre (22) and VMPre (23) are numbered
        # out of sequence with the neighbouring entries.
     90 export FAbv	= 24; # CONS_FINAL_ABOVE
     91 export FBlw	= 25; # CONS_FINAL_BELOW
     92 export FPst	= 26; # CONS_FINAL_POST
     93 export MAbv	= 27; # CONS_MED_ABOVE
     94 export MBlw	= 28; # CONS_MED_BELOW
     95 export MPst	= 29; # CONS_MED_POST
     96 export MPre	= 30; # CONS_MED_PRE
     97 export CMAbv	= 31; # CONS_MOD_ABOVE
     98 export CMBlw	= 32; # CONS_MOD_BELOW
     99 export VAbv	= 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
    100 export VBlw	= 34; # VOWEL_BELOW / VOWEL_BELOW_POST
    101 export VPst	= 35; # VOWEL_POST	UIPC = Right
    102 export VPre	= 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
    103 export VMAbv	= 37; # VOWEL_MOD_ABOVE
    104 export VMBlw	= 38; # VOWEL_MOD_BELOW
    105 export VMPst	= 39; # VOWEL_MOD_POST
    106 export VMPre	= 23; # VOWEL_MOD_PRE
    107 export SMAbv	= 41; # SYM_MOD_ABOVE
    108 export SMBlw	= 42; # SYM_MOD_BELOW
    109 export FMAbv	= 45; # CONS_FINAL_MOD	UIPC = Top
    110 export FMBlw	= 46; # CONS_FINAL_MOD	UIPC = Bottom
    111 export FMPst	= 47; # CONS_FINAL_MOD	UIPC = Not_Applicable
    112 
    113 
        # h: shorthand for any one of H, HVM, IS or Sk.
    114 h = H | HVM | IS | Sk;
    115 
        # Building blocks of a syllable, in the order they may appear after the base.
    116 consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv* CMBlw*)*;
    117 medial_consonants = MPre? MAbv? MBlw? MPst?;
    118 dependent_vowels = VPre* VAbv* VBlw* VPst* | H;
    119 vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*;
    120 final_consonants = FAbv* FBlw* FPst*;
    121 final_modifiers = FMAbv* FMBlw* | FMPst?;
    122 
        # A syllable start is an optional R or CS followed by a B or GB base.
        # The *_tail rules describe what may follow that start; they are kept
        # separate so that broken_cluster can reuse them without a base.
    123 complex_syllable_start = (R | CS)? (B | GB);
    124 complex_syllable_middle =
    125 consonant_modifiers
    126 medial_consonants
    127 dependent_vowels
    128 vowel_modifiers
    129 (Sk B)*
    130 ;
    131 complex_syllable_tail =
    132 complex_syllable_middle
    133 final_consonants
    134 final_modifiers
    135 ;
    136 number_joiner_terminated_cluster_tail = (HN N)* HN;
    137 numeral_cluster_tail = (HN N)+;
    138 symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+;
    139 
    140 virama_terminated_cluster_tail =
    141 consonant_modifiers
    142 (IS | RK)
    143 ;
    144 virama_terminated_cluster =
    145 complex_syllable_start
    146 virama_terminated_cluster_tail
    147 ;
    148 sakot_terminated_cluster_tail =
    149 complex_syllable_middle
    150 Sk
    151 ;
    152 sakot_terminated_cluster =
    153 complex_syllable_start
    154 sakot_terminated_cluster_tail
    155 ;
    156 standard_cluster =
    157 complex_syllable_start
    158 complex_syllable_tail
    159 ;
        # broken_cluster: an optional R followed directly by one of the tails,
        # i.e. a tail with no complex_syllable_start / N in front of it.
    160 tail = complex_syllable_tail | sakot_terminated_cluster_tail | symbol_cluster_tail | virama_terminated_cluster_tail;
    161 broken_cluster =
    162 R?
    163 (tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail)
    164 ;
    165 
    166 number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail;
    167 numeral_cluster = N numeral_cluster_tail?;
    168 symbol_cluster = (O | GB | SB) tail?;
    169 hieroglyph_cluster = SB* G HR? HM? SE* (J SB* (G HR? HM? SE*)?)*;
        # Fallback: any single input symbol.
    170 other = any;
    171 
        # Scanner. Per the Ragel guide a scanner takes the longest match and, on a
        # tie, the alternative listed first, so the order of the lines below is
        # significant. Every cluster alternative accepts one optional trailing
        # ZWNJ. Each action labels the matched range through found_syllable();
        # broken_cluster additionally sets HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE
        # on the buffer. A lone FMPst and the `other` fallback are both reported
        # as use_non_cluster.
    172 main := |*
    173 virama_terminated_cluster ZWNJ?		=> { found_syllable (use_virama_terminated_cluster); };
    174 sakot_terminated_cluster ZWNJ?		=> { found_syllable (use_sakot_terminated_cluster); };
    175 standard_cluster ZWNJ?			=> { found_syllable (use_standard_cluster); };
    176 number_joiner_terminated_cluster ZWNJ?	=> { found_syllable (use_number_joiner_terminated_cluster); };
    177 numeral_cluster ZWNJ?			=> { found_syllable (use_numeral_cluster); };
    178 symbol_cluster ZWNJ?			=> { found_syllable (use_symbol_cluster); };
    179 hieroglyph_cluster ZWNJ?		=> { found_syllable (use_hieroglyph_cluster); };
    180 FMPst					=> { found_syllable (use_non_cluster); };
    181 broken_cluster ZWNJ?			=> { found_syllable (use_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; };
    182 other					=> { found_syllable (use_non_cluster); };
    183 *|;
    184 
    185 
    186 }%%
    187 
        /* Body of every scanner action: tags the glyphs of the syllable that was
         * just matched.
         *
         * Expects `ts`, `te`, `info` and `syllable_serial` to be in scope where it
         * is expanded. (*ts).second.first and (*te).second.first are used as the
         * half-open range [start, end) of indices into info[]. Each entry in that
         * range gets syllable() = (syllable_serial << 4) | syllable_type.
         *
         * The serial is then incremented and wraps from 16 back to 1, so it stays
         * within 1..15 and is never 0.
         *
         * The `if (0) fprintf` line is compiled-out debug output. */
    188 #define found_syllable(syllable_type) \
    189  HB_STMT_START { \
    190    if (0) fprintf (stderr, "syllable %u..%u %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \
    191    for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \
    192      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
    193    syllable_serial++; \
    194    if (syllable_serial == 16) syllable_serial = 1; \
    195  } HB_STMT_END
    196 
    197 
        /* Iterator wrapper that adds a "null" state on top of Iter.
         *
         * Besides forwarding the usual iterator hooks to the wrapped iterator, it
         * supports assignment from 0, conversion to bool, re-positioning
         * assignment and ==/!=. Presumably these are the operations the
         * Ragel-generated code performs on p/pe/ts/te -- TODO confirm against the
         * generated output.
         *
         * Items of Iter must expose a `.first` member, which is used as the
         * position key by operator= and operator==. */
    198 template <typename Iter>
    199 struct machine_index_t :
    200  hb_iter_with_fallback_t<machine_index_t<Iter>,
    201 		  typename Iter::item_t>
    202 {
    203  machine_index_t (const Iter& it) : it (it) {}
         /* Copy constructor: default-constructs the base explicitly and copies
          * both the wrapped iterator and the null flag. */
    204  machine_index_t (const machine_index_t& o) : hb_iter_with_fallback_t<machine_index_t<Iter>,
    205 							       typename Iter::item_t> (),
    206 				       it (o.it), is_null (o.is_null) {}
    207 
    208  static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator;
    209  static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator;
    210 
         /* Iterator hooks; each one simply forwards to the wrapped iterator. */
    211  typename Iter::item_t __item__ () const { return *it; }
    212  typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; }
    213  unsigned __len__ () const { return it.len (); }
    214  void __next__ () { ++it; }
    215  void __forward__ (unsigned n) { it += n; }
    216  void __prev__ () { --it; }
    217  void __rewind__ (unsigned n) { it -= n; }
    218 
         /* Assigning the literal 0 marks the index as null; any other value trips
          * the assert. The wrapped iterator's position is left untouched. */
    219  void operator = (unsigned n)
    220  {
    221    assert (n == 0);
    222    is_null = true;
    223  }
         /* True unless the index has been nulled. */
    224  explicit operator bool () { return !is_null; }
    225 
         /* Re-positioning assignment: does NOT copy o.it. Instead this->it is
          * moved forward or backward by the difference between the `.first` of
          * the two current items; the null flag is copied from o.
          * NOTE(review): assumes `.first` changes by exactly one per iterator
          * step and that both sides walk the same sequence -- confirm. */
    226  void operator = (const machine_index_t& o)
    227  {
    228    is_null = o.is_null;
    229    unsigned index = (*it).first;
    230    unsigned n = (*o.it).first;
    231    if (index < n) it += n - index; else if (index > n) it -= index - n;
    232  }
         /* Two null indices compare equal; a null and a non-null one never do;
          * otherwise equality is decided by the `.first` of the current items. */
    233  bool operator == (const machine_index_t& o) const
    234  { return is_null ? o.is_null : !o.is_null && (*it).first == (*o.it).first; }
    235  bool operator != (const machine_index_t& o) const { return !(*this == o); }
    236 
    237  private:
    238  Iter it;
    239  bool is_null = false;
    240 };
    241 struct
    242 {
         /* Function object `machine_index`: takes any iterable, obtains its
          * iterator with hb_iter () and wraps it in a machine_index_t. Declared
          * through HB_FUNCOBJ; find_syllables_use uses it as the last stage of
          * a `|` pipeline. */
    243  template <typename Iter,
    244     hb_requires (hb_is_iterable (Iter))>
    245  machine_index_t<hb_iter_type<Iter>>
    246  operator () (Iter&& it) const
    247  { return machine_index_t<hb_iter_type<Iter>> (hb_iter (it)); }
    248 }
    249 HB_FUNCOBJ (machine_index);
    250 
    251 
    252 
        /* Returns true for every glyph whose use_category() is not CGJ, and false
         * for CGJ. */
    253 static bool
    254 not_ccs_default_ignorable (const hb_glyph_info_t &i)
    255 { return i.use_category() != USE(CGJ); }
    256 
        /* Runs the use_syllable_machine scanner over the glyphs of `buffer`.
         *
         * The scanner does not see the raw glyph array: CGJ glyphs and some ZWNJ
         * glyphs are filtered out first (see below). The scanner actions
         * (found_syllable) write info[i].syllable() for each matched range and,
         * for broken clusters, set HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE in
         * buffer->scratch_flags. */
    257 static inline void
    258 find_syllables_use (hb_buffer_t *buffer)
    259 {
    260  hb_glyph_info_t *info = buffer->info;
    261  auto p =
    262    + hb_iter (info, buffer->len)
    263    | hb_enumerate
            /* Drop entries whose glyph has category CGJ. */
    264    | hb_filter ([] (const hb_glyph_info_t &i) { return not_ccs_default_ignorable (i); },
    265 	 hb_second)
            /* ZWNJ handling: look at the next non-CGJ glyph in the buffer and keep
             * the ZWNJ only if that glyph is not a Unicode mark. A ZWNJ with no
             * such following glyph, and every non-ZWNJ entry, is kept. */
    266    | hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p)
    267 	 {
    268 	   if (p.second.use_category() == USE(ZWNJ))
    269 	     for (unsigned i = p.first + 1; i < buffer->len; ++i)
    270 	       if (not_ccs_default_ignorable (info[i]))
    271 		 return !_hb_glyph_info_is_unicode_mark (&info[i]);
    272 	   return true;
    273 	 })
            /* Enumerate again after filtering, then wrap for the state machine.
             * Items are accessed below as (*p).second.second for the glyph info,
             * and in found_syllable as (*x).second.first for the buffer index. */
    274    | hb_enumerate
    275    | machine_index
    276    ;
         /* p, pe, eof, ts, te, act and cs are the variable names that
          * Ragel-generated scanner code refers to (see the Ragel user guide). */
    277  auto pe = p + p.len ();
    278  auto eof = +pe;
    279  auto ts = +p;
    280  auto te = +p;
    281  unsigned int act HB_UNUSED;
    282  int cs;
         /* Emit the machine initialisation; getkey tells Ragel how to obtain the
          * input symbol (the glyph's USE category) from the current position. */
    283  %%{
    284    write init;
    285    getkey (*p).second.second.use_category();
    286  }%%
    287 
         /* Serial used by found_syllable; starts at 1. */
    288  unsigned int syllable_serial = 1;
         /* Emit the scanner's execution loop. */
    289  %%{
    290    write exec;
    291  }%%
    292 }
    293 
        /* found_syllable is a helper for the code in this header only; undefine it
         * so the macro does not leak into files that include this header. */
    294 #undef found_syllable
    295 
    296 #endif /* HB_OT_SHAPER_USE_MACHINE_HH */