tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hb-ucd.cc (7196B)


      1 /*
      2 * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
      3 *
      4 * Permission to use, copy, modify, and/or distribute this software for any
      5 * purpose with or without fee is hereby granted, provided that the above
      6 * copyright notice and this permission notice appear in all copies.
      7 *
      8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     15 */
     16 
     17 #include "hb.hh"
     18 #include "hb-unicode.hh"
     19 #include "hb-machinery.hh"
     20 
     21 #include "hb-ucd-table.hh"
     22 
     23 static hb_unicode_combining_class_t
     24 hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
     25 		hb_codepoint_t unicode,
     26 		void *user_data HB_UNUSED)
     27 {
     28  return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode);
     29 }
     30 
     31 static hb_unicode_general_category_t
     32 hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
     33 		 hb_codepoint_t unicode,
     34 		 void *user_data HB_UNUSED)
     35 {
     36  return (hb_unicode_general_category_t) _hb_ucd_gc (unicode);
     37 }
     38 
     39 static hb_codepoint_t
     40 hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
     41 	  hb_codepoint_t unicode,
     42 	  void *user_data HB_UNUSED)
     43 {
     44  return unicode + _hb_ucd_bmg (unicode);
     45 }
     46 
     47 static hb_script_t
     48 hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
     49        hb_codepoint_t unicode,
     50        void *user_data HB_UNUSED)
     51 {
     52  return _hb_ucd_sc_map[_hb_ucd_sc (unicode)];
     53 }
     54 
     55 
     /* Constants for algorithmic Hangul composition / decomposition.
      *
      * For each letter X, XBASE is the code point that index 0 maps to and
      * XCOUNT is the number of indices:
      *   S - precomposed syllables
      *   L - first  component of an "L,V"  pair
      *   V - second component of an "L,V"  pair
      *   T - second component of an "LV,T" pair (index 0 is never emitted:
      *       the code below only uses T indices 1 .. TCOUNT-1)
      * NCOUNT is the number of syllables sharing one L, i.e. VCOUNT * TCOUNT;
      * SCOUNT equals LCOUNT * NCOUNT. */
     #define SBASE   0xAC00u
     #define SCOUNT  11172u

     #define LBASE   0x1100u
     #define LCOUNT  19u

     #define VBASE   0x1161u
     #define VCOUNT  21u

     #define TBASE   0x11A7u
     #define TCOUNT  28u

     #define NCOUNT  (VCOUNT * TCOUNT)
     65 
     66 static inline bool
     67 _hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
     68 {
     69  unsigned si = ab - SBASE;
     70 
     71  if (si >= SCOUNT)
     72    return false;
     73 
     74  if (si % TCOUNT)
     75  {
     76    /* LV,T */
     77    *a = SBASE + (si / TCOUNT) * TCOUNT;
     78    *b = TBASE + (si % TCOUNT);
     79    return true;
     80  } else {
     81    /* L,V */
     82    *a = LBASE + (si / NCOUNT);
     83    *b = VBASE + (si % NCOUNT) / TCOUNT;
     84    return true;
     85  }
     86 }
     87 
     88 static inline bool
     89 _hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
     90 {
     91  if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) &&
     92    !((a - SBASE) % TCOUNT))
     93  {
     94    /* LV,T */
     95    *ab = a + (b - TBASE);
     96    return true;
     97  }
     98  else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT))
     99  {
    100    /* L,V */
    101    int li = a - LBASE;
    102    int vi = b - VBASE;
    103    *ab = SBASE + li * NCOUNT + vi * TCOUNT;
    104    return true;
    105  }
    106  else
    107    return false;
    108 }
    109 
    110 static int
    111 _cmp_pair (const void *_key, const void *_item)
    112 {
    113  uint64_t& a = * (uint64_t*) _key;
    114  uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0);
    115 
    116  return a < b ? -1 : a > b ? +1 : 0;
    117 }
    118 static int
    119 _cmp_pair_11_7_14 (const void *_key, const void *_item)
    120 {
    121  uint32_t& a = * (uint32_t*) _key;
    122  uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0);
    123 
    124  return a < b ? -1 : a > b ? +1 : 0;
    125 }
    126 
    127 static hb_bool_t
    128 hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    129 	hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
    130 	void *user_data HB_UNUSED)
    131 {
    132  // Hangul is handled algorithmically.
    133  if (_hb_ucd_compose_hangul (a, b, ab)) return true;
    134 
    135  hb_codepoint_t u = 0;
    136 
    137  if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u)
    138  {
    139    /* If "a" is small enough and "b" is in the U+0300 range,
    140     * the composition data is encoded in a 32bit array sorted
    141     * by "a,b" pair. */
    142    uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0);
    143    const uint32_t *v = hb_bsearch (k,
    144 			    _hb_ucd_dm2_u32_map,
    145 			    ARRAY_LENGTH (_hb_ucd_dm2_u32_map),
    146 			    sizeof (*_hb_ucd_dm2_u32_map),
    147 			    _cmp_pair_11_7_14);
    148    if (likely (!v)) return false;
    149    u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v);
    150  }
    151  else
    152  {
    153    /* Otherwise it is stored in a 64bit array sorted by
    154     * "a,b" pair. */
    155    uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0);
    156    const uint64_t *v = hb_bsearch (k,
    157 			    _hb_ucd_dm2_u64_map,
    158 			    ARRAY_LENGTH (_hb_ucd_dm2_u64_map),
    159 			    sizeof (*_hb_ucd_dm2_u64_map),
    160 			    _cmp_pair);
    161    if (likely (!v)) return false;
    162    u = HB_CODEPOINT_DECODE3_3 (*v);
    163  }
    164 
    165  if (unlikely (!u)) return false;
    166  *ab = u;
    167  return true;
    168 }
    169 
    170 static hb_bool_t
    171 hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    172 	  hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
    173 	  void *user_data HB_UNUSED)
    174 {
    175  if (_hb_ucd_decompose_hangul (ab, a, b)) return true;
    176 
    177  unsigned i = _hb_ucd_dm (ab);
    178 
    179  /* If no data, there's no decomposition. */
    180  if (likely (!i)) return false;
    181  i--;
    182 
    183  /* Check if it's a single-character decomposition. */
    184  if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map))
    185  {
    186    /* Single-character decompositions currently are only in plane 0 or plane 2. */
    187    if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map))
    188    {
    189      /* Plane 0. */
    190      *a = _hb_ucd_dm1_p0_map[i];
    191    }
    192    else
    193    {
    194      /* Plane 2. */
    195      i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map);
    196      *a = 0x20000 | _hb_ucd_dm1_p2_map[i];
    197    }
    198    *b = 0;
    199    return true;
    200  }
    201  i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map);
    202 
    203  /* Otherwise they are encoded either in a 32bit array or a 64bit array. */
    204  if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map))
    205  {
    206    /* 32bit array. */
    207    uint32_t v = _hb_ucd_dm2_u32_map[i];
    208    *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v);
    209    *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v);
    210    return true;
    211  }
    212  i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map);
    213 
    214  /* 64bit array. */
    215  uint64_t v = _hb_ucd_dm2_u64_map[i];
    216  *a = HB_CODEPOINT_DECODE3_1 (v);
    217  *b = HB_CODEPOINT_DECODE3_2 (v);
    218  return true;
    219 }
    220 
    221 
    222 static void free_static_ucd_funcs ();
    223 
    224 static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t>
    225 {
    226  static hb_unicode_funcs_t *create ()
    227  {
    228    hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
    229 
    230    hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr);
    231    hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr);
    232    hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr);
    233    hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr);
    234    hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr);
    235    hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr);
    236 
    237    hb_unicode_funcs_make_immutable (funcs);
    238 
    239    hb_atexit (free_static_ucd_funcs);
    240 
    241    return funcs;
    242  }
    243 } static_ucd_funcs;
    244 
    245 static inline
    246 void free_static_ucd_funcs ()
    247 {
    248  static_ucd_funcs.free_instance ();
    249 }
    250 
    251 hb_unicode_funcs_t *
    252 hb_ucd_get_unicode_funcs ()
    253 {
    254 #ifdef HB_NO_UCD
    255  return hb_unicode_funcs_get_empty ();
    256 #endif
    257  return static_ucd_funcs.get_unconst ();
    258 }