hb-ucd.cc (7196B)
1 /* 2 * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net> 3 * 4 * Permission to use, copy, modify, and/or distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 #include "hb.hh" 18 #include "hb-unicode.hh" 19 #include "hb-machinery.hh" 20 21 #include "hb-ucd-table.hh" 22 23 static hb_unicode_combining_class_t 24 hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, 25 hb_codepoint_t unicode, 26 void *user_data HB_UNUSED) 27 { 28 return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode); 29 } 30 31 static hb_unicode_general_category_t 32 hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, 33 hb_codepoint_t unicode, 34 void *user_data HB_UNUSED) 35 { 36 return (hb_unicode_general_category_t) _hb_ucd_gc (unicode); 37 } 38 39 static hb_codepoint_t 40 hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, 41 hb_codepoint_t unicode, 42 void *user_data HB_UNUSED) 43 { 44 return unicode + _hb_ucd_bmg (unicode); 45 } 46 47 static hb_script_t 48 hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, 49 hb_codepoint_t unicode, 50 void *user_data HB_UNUSED) 51 { 52 return _hb_ucd_sc_map[_hb_ucd_sc (unicode)]; 53 } 54 55 56 #define SBASE 0xAC00u 57 #define LBASE 0x1100u 58 #define VBASE 0x1161u 59 #define TBASE 0x11A7u 60 #define SCOUNT 11172u 61 #define LCOUNT 19u 62 #define VCOUNT 21u 63 #define TCOUNT 28u 64 #define NCOUNT (VCOUNT * TCOUNT) 65 66 static inline bool 67 _hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b) 68 { 69 unsigned si = ab - SBASE; 70 71 if (si >= SCOUNT) 72 return false; 73 74 if (si % TCOUNT) 75 { 76 /* LV,T */ 77 *a = SBASE + (si / TCOUNT) * TCOUNT; 78 *b = TBASE + (si % TCOUNT); 79 return true; 80 } else { 81 /* L,V */ 82 *a = LBASE + (si / NCOUNT); 83 *b = VBASE + (si % NCOUNT) / TCOUNT; 84 return true; 85 } 86 } 87 88 static inline bool 89 _hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab) 90 { 91 if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) && 92 !((a - SBASE) % TCOUNT)) 93 { 94 /* LV,T */ 95 *ab = a + (b - TBASE); 96 return true; 97 } 98 else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) 99 { 100 /* L,V */ 101 int li = a - LBASE; 102 int vi = b - VBASE; 103 *ab = SBASE + li * NCOUNT + vi * TCOUNT; 104 return true; 105 } 106 else 107 return false; 108 } 109 110 static int 111 _cmp_pair (const void *_key, const void *_item) 112 { 113 uint64_t& a = * (uint64_t*) _key; 114 uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0); 115 116 return a < b ? -1 : a > b ? +1 : 0; 117 } 118 static int 119 _cmp_pair_11_7_14 (const void *_key, const void *_item) 120 { 121 uint32_t& a = * (uint32_t*) _key; 122 uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0); 123 124 return a < b ? -1 : a > b ? +1 : 0; 125 } 126 127 static hb_bool_t 128 hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 129 hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab, 130 void *user_data HB_UNUSED) 131 { 132 // Hangul is handled algorithmically. 133 if (_hb_ucd_compose_hangul (a, b, ab)) return true; 134 135 hb_codepoint_t u = 0; 136 137 if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u) 138 { 139 /* If "a" is small enough and "b" is in the U+0300 range, 140 * the composition data is encoded in a 32bit array sorted 141 * by "a,b" pair. */ 142 uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0); 143 const uint32_t *v = hb_bsearch (k, 144 _hb_ucd_dm2_u32_map, 145 ARRAY_LENGTH (_hb_ucd_dm2_u32_map), 146 sizeof (*_hb_ucd_dm2_u32_map), 147 _cmp_pair_11_7_14); 148 if (likely (!v)) return false; 149 u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v); 150 } 151 else 152 { 153 /* Otherwise it is stored in a 64bit array sorted by 154 * "a,b" pair. */ 155 uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0); 156 const uint64_t *v = hb_bsearch (k, 157 _hb_ucd_dm2_u64_map, 158 ARRAY_LENGTH (_hb_ucd_dm2_u64_map), 159 sizeof (*_hb_ucd_dm2_u64_map), 160 _cmp_pair); 161 if (likely (!v)) return false; 162 u = HB_CODEPOINT_DECODE3_3 (*v); 163 } 164 165 if (unlikely (!u)) return false; 166 *ab = u; 167 return true; 168 } 169 170 static hb_bool_t 171 hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 172 hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b, 173 void *user_data HB_UNUSED) 174 { 175 if (_hb_ucd_decompose_hangul (ab, a, b)) return true; 176 177 unsigned i = _hb_ucd_dm (ab); 178 179 /* If no data, there's no decomposition. */ 180 if (likely (!i)) return false; 181 i--; 182 183 /* Check if it's a single-character decomposition. */ 184 if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map)) 185 { 186 /* Single-character decompositions currently are only in plane 0 or plane 2. */ 187 if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map)) 188 { 189 /* Plane 0. */ 190 *a = _hb_ucd_dm1_p0_map[i]; 191 } 192 else 193 { 194 /* Plane 2. */ 195 i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map); 196 *a = 0x20000 | _hb_ucd_dm1_p2_map[i]; 197 } 198 *b = 0; 199 return true; 200 } 201 i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map); 202 203 /* Otherwise they are encoded either in a 32bit array or a 64bit array. */ 204 if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map)) 205 { 206 /* 32bit array. */ 207 uint32_t v = _hb_ucd_dm2_u32_map[i]; 208 *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v); 209 *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v); 210 return true; 211 } 212 i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map); 213 214 /* 64bit array. */ 215 uint64_t v = _hb_ucd_dm2_u64_map[i]; 216 *a = HB_CODEPOINT_DECODE3_1 (v); 217 *b = HB_CODEPOINT_DECODE3_2 (v); 218 return true; 219 } 220 221 222 static void free_static_ucd_funcs (); 223 224 static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t> 225 { 226 static hb_unicode_funcs_t *create () 227 { 228 hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr); 229 230 hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr); 231 hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr); 232 hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr); 233 hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr); 234 hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr); 235 hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr); 236 237 hb_unicode_funcs_make_immutable (funcs); 238 239 hb_atexit (free_static_ucd_funcs); 240 241 return funcs; 242 } 243 } static_ucd_funcs; 244 245 static inline 246 void free_static_ucd_funcs () 247 { 248 static_ucd_funcs.free_instance (); 249 } 250 251 hb_unicode_funcs_t * 252 hb_ucd_get_unicode_funcs () 253 { 254 #ifdef HB_NO_UCD 255 return hb_unicode_funcs_get_empty (); 256 #endif 257 return static_ucd_funcs.get_unconst (); 258 }