tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hb-icu.cc (9560B)


      1 /*
      2 * Copyright © 2009  Red Hat, Inc.
      3 * Copyright © 2009  Keith Stribley
      4 * Copyright © 2011  Google, Inc.
      5 *
      6 *  This is part of HarfBuzz, a text shaping library.
      7 *
      8 * Permission is hereby granted, without written agreement and without
      9 * license or royalty fees, to use, copy, modify, and distribute this
     10 * software and its documentation for any purpose, provided that the
     11 * above copyright notice and the following two paragraphs appear in
     12 * all copies of this software.
     13 *
     14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     18 * DAMAGE.
     19 *
     20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     22 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     25 *
     26 * Red Hat Author(s): Behdad Esfahbod
     27 * Google Author(s): Behdad Esfahbod
     28 */
     29 
     30 #include "hb.hh"
     31 
     32 #ifdef HAVE_ICU
     33 
     34 #pragma GCC diagnostic push
     35 
     36 // https://github.com/harfbuzz/harfbuzz/issues/4915
     37 #pragma GCC diagnostic ignored "-Wredundant-decls"
     38 
     39 #include "hb-icu.h"
     40 
     41 #include "hb-machinery.hh"
     42 
     43 #include <unicode/uchar.h>
     44 #include <unicode/unorm2.h>
     45 #include <unicode/ustring.h>
     46 #include <unicode/utf16.h>
     47 #include <unicode/uversion.h>
     48 
     49 /* ICU extra semicolon, fixed since 65, https://github.com/unicode-org/icu/commit/480bec3 */
     50 #if U_ICU_VERSION_MAJOR_NUM < 65 && (defined(__GNUC__) || defined(__clang__))
     51 #define HB_ICU_EXTRA_SEMI_IGNORED
     52 #pragma GCC diagnostic ignored "-Wextra-semi-stmt"
     53 #endif
     54 
     55 /**
     56 * SECTION:hb-icu
     57 * @title: hb-icu
     58 * @short_description: ICU integration
     59 * @include: hb-icu.h
     60 *
     61 * Functions for using HarfBuzz with the International Components for Unicode
     62 * (ICU) library. HarfBuzz supports using ICU to provide Unicode data, by attaching
     63 * ICU functions to the virtual methods in a #hb_unicode_funcs_t function
     64 * structure.
     65 **/
     66 
     67 /**
     68 * hb_icu_script_to_script:
     69 * @script: The UScriptCode identifier to query
     70 *
     71 * Fetches the #hb_script_t script that corresponds to the
     72 * specified UScriptCode identifier.
     73 *
     74 * Return value: the #hb_script_t script found
     75 *
     76 **/
     77 
     78 hb_script_t
     79 hb_icu_script_to_script (UScriptCode script)
     80 {
     81  if (unlikely (script == USCRIPT_INVALID_CODE))
     82    return HB_SCRIPT_INVALID;
     83 
     84  return hb_script_from_string (uscript_getShortName (script), -1);
     85 }
     86 
     87 /**
     88 * hb_icu_script_from_script:
     89 * @script: The #hb_script_t script to query
     90 *
     91 * Fetches the UScriptCode identifier that corresponds to the
     92 * specified #hb_script_t script.
     93 *
     94 * Return value: the UScriptCode identifier found
     95 *
     96 **/
     97 UScriptCode
     98 hb_icu_script_from_script (hb_script_t script)
     99 {
    100  UScriptCode out = USCRIPT_INVALID_CODE;
    101 
    102  if (unlikely (script == HB_SCRIPT_INVALID))
    103    return out;
    104 
    105  UErrorCode icu_err = U_ZERO_ERROR;
    106  const unsigned char buf[5] = {HB_UNTAG (script), 0};
    107  uscript_getCode ((const char *) buf, &out, 1, &icu_err);
    108 
    109  return out;
    110 }
    111 
    112 
    113 static hb_unicode_combining_class_t
    114 hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    115 			hb_codepoint_t      unicode,
    116 			void               *user_data HB_UNUSED)
    117 
    118 {
    119  return (hb_unicode_combining_class_t) u_getCombiningClass (unicode);
    120 }
    121 
    122 static hb_unicode_general_category_t
    123 hb_icu_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    124 			 hb_codepoint_t      unicode,
    125 			 void               *user_data HB_UNUSED)
    126 {
    127  switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
    128  {
    129  case U_UNASSIGNED:			return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
    130 
    131  case U_UPPERCASE_LETTER:		return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER;
    132  case U_LOWERCASE_LETTER:		return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER;
    133  case U_TITLECASE_LETTER:		return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER;
    134  case U_MODIFIER_LETTER:		return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER;
    135  case U_OTHER_LETTER:			return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
    136 
    137  case U_NON_SPACING_MARK:		return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK;
    138  case U_ENCLOSING_MARK:		return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;
    139  case U_COMBINING_SPACING_MARK:	return HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK;
    140 
    141  case U_DECIMAL_DIGIT_NUMBER:		return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER;
    142  case U_LETTER_NUMBER:			return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER;
    143  case U_OTHER_NUMBER:			return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER;
    144 
    145  case U_SPACE_SEPARATOR:		return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
    146  case U_LINE_SEPARATOR:		return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR;
    147  case U_PARAGRAPH_SEPARATOR:		return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR;
    148 
    149  case U_CONTROL_CHAR:			return HB_UNICODE_GENERAL_CATEGORY_CONTROL;
    150  case U_FORMAT_CHAR:			return HB_UNICODE_GENERAL_CATEGORY_FORMAT;
    151  case U_PRIVATE_USE_CHAR:		return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE;
    152  case U_SURROGATE:			return HB_UNICODE_GENERAL_CATEGORY_SURROGATE;
    153 
    154 
    155  case U_DASH_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION;
    156  case U_START_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;
    157  case U_END_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION;
    158  case U_CONNECTOR_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION;
    159  case U_OTHER_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;
    160 
    161  case U_MATH_SYMBOL:			return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL;
    162  case U_CURRENCY_SYMBOL:		return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL;
    163  case U_MODIFIER_SYMBOL:		return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL;
    164  case U_OTHER_SYMBOL:			return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL;
    165 
    166  case U_INITIAL_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION;
    167  case U_FINAL_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION;
    168  }
    169 
    170  return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
    171 }
    172 
    173 static hb_codepoint_t
    174 hb_icu_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    175 		  hb_codepoint_t      unicode,
    176 		  void               *user_data HB_UNUSED)
    177 {
    178  return u_charMirror(unicode);
    179 }
    180 
    181 static hb_script_t
    182 hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    183 	       hb_codepoint_t      unicode,
    184 	       void               *user_data HB_UNUSED)
    185 {
    186  UErrorCode status = U_ZERO_ERROR;
    187  UScriptCode scriptCode = uscript_getScript(unicode, &status);
    188 
    189  if (unlikely (U_FAILURE (status)))
    190    return HB_SCRIPT_UNKNOWN;
    191 
    192  return hb_icu_script_to_script (scriptCode);
    193 }
    194 
    195 static hb_bool_t
    196 hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    197 		hb_codepoint_t      a,
    198 		hb_codepoint_t      b,
    199 		hb_codepoint_t     *ab,
    200 		void               *user_data)
    201 {
    202  const UNormalizer2 *normalizer = (const UNormalizer2 *) user_data;
    203  UChar32 ret = unorm2_composePair (normalizer, a, b);
    204  if (ret < 0) return false;
    205  *ab = ret;
    206  return true;
    207 }
    208 
    209 static hb_bool_t
    210 hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    211 		  hb_codepoint_t      ab,
    212 		  hb_codepoint_t     *a,
    213 		  hb_codepoint_t     *b,
    214 		  void               *user_data)
    215 {
    216  const UNormalizer2 *normalizer = (const UNormalizer2 *) user_data;
    217  UChar decomposed[4];
    218  int len;
    219  UErrorCode icu_err = U_ZERO_ERROR;
    220  len = unorm2_getRawDecomposition (normalizer, ab, decomposed,
    221 			    ARRAY_LENGTH (decomposed), &icu_err);
    222  if (U_FAILURE (icu_err) || len < 0) return false;
    223 
    224  len = u_countChar32 (decomposed, len);
    225  if (len == 1)
    226  {
    227    U16_GET_UNSAFE (decomposed, 0, *a);
    228    *b = 0;
    229    return *a != ab;
    230  }
    231  else if (len == 2)
    232  {
    233    len = 0;
    234    U16_NEXT_UNSAFE (decomposed, len, *a);
    235    U16_NEXT_UNSAFE (decomposed, len, *b);
    236  }
    237  return true;
    238 }
    239 
    240 
    241 static inline void free_static_icu_funcs ();
    242 
    243 static struct hb_icu_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_icu_unicode_funcs_lazy_loader_t>
    244 {
    245  static hb_unicode_funcs_t *create ()
    246  {
    247    void *user_data = nullptr;
    248    UErrorCode icu_err = U_ZERO_ERROR;
    249    user_data = (void *) unorm2_getNFCInstance (&icu_err);
    250    assert (user_data);
    251 
    252    hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
    253 
    254    hb_unicode_funcs_set_combining_class_func (funcs, hb_icu_unicode_combining_class, nullptr, nullptr);
    255    hb_unicode_funcs_set_general_category_func (funcs, hb_icu_unicode_general_category, nullptr, nullptr);
    256    hb_unicode_funcs_set_mirroring_func (funcs, hb_icu_unicode_mirroring, nullptr, nullptr);
    257    hb_unicode_funcs_set_script_func (funcs, hb_icu_unicode_script, nullptr, nullptr);
    258    hb_unicode_funcs_set_compose_func (funcs, hb_icu_unicode_compose, user_data, nullptr);
    259    hb_unicode_funcs_set_decompose_func (funcs, hb_icu_unicode_decompose, user_data, nullptr);
    260 
    261    hb_unicode_funcs_make_immutable (funcs);
    262 
    263    hb_atexit (free_static_icu_funcs);
    264 
    265    return funcs;
    266  }
    267 } static_icu_funcs;
    268 
    269 static inline
    270 void free_static_icu_funcs ()
    271 {
    272  static_icu_funcs.free_instance ();
    273 }
    274 
    275 /**
    276 * hb_icu_get_unicode_funcs:
    277 *
    278 * Fetches a Unicode-functions structure that is populated
    279 * with the appropriate ICU function for each method.
    280 *
    281 * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure
    282 *
    283 * Since: 0.9.38
    284 **/
    285 hb_unicode_funcs_t *
    286 hb_icu_get_unicode_funcs ()
    287 {
    288  return static_icu_funcs.get_unconst ();
    289 }
    290 
    291 #pragma GCC diagnostic pop
    292 
    293 #endif