tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hb-ot-tag.cc (18546B)


      1 /*
      2 * Copyright © 2009  Red Hat, Inc.
      3 * Copyright © 2011  Google, Inc.
      4 *
      5 *  This is part of HarfBuzz, a text shaping library.
      6 *
      7 * Permission is hereby granted, without written agreement and without
      8 * license or royalty fees, to use, copy, modify, and distribute this
      9 * software and its documentation for any purpose, provided that the
     10 * above copyright notice and the following two paragraphs appear in
     11 * all copies of this software.
     12 *
     13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     17 * DAMAGE.
     18 *
     19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     24 *
     25 * Red Hat Author(s): Behdad Esfahbod
     26 * Google Author(s): Behdad Esfahbod, Roozbeh Pournader
     27 */
     28 
     29 #include "hb.hh"
     30 
     31 #ifndef HB_NO_OT_TAG
     32 
     33 
     34 /* hb_script_t */
     35 
     36 static hb_tag_t
     37 hb_ot_old_tag_from_script (hb_script_t script)
     38 {
     39  /* This seems to be accurate as of end of 2012. */
     40 
     41  switch ((hb_tag_t) script)
     42  {
     43    case HB_SCRIPT_INVALID:		return HB_OT_TAG_DEFAULT_SCRIPT;
     44    case HB_SCRIPT_MATH:		return HB_OT_TAG_MATH_SCRIPT;
     45 
     46    /* KATAKANA and HIRAGANA both map to 'kana' */
     47    case HB_SCRIPT_HIRAGANA:		return HB_TAG('k','a','n','a');
     48 
     49    /* Spaces at the end are preserved, unlike ISO 15924 */
     50    case HB_SCRIPT_LAO:			return HB_TAG('l','a','o',' ');
     51    case HB_SCRIPT_YI:			return HB_TAG('y','i',' ',' ');
     52    /* Unicode-5.0 additions */
     53    case HB_SCRIPT_NKO:			return HB_TAG('n','k','o',' ');
     54    /* Unicode-5.1 additions */
     55    case HB_SCRIPT_VAI:			return HB_TAG('v','a','i',' ');
     56  }
     57 
     58  /* Else, just change first char to lowercase and return */
     59  return ((hb_tag_t) script) | 0x20000000u;
     60 }
     61 
     62 static hb_script_t
     63 hb_ot_old_tag_to_script (hb_tag_t tag)
     64 {
     65  if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT))
     66    return HB_SCRIPT_INVALID;
     67  if (unlikely (tag == HB_OT_TAG_MATH_SCRIPT))
     68    return HB_SCRIPT_MATH;
     69 
     70  /* This side of the conversion is fully algorithmic. */
     71 
     72  /* Any spaces at the end of the tag are replaced by repeating the last
     73   * letter.  Eg 'nko ' -> 'Nkoo' */
     74  if (unlikely ((tag & 0x0000FF00u) == 0x00002000u))
     75    tag |= (tag >> 8) & 0x0000FF00u; /* Copy second letter to third */
     76  if (unlikely ((tag & 0x000000FFu) == 0x00000020u))
     77    tag |= (tag >> 8) & 0x000000FFu; /* Copy third letter to fourth */
     78 
     79  /* Change first char to uppercase and return */
     80  return (hb_script_t) (tag & ~0x20000000u);
     81 }
     82 
     83 static hb_tag_t
     84 hb_ot_new_tag_from_script (hb_script_t script)
     85 {
     86  switch ((hb_tag_t) script) {
     87    case HB_SCRIPT_BENGALI:		return HB_TAG('b','n','g','2');
     88    case HB_SCRIPT_DEVANAGARI:		return HB_TAG('d','e','v','2');
     89    case HB_SCRIPT_GUJARATI:		return HB_TAG('g','j','r','2');
     90    case HB_SCRIPT_GURMUKHI:		return HB_TAG('g','u','r','2');
     91    case HB_SCRIPT_KANNADA:		return HB_TAG('k','n','d','2');
     92    case HB_SCRIPT_MALAYALAM:		return HB_TAG('m','l','m','2');
     93    case HB_SCRIPT_ORIYA:		return HB_TAG('o','r','y','2');
     94    case HB_SCRIPT_TAMIL:		return HB_TAG('t','m','l','2');
     95    case HB_SCRIPT_TELUGU:		return HB_TAG('t','e','l','2');
     96    case HB_SCRIPT_MYANMAR:		return HB_TAG('m','y','m','2');
     97  }
     98 
     99  return HB_OT_TAG_DEFAULT_SCRIPT;
    100 }
    101 
    102 static hb_script_t
    103 hb_ot_new_tag_to_script (hb_tag_t tag)
    104 {
    105  switch (tag) {
    106    case HB_TAG('b','n','g','2'):	return HB_SCRIPT_BENGALI;
    107    case HB_TAG('d','e','v','2'):	return HB_SCRIPT_DEVANAGARI;
    108    case HB_TAG('g','j','r','2'):	return HB_SCRIPT_GUJARATI;
    109    case HB_TAG('g','u','r','2'):	return HB_SCRIPT_GURMUKHI;
    110    case HB_TAG('k','n','d','2'):	return HB_SCRIPT_KANNADA;
    111    case HB_TAG('m','l','m','2'):	return HB_SCRIPT_MALAYALAM;
    112    case HB_TAG('o','r','y','2'):	return HB_SCRIPT_ORIYA;
    113    case HB_TAG('t','m','l','2'):	return HB_SCRIPT_TAMIL;
    114    case HB_TAG('t','e','l','2'):	return HB_SCRIPT_TELUGU;
    115    case HB_TAG('m','y','m','2'):	return HB_SCRIPT_MYANMAR;
    116  }
    117 
    118  return HB_SCRIPT_UNKNOWN;
    119 }
    120 
    121 #ifndef HB_DISABLE_DEPRECATED
    122 /**
    123 * hb_ot_tags_from_script:
    124 * @script: an #hb_script_t to convert.
    125 * @script_tag_1: (out): output #hb_tag_t.
    126 * @script_tag_2: (out): output #hb_tag_t.
    127 *
    128 * Converts an #hb_script_t to script tags.
    129 *
    130 * Since: 0.6.0
    131 * Deprecated: 2.0.0: use hb_ot_tags_from_script_and_language() instead
    132 **/
    133 void
    134 hb_ot_tags_from_script (hb_script_t  script,
    135 		hb_tag_t    *script_tag_1,
    136 		hb_tag_t    *script_tag_2)
    137 {
    138  unsigned int count = 2;
    139  hb_tag_t tags[2];
    140  hb_ot_tags_from_script_and_language (script, HB_LANGUAGE_INVALID, &count, tags, nullptr, nullptr);
    141  *script_tag_1 = count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_SCRIPT;
    142  *script_tag_2 = count > 1 ? tags[1] : HB_OT_TAG_DEFAULT_SCRIPT;
    143 }
    144 #endif
    145 
    146 /*
    147 * Complete list at:
    148 * https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
    149 *
    150 * Most of the script tags are the same as the ISO 15924 tag but lowercased.
    151 * So we just do that, and handle the exceptional cases in a switch.
    152 */
    153 
    154 static void
    155 hb_ot_all_tags_from_script (hb_script_t   script,
    156 		    unsigned int *count /* IN/OUT */,
    157 		    hb_tag_t     *tags /* OUT */)
    158 {
    159  unsigned int i = 0;
    160 
    161  hb_tag_t new_tag = hb_ot_new_tag_from_script (script);
    162  if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT))
    163  {
    164    /* HB_SCRIPT_MYANMAR maps to 'mym2', but there is no 'mym3'. */
    165    if (new_tag != HB_TAG('m','y','m','2'))
    166      tags[i++] = new_tag | '3';
    167    if (*count > i)
    168      tags[i++] = new_tag;
    169  }
    170 
    171  if (*count > i)
    172  {
    173    hb_tag_t old_tag = hb_ot_old_tag_from_script (script);
    174    if (old_tag != HB_OT_TAG_DEFAULT_SCRIPT)
    175      tags[i++] = old_tag;
    176  }
    177 
    178  *count = i;
    179 }
    180 
    181 /**
    182 * hb_ot_tag_to_script:
    183 * @tag: a script tag
    184 *
    185 * Converts a script tag to an #hb_script_t.
    186 *
    187 * Return value: The #hb_script_t corresponding to @tag.
    188 *
    189 **/
    190 hb_script_t
    191 hb_ot_tag_to_script (hb_tag_t tag)
    192 {
    193  unsigned char digit = tag & 0x000000FFu;
    194  if (unlikely (digit == '2' || digit == '3'))
    195    return hb_ot_new_tag_to_script (tag & 0xFFFFFF32);
    196 
    197  return hb_ot_old_tag_to_script (tag);
    198 }
    199 
    200 
    201 /* hb_language_t */
    202 
    203 static inline bool
    204 subtag_matches (const char *lang_str,
    205 	const char *limit,
    206 	const char *subtag,
    207 	unsigned    subtag_len)
    208 {
    209  if (likely ((unsigned) (limit - lang_str) < subtag_len))
    210    return false;
    211 
    212  do {
    213    const char *s = strstr (lang_str, subtag);
    214    if (!s || s >= limit)
    215      return false;
    216    if (!ISALNUM (s[subtag_len]))
    217      return true;
    218    lang_str = s + subtag_len;
    219  } while (true);
    220 }
    221 
    222 static bool
    223 lang_matches (const char *lang_str,
    224       const char *limit,
    225       const char *spec,
    226       unsigned    spec_len)
    227 {
    228  /* Same as hb_language_matches(); duplicated. */
    229 
    230  if (likely ((unsigned) (limit - lang_str) < spec_len))
    231    return false;
    232 
    233  return strncmp (lang_str, spec, spec_len) == 0 &&
    234  (lang_str[spec_len] == '\0' || lang_str[spec_len] == '-');
    235 }
    236 
    237 struct LangTag
    238 {
    239  hb_tag_t language;
    240  hb_tag_t tag;
    241 
    242  int cmp (hb_tag_t a) const
    243  {
    244    return a < this->language ? -1 : a > this->language ? +1 : 0;
    245  }
    246  int cmp (const LangTag *that) const
    247  { return cmp (that->language); }
    248 };
    249 
    250 #include "hb-ot-tag-table.hh"
    251 
/* The language IDs corresponding to the following tags are unclear,
 * overlap, or are architecturally weird. Needs more research. */
    254 
    255 /*{"??",	{HB_TAG('B','C','R',' ')}},*/	/* Bible Cree */
    256 /*{"zh?",	{HB_TAG('C','H','N',' ')}},*/	/* Chinese (seen in Microsoft fonts) */
    257 /*{"ar-Syrc?",	{HB_TAG('G','A','R',' ')}},*/	/* Garshuni */
    258 /*{"??",	{HB_TAG('N','G','R',' ')}},*/	/* Nagari */
    259 /*{"??",	{HB_TAG('Y','I','C',' ')}},*/	/* Yi Classic */
    260 /*{"zh?",	{HB_TAG('Z','H','P',' ')}},*/	/* Chinese Phonetic */
    261 
    262 #ifndef HB_DISABLE_DEPRECATED
    263 /**
    264 * hb_ot_tag_from_language:
    265 * @language: an #hb_language_t to convert.
    266 *
    267 * Converts an #hb_language_t to an #hb_tag_t.
    268 *
    269 * Since: 0.6.0
    270 * Deprecated: 2.0.0: use hb_ot_tags_from_script_and_language() instead
    271 **/
    272 hb_tag_t
    273 hb_ot_tag_from_language (hb_language_t language)
    274 {
    275  unsigned int count = 1;
    276  hb_tag_t tags[1];
    277  hb_ot_tags_from_script_and_language (HB_SCRIPT_UNKNOWN, language, nullptr, nullptr, &count, tags);
    278  return count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_LANGUAGE;
    279 }
    280 #endif
    281 
/* Converts a language string (subtags separated by '-') into OpenType
 * language tags.
 *
 * @lang_str: NUL-terminated language string.
 * @limit:    end of the part of @lang_str that is considered when measuring
 *            subtag lengths.
 * @count:    in: capacity of @tags; out: number of tags written.
 * @tags:     output array.
 *
 * Lookup order: the multi-subtag matcher (unless HB_NO_LANGUAGE_LONG), then
 * the 2- or 3-letter table keyed on one subtag, then a fallback that
 * upper-cases a 3-letter first subtag.  If nothing applies, *@count is set
 * to zero. */
static void
hb_ot_tags_from_language (const char   *lang_str,
			  const char   *limit,
			  unsigned int *count,
			  hb_tag_t     *tags)
{

#ifndef HB_NO_LANGUAGE_LONG
  /* Check for matches of multiple subtags. */
  if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags))
    return;
#endif

  /* Find a language matching in the first component. */
#ifndef HB_NO_LANGUAGE_LONG
  /* s marks the first '-' of the string as passed in; it is not updated
   * if lang_str is advanced to the extended language subtag below. */
  const char *s; s = strchr (lang_str, '-');
#endif
  {
#ifndef HB_NO_LANGUAGE_LONG
    if (s && limit - lang_str >= 6)
    {
      const char *extlang_end = strchr (s + 1, '-');
      /* If there is an extended language tag, use it. */
      /* "Extended" here means the second subtag is exactly three characters
       * long and starts with a letter. */
      if (3 == (extlang_end ? extlang_end - s - 1 : strlen (s + 1)) &&
	  ISALPHA (s[1]))
	lang_str = s + 1;
    }
#endif
    /* Pick the table by the length of the subtag being looked up; with any
     * other length the table stays empty and the lookup below finds nothing. */
    const LangTag *ot_languages = nullptr;
    unsigned ot_languages_len = 0;
    const char *dash = strchr (lang_str, '-');
    unsigned first_len = dash ? dash - lang_str : limit - lang_str;
    if (first_len == 2)
    {
      ot_languages = ot_languages2;
      ot_languages_len = ARRAY_LENGTH (ot_languages2);
    }
#ifndef HB_NO_LANGUAGE_LONG
    else if (first_len == 3)
    {
      ot_languages = ot_languages3;
      ot_languages_len = ARRAY_LENGTH (ot_languages3);
    }
#endif

    hb_tag_t lang_tag = hb_tag_from_string (lang_str, first_len);

    /* The cached index is shared by both tables, so it is re-validated
     * (bounds and key) against the selected table before being trusted. */
    static hb_atomic_t<unsigned> last_tag_idx = 0; /* Poor man's cache. */
    unsigned tag_idx = last_tag_idx;

    if (likely (tag_idx < ot_languages_len && ot_languages[tag_idx].language == lang_tag) ||
	hb_sorted_array (ot_languages, ot_languages_len).bfind (lang_tag, &tag_idx))
    {
      last_tag_idx = tag_idx;
      unsigned int i;
      /* Several consecutive entries may share one language; rewind to the
       * first of them. */
      while (tag_idx != 0 &&
	     ot_languages[tag_idx].language == ot_languages[tag_idx - 1].language)
	tag_idx--;
      /* Copy that language's tags in table order, stopping at the output
       * capacity, the end of the table, an HB_TAG_NONE entry, or the next
       * language. */
      for (i = 0;
	   i < *count &&
	   tag_idx + i < ot_languages_len &&
	   ot_languages[tag_idx + i].tag != HB_TAG_NONE &&
	   ot_languages[tag_idx + i].language == ot_languages[tag_idx].language;
	   i++)
	tags[i] = ot_languages[tag_idx + i].tag;
      *count = i;
      return;
    }
  }

#ifndef HB_NO_LANGUAGE_LONG
  if (!s)
    s = lang_str + strlen (lang_str);
  /* NOTE(review): if lang_str was advanced to the extended language subtag
   * above, s still points at the '-' just before it, so this difference is
   * -1 and the fallback is skipped -- confirm that this is intended. */
  if (s - lang_str == 3) {
    /* Assume it's ISO-639-3 and upper-case and use it. */
    /* The mask clears bit 0x20 in the first three bytes only. */
    tags[0] = hb_tag_from_string (lang_str, s - lang_str) & ~0x20202000u;
    *count = 1;
    return;
  }
#endif

  *count = 0;
}
    365 
    366 static bool
    367 parse_private_use_subtag (const char     *private_use_subtag,
    368 		  unsigned int   *count,
    369 		  hb_tag_t       *tags,
    370 		  const char     *prefix,
    371 		  unsigned char (*normalize) (unsigned char))
    372 {
    373 #ifdef HB_NO_LANGUAGE_PRIVATE_SUBTAG
    374  return false;
    375 #endif
    376 
    377  if (!(private_use_subtag && count && tags && *count)) return false;
    378 
    379  const char *s = strstr (private_use_subtag, prefix);
    380  if (!s) return false;
    381 
    382  char tag[4];
    383  int i;
    384  s += strlen (prefix);
    385  if (s[0] == '-') {
    386    s += 1;
    387    char c;
    388    for (i = 0; i < 8 && ISHEX (s[i]); i++)
    389    {
    390      c = FROMHEX (s[i]);
    391      if (i % 2 == 0)
    392 tag[i / 2] = c << 4;
    393      else
    394 tag[i / 2] += c;
    395    }
    396    if (i != 8) return false;
    397  } else {
    398    for (i = 0; i < 4 && ISALNUM (s[i]); i++)
    399      tag[i] = normalize (s[i]);
    400    if (!i) return false;
    401 
    402    for (; i < 4; i++)
    403      tag[i] = ' ';
    404  }
    405  tags[0] = HB_TAG (tag[0], tag[1], tag[2], tag[3]);
    406  if ((tags[0] & 0xDFDFDFDF) == HB_OT_TAG_DEFAULT_SCRIPT)
    407    tags[0] ^= ~0xDFDFDFDF;
    408  *count = 1;
    409  return true;
    410 }
    411 
/**
 * hb_ot_tags_from_script_and_language:
 * @script: an #hb_script_t to convert.
 * @language: (nullable): an #hb_language_t to convert.
 * @script_count: (inout) (optional): maximum number of script tags to retrieve (IN)
 * and actual number of script tags retrieved (OUT)
 * @script_tags: (out) (optional): array of size at least @script_count to store the
 * script tag results
 * @language_count: (inout) (optional): maximum number of language tags to retrieve
 * (IN) and actual number of language tags retrieved (OUT)
 * @language_tags: (out) (optional): array of size at least @language_count to store
 * the language tag results
 *
 * Converts an #hb_script_t and an #hb_language_t to script and language tags.
 *
 * A private-use subtag of @language containing "-hbsc" or "-hbot" overrides
 * the script tag or the language tag respectively; when the script tag is
 * overridden, @script is not consulted.  Each output is only written when
 * both its count and array are provided and the count is non-zero.
 *
 * Since: 2.0.0
 **/
void
hb_ot_tags_from_script_and_language (hb_script_t   script,
				     hb_language_t language,
				     unsigned int *script_count /* IN/OUT */,
				     hb_tag_t     *script_tags /* OUT */,
				     unsigned int *language_count /* IN/OUT */,
				     hb_tag_t     *language_tags /* OUT */)
{
  /* Cleared only when a private-use "-hbsc" override supplies the script tag. */
  bool needs_script = true;

  if (language == HB_LANGUAGE_INVALID)
  {
    /* No language: report zero language tags. */
    if (language_count && language_tags && *language_count)
      *language_count = 0;
  }
  else
  {
    /* limit: where the regular subtags end (the '-' before the first
     * single-character subtag, or the end of the string).
     * private_use_subtag: the "x" singleton that starts the private-use part,
     * if there is one. */
    const char *lang_str, *s, *limit, *private_use_subtag;
    bool needs_language;

    lang_str = hb_language_to_string (language);
    limit = nullptr;
    private_use_subtag = nullptr;
    if (lang_str[0] == 'x' && lang_str[1] == '-')
    {
      /* The whole string is private use; limit stays null in this case. */
      private_use_subtag = lang_str;
    } else {
      /* Starting at the second character, look for single-character subtags,
       * i.e. a character with '-' on both sides. */
      for (s = lang_str + 1; *s; s++)
      {
	if (s[-1] == '-' && s[1] == '-')
	{
	  if (s[0] == 'x')
	  {
	    /* Private-use singleton: remember it and stop scanning. */
	    private_use_subtag = s;
	    if (!limit)
	      limit = s - 1;
	    break;
	  } else if (!limit)
	  {
	    /* First other singleton: the regular subtags end at the '-'
	     * in front of it. */
	    limit = s - 1;
	  }
	}
      }
      /* No singleton found: the regular subtags run to the end of the string. */
      if (!limit)
	limit = s;
    }

    /* Private-use overrides; the short form of a script tag is lower-cased
     * and that of a language tag is upper-cased. */
    needs_script = !parse_private_use_subtag (private_use_subtag, script_count, script_tags, "-hbsc", TOLOWER);
    needs_language = !parse_private_use_subtag (private_use_subtag, language_count, language_tags, "-hbot", TOUPPER);

    if (needs_language && language_count && language_tags && *language_count)
      hb_ot_tags_from_language (lang_str, limit, language_count, language_tags);
  }

  if (needs_script && script_count && script_tags && *script_count)
    hb_ot_all_tags_from_script (script, script_count, script_tags);
}
    486 
    487 /**
    488 * hb_ot_tag_to_language:
    489 * @tag: an language tag
    490 *
    491 * Converts a language tag to an #hb_language_t.
    492 *
    493 * Return value: (transfer none) (nullable):
    494 * The #hb_language_t corresponding to @tag.
    495 *
    496 * Since: 0.9.2
    497 **/
    498 hb_language_t
    499 hb_ot_tag_to_language (hb_tag_t tag)
    500 {
    501  unsigned int i;
    502 
    503  if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
    504    return nullptr;
    505 
    506 #ifndef HB_NO_LANGUAGE_LONG
    507  {
    508    hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag);
    509    if (disambiguated_tag != HB_LANGUAGE_INVALID)
    510      return disambiguated_tag;
    511  }
    512 #endif
    513 
    514  char buf[4];
    515  for (i = 0; i < ARRAY_LENGTH (ot_languages2); i++)
    516    if (ot_languages2[i].tag == tag)
    517    {
    518      hb_tag_to_string (ot_languages2[i].language, buf);
    519      return hb_language_from_string (buf, 2);
    520    }
    521 #ifndef HB_NO_LANGUAGE_LONG
    522  for (i = 0; i < ARRAY_LENGTH (ot_languages3); i++)
    523    if (ot_languages3[i].tag == tag)
    524    {
    525      hb_tag_to_string (ot_languages3[i].language, buf);
    526      return hb_language_from_string (buf, 3);
    527    }
    528 #endif
    529 
    530  /* Return a custom language in the form of "x-hbot-AABBCCDD".
    531   * If it's three letters long, also guess it's ISO 639-3 and lower-case and
    532   * prepend it (if it's not a registered tag, the private use subtags will
    533   * ensure that calling hb_ot_tag_from_language on the result will still return
    534   * the same tag as the original tag).
    535   */
    536  {
    537    char buf[20];
    538    char *str = buf;
    539    if (ISALPHA (tag >> 24)
    540 && ISALPHA ((tag >> 16) & 0xFF)
    541 && ISALPHA ((tag >> 8) & 0xFF)
    542 && (tag & 0xFF) == ' ')
    543    {
    544      buf[0] = TOLOWER (tag >> 24);
    545      buf[1] = TOLOWER ((tag >> 16) & 0xFF);
    546      buf[2] = TOLOWER ((tag >> 8) & 0xFF);
    547      buf[3] = '-';
    548      str += 4;
    549    }
    550    snprintf (str, 16, "x-hbot-%08" PRIx32, tag);
    551    return hb_language_from_string (&*buf, -1);
    552  }
    553 }
    554 
    555 /**
    556 * hb_ot_tags_to_script_and_language:
    557 * @script_tag: a script tag
    558 * @language_tag: a language tag
    559 * @script: (out) (optional): the #hb_script_t corresponding to @script_tag.
    560 * @language: (out) (optional): the #hb_language_t corresponding to @script_tag and
    561 * @language_tag.
    562 *
    563 * Converts a script tag and a language tag to an #hb_script_t and an
    564 * #hb_language_t.
    565 *
    566 * Since: 2.0.0
    567 **/
    568 void
    569 hb_ot_tags_to_script_and_language (hb_tag_t       script_tag,
    570 			   hb_tag_t       language_tag,
    571 			   hb_script_t   *script /* OUT */,
    572 			   hb_language_t *language /* OUT */)
    573 {
    574  hb_script_t script_out = hb_ot_tag_to_script (script_tag);
    575  if (script)
    576    *script = script_out;
    577  if (language)
    578  {
    579    unsigned int script_count = 1;
    580    hb_tag_t primary_script_tag[1];
    581    hb_ot_tags_from_script_and_language (script_out,
    582 				 HB_LANGUAGE_INVALID,
    583 				 &script_count,
    584 				 primary_script_tag,
    585 				 nullptr, nullptr);
    586    *language = hb_ot_tag_to_language (language_tag);
    587    if (script_count == 0 || primary_script_tag[0] != script_tag)
    588    {
    589      unsigned char *buf;
    590      const char *lang_str = hb_language_to_string (*language);
    591      size_t len = strlen (lang_str);
    592      buf = (unsigned char *) hb_malloc (len + 16);
    593      if (unlikely (!buf))
    594      {
    595 *language = nullptr;
    596      }
    597      else
    598      {
    599 int shift;
    600 hb_memcpy (buf, lang_str, len);
    601 if (lang_str[0] != 'x' || lang_str[1] != '-') {
    602   buf[len++] = '-';
    603   buf[len++] = 'x';
    604 }
    605 buf[len++] = '-';
    606 buf[len++] = 'h';
    607 buf[len++] = 'b';
    608 buf[len++] = 's';
    609 buf[len++] = 'c';
    610 buf[len++] = '-';
    611 for (shift = 28; shift >= 0; shift -= 4)
    612   buf[len++] = TOHEX (script_tag >> shift);
    613 *language = hb_language_from_string ((char *) buf, len);
    614 hb_free (buf);
    615      }
    616    }
    617  }
    618 }
    619 
    620 #ifdef MAIN
    621 static inline void
    622 test_langs_sorted ()
    623 {
    624  for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages2); i++)
    625  {
    626    int c = ot_languages2[i].cmp (&ot_languages2[i - 1]);
    627    if (c > 0)
    628    {
    629      fprintf (stderr, "ot_languages2 not sorted at index %u: %08x %d %08x\n",
    630        i, ot_languages2[i-1].language, c, ot_languages2[i].language);
    631      abort();
    632    }
    633  }
    634 #ifndef HB_NO_LANGUAGE_LONG
    635  for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages3); i++)
    636  {
    637    int c = ot_languages3[i].cmp (&ot_languages3[i - 1]);
    638    if (c > 0)
    639    {
    640      fprintf (stderr, "ot_languages3 not sorted at index %u: %08x %d %08x\n",
    641        i, ot_languages3[i-1].language, c, ot_languages3[i].language);
    642      abort();
    643    }
    644  }
    645 #endif
    646 }
    647 
    648 int
    649 main ()
    650 {
    651  test_langs_sorted ();
    652  return 0;
    653 }
    654 
    655 #endif
    656 
    657 
    658 #endif