tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uscript.cpp (5668B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 1997-2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *
      9 * File USCRIPT.C
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   07/06/2001    Ram         Creation.
     15 ******************************************************************************
     16 */
     17 
     18 #include "unicode/uchar.h"
     19 #include "unicode/uscript.h"
     20 #include "unicode/uloc.h"
     21 #include "charstr.h"
     22 #include "cmemory.h"
     23 #include "cstring.h"
     24 #include "ulocimp.h"
     25 
     26 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
     27 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
     28 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
     29 
     30 static int32_t
     31 setCodes(const UScriptCode *src, int32_t length,
     32         UScriptCode *dest, int32_t capacity, UErrorCode *err) {
     33    int32_t i;
     34    if(U_FAILURE(*err)) { return 0; }
     35    if(length > capacity) {
     36        *err = U_BUFFER_OVERFLOW_ERROR;
     37        return length;
     38    }
     39    for(i = 0; i < length; ++i) {
     40        dest[i] = src[i];
     41    }
     42    return length;
     43 }
     44 
     45 static int32_t
     46 setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
     47    if(U_FAILURE(*err)) { return 0; }
     48    if(1 > capacity) {
     49        *err = U_BUFFER_OVERFLOW_ERROR;
     50        return 1;
     51    }
     52    scripts[0] = script;
     53    return 1;
     54 }
     55 
     56 static int32_t
     57 getCodesFromLocale(const char *locale,
     58                   UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
     59    if (U_FAILURE(*err)) { return 0; }
     60    icu::CharString lang;
     61    icu::CharString script;
     62    if (locale == nullptr) {
     63        locale = uloc_getDefault();
     64    }
     65    ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, *err);
     66    if (U_FAILURE(*err)) { return 0; }
     67    // Multi-script languages, equivalent to the LocaleScript data
     68    // that we used to load from locale resource bundles.
     69    if (lang == "ja") {
     70        return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
     71    }
     72    if (lang == "ko") {
     73        return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
     74    }
     75    if (lang == "zh" && script == "Hant") {
     76        return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
     77    }
     78    // Explicit script code.
     79    if (!script.isEmpty()) {
     80        UScriptCode scriptCode = static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, script.data()));
     81        if(scriptCode != USCRIPT_INVALID_CODE) {
     82            if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
     83                scriptCode = USCRIPT_HAN;
     84            }
     85            return setOneCode(scriptCode, scripts, capacity, err);
     86        }
     87    }
     88    return 0;
     89 }
     90 
     91 /* TODO: this is a bad API and should be deprecated, ticket #11141 */
     92 U_CAPI int32_t  U_EXPORT2
     93 uscript_getCode(const char* nameOrAbbrOrLocale,
     94                UScriptCode* fillIn,
     95                int32_t capacity,
     96                UErrorCode* err){
     97    UBool triedCode;
     98    UErrorCode internalErrorCode;
     99    int32_t length;
    100 
    101    if(U_FAILURE(*err)) {
    102        return 0;
    103    }
    104    if(nameOrAbbrOrLocale==nullptr ||
    105            (fillIn == nullptr ? capacity != 0 : capacity < 0)) {
    106        *err = U_ILLEGAL_ARGUMENT_ERROR;
    107        return 0;
    108    }
    109 
    110    triedCode = false;
    111    const char* lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '-');
    112    if (lastSepPtr==nullptr) {
    113        lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '_');
    114    }
    115    // Favor interpretation of nameOrAbbrOrLocale as a script alias if either
    116    // 1. nameOrAbbrOrLocale does not contain -/_. Handles Han, Mro, Nko, etc.
    117    // 2. The last instance of -/_ is at offset 3, and the portion after that is
    118    //    longer than 4 characters (i.e. not a script or region code). This handles
    119    //    Old_Hungarian, Old_Italic, etc. ("old" is a valid language code)
    120    // 3. The last instance of -/_ is at offset 7, and the portion after that is
    121    //    3 characters. This handles New_Tai_Lue ("new" is a valid language code).
    122    if (lastSepPtr==nullptr
    123            || (lastSepPtr-nameOrAbbrOrLocale == 3 && uprv_strlen(nameOrAbbrOrLocale) > 8)
    124            || (lastSepPtr-nameOrAbbrOrLocale == 7 && uprv_strlen(nameOrAbbrOrLocale) == 11) ) {
    125        /* try long and abbreviated script names first */
    126        UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
    127        if(code!=USCRIPT_INVALID_CODE) {
    128            return setOneCode(code, fillIn, capacity, err);
    129        }
    130        triedCode = true;
    131    }
    132    internalErrorCode = U_ZERO_ERROR;
    133    length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
    134    if(U_FAILURE(*err) || length != 0) {
    135        return length;
    136    }
    137    icu::CharString likely = ulocimp_addLikelySubtags(nameOrAbbrOrLocale, internalErrorCode);
    138    if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
    139        length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
    140        if(U_FAILURE(*err) || length != 0) {
    141            return length;
    142        }
    143    }
    144    if(!triedCode) {
    145        /* still not found .. try long and abbreviated script names again */
    146        UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
    147        if(code!=USCRIPT_INVALID_CODE) {
    148            return setOneCode(code, fillIn, capacity, err);
    149        }
    150    }
    151    return 0;
    152 }