uscript.cpp (5668B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 1997-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * 9 * File USCRIPT.C 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 07/06/2001 Ram Creation. 15 ****************************************************************************** 16 */ 17 18 #include "unicode/uchar.h" 19 #include "unicode/uscript.h" 20 #include "unicode/uloc.h" 21 #include "charstr.h" 22 #include "cmemory.h" 23 #include "cstring.h" 24 #include "ulocimp.h" 25 26 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; 27 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN }; 28 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO }; 29 30 static int32_t 31 setCodes(const UScriptCode *src, int32_t length, 32 UScriptCode *dest, int32_t capacity, UErrorCode *err) { 33 int32_t i; 34 if(U_FAILURE(*err)) { return 0; } 35 if(length > capacity) { 36 *err = U_BUFFER_OVERFLOW_ERROR; 37 return length; 38 } 39 for(i = 0; i < length; ++i) { 40 dest[i] = src[i]; 41 } 42 return length; 43 } 44 45 static int32_t 46 setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) { 47 if(U_FAILURE(*err)) { return 0; } 48 if(1 > capacity) { 49 *err = U_BUFFER_OVERFLOW_ERROR; 50 return 1; 51 } 52 scripts[0] = script; 53 return 1; 54 } 55 56 static int32_t 57 getCodesFromLocale(const char *locale, 58 UScriptCode *scripts, int32_t capacity, UErrorCode *err) { 59 if (U_FAILURE(*err)) { return 0; } 60 icu::CharString lang; 61 icu::CharString script; 62 if (locale == nullptr) { 63 locale = uloc_getDefault(); 64 } 65 ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, *err); 66 if (U_FAILURE(*err)) { return 0; } 67 // Multi-script languages, equivalent to the LocaleScript data 68 // that we used to load from locale resource bundles. 69 if (lang == "ja") { 70 return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err); 71 } 72 if (lang == "ko") { 73 return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err); 74 } 75 if (lang == "zh" && script == "Hant") { 76 return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err); 77 } 78 // Explicit script code. 79 if (!script.isEmpty()) { 80 UScriptCode scriptCode = static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, script.data())); 81 if(scriptCode != USCRIPT_INVALID_CODE) { 82 if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) { 83 scriptCode = USCRIPT_HAN; 84 } 85 return setOneCode(scriptCode, scripts, capacity, err); 86 } 87 } 88 return 0; 89 } 90 91 /* TODO: this is a bad API and should be deprecated, ticket #11141 */ 92 U_CAPI int32_t U_EXPORT2 93 uscript_getCode(const char* nameOrAbbrOrLocale, 94 UScriptCode* fillIn, 95 int32_t capacity, 96 UErrorCode* err){ 97 UBool triedCode; 98 UErrorCode internalErrorCode; 99 int32_t length; 100 101 if(U_FAILURE(*err)) { 102 return 0; 103 } 104 if(nameOrAbbrOrLocale==nullptr || 105 (fillIn == nullptr ? capacity != 0 : capacity < 0)) { 106 *err = U_ILLEGAL_ARGUMENT_ERROR; 107 return 0; 108 } 109 110 triedCode = false; 111 const char* lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '-'); 112 if (lastSepPtr==nullptr) { 113 lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '_'); 114 } 115 // Favor interpretation of nameOrAbbrOrLocale as a script alias if either 116 // 1. nameOrAbbrOrLocale does not contain -/_. Handles Han, Mro, Nko, etc. 117 // 2. The last instance of -/_ is at offset 3, and the portion after that is 118 // longer than 4 characters (i.e. not a script or region code). This handles 119 // Old_Hungarian, Old_Italic, etc. ("old" is a valid language code) 120 // 3. The last instance of -/_ is at offset 7, and the portion after that is 121 // 3 characters. This handles New_Tai_Lue ("new" is a valid language code). 122 if (lastSepPtr==nullptr 123 || (lastSepPtr-nameOrAbbrOrLocale == 3 && uprv_strlen(nameOrAbbrOrLocale) > 8) 124 || (lastSepPtr-nameOrAbbrOrLocale == 7 && uprv_strlen(nameOrAbbrOrLocale) == 11) ) { 125 /* try long and abbreviated script names first */ 126 UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); 127 if(code!=USCRIPT_INVALID_CODE) { 128 return setOneCode(code, fillIn, capacity, err); 129 } 130 triedCode = true; 131 } 132 internalErrorCode = U_ZERO_ERROR; 133 length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err); 134 if(U_FAILURE(*err) || length != 0) { 135 return length; 136 } 137 icu::CharString likely = ulocimp_addLikelySubtags(nameOrAbbrOrLocale, internalErrorCode); 138 if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) { 139 length = getCodesFromLocale(likely.data(), fillIn, capacity, err); 140 if(U_FAILURE(*err) || length != 0) { 141 return length; 142 } 143 } 144 if(!triedCode) { 145 /* still not found .. try long and abbreviated script names again */ 146 UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); 147 if(code!=USCRIPT_INVALID_CODE) { 148 return setOneCode(code, fillIn, capacity, err); 149 } 150 } 151 return 0; 152 }