uscript_props.cpp (10246B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2013-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: uscript_props.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2013feb16 14 * created by: Markus W. Scherer 15 */ 16 17 #include "unicode/utypes.h" 18 #include "unicode/unistr.h" 19 #include "unicode/uscript.h" 20 #include "unicode/utf16.h" 21 #include "ustr_imp.h" 22 #include "cmemory.h" 23 24 namespace { 25 26 // Script metadata (script properties). 27 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt 28 29 // 0 = NOT_ENCODED, no sample character, default false script properties. 30 // Bits 20.. 0: sample character 31 32 // Bits 23..21: usage 33 const int32_t UNKNOWN = 1 << 21; 34 const int32_t EXCLUSION = 2 << 21; 35 const int32_t LIMITED_USE = 3 << 21; 36 // st int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10 37 const int32_t RECOMMENDED = 5 << 21; 38 39 // Bits 31..24: Single-bit flags 40 const int32_t RTL = 1 << 24; 41 const int32_t LB_LETTERS = 1 << 25; 42 const int32_t CASED = 1 << 26; 43 44 const int32_t SCRIPT_PROPS[] = { 45 // Begin copy-paste output from 46 // tools/trunk/unicode/py/parsescriptmetadata.py 47 0x0040 | RECOMMENDED, // Zyyy 48 0x030F | RECOMMENDED, // Zinh 49 0x0628 | RECOMMENDED | RTL, // Arab 50 0x0531 | RECOMMENDED | CASED, // Armn 51 0x0995 | RECOMMENDED, // Beng 52 0x3105 | LIMITED_USE | LB_LETTERS, // Bopo 53 0x13C4 | LIMITED_USE | CASED, // Cher 54 0x03E2 | EXCLUSION | CASED, // Copt 55 0x042F | RECOMMENDED | CASED, // Cyrl 56 0x10414 | EXCLUSION | CASED, // Dsrt 57 0x0905 | RECOMMENDED, // Deva 58 0x12A0 | RECOMMENDED, // Ethi 59 0x10D3 | RECOMMENDED, // Geor 60 0x10330 | EXCLUSION, // Goth 61 0x03A9 | RECOMMENDED | CASED, // Grek 62 0x0A95 | RECOMMENDED, // Gujr 63 0x0A15 | RECOMMENDED, // Guru 64 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani 65 0xAC00 | RECOMMENDED, // Hang 66 0x05D0 | RECOMMENDED | RTL, // Hebr 67 0x304B | RECOMMENDED | LB_LETTERS, // Hira 68 0x0C95 | RECOMMENDED, // Knda 69 0x30AB | RECOMMENDED | LB_LETTERS, // Kana 70 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr 71 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo 72 0x004C | RECOMMENDED | CASED, // Latn 73 0x0D15 | RECOMMENDED, // Mlym 74 0x1826 | EXCLUSION, // Mong 75 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr 76 0x168F | EXCLUSION, // Ogam 77 0x10300 | EXCLUSION, // Ital 78 0x0B15 | RECOMMENDED, // Orya 79 0x16A0 | EXCLUSION, // Runr 80 0x0D85 | RECOMMENDED, // Sinh 81 0x0710 | LIMITED_USE | RTL, // Syrc 82 0x0B95 | RECOMMENDED, // Taml 83 0x0C15 | RECOMMENDED, // Telu 84 0x078C | RECOMMENDED | RTL, // Thaa 85 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai 86 0x0F40 | RECOMMENDED, // Tibt 87 0x14C0 | LIMITED_USE, // Cans 88 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii 89 0x1703 | EXCLUSION, // Tglg 90 0x1723 | EXCLUSION, // Hano 91 0x1743 | EXCLUSION, // Buhd 92 0x1763 | EXCLUSION, // Tagb 93 0x280E | UNKNOWN, // Brai 94 0x10800 | EXCLUSION | RTL, // Cprt 95 0x1900 | LIMITED_USE, // Limb 96 0x10000 | EXCLUSION, // Linb 97 0x10480 | EXCLUSION, // Osma 98 0x10450 | EXCLUSION, // Shaw 99 0x1950 | LIMITED_USE | LB_LETTERS, // Tale 100 0x10380 | EXCLUSION, // Ugar 101 0, 102 0x1A00 | EXCLUSION, // Bugi 103 0x2C00 | EXCLUSION | CASED, // Glag 104 0x10A00 | EXCLUSION | RTL, // Khar 105 0xA800 | LIMITED_USE, // Sylo 106 0x1980 | LIMITED_USE | LB_LETTERS, // Talu 107 0x2D30 | LIMITED_USE, // Tfng 108 0x103A0 | EXCLUSION, // Xpeo 109 0x1B05 | LIMITED_USE, // Bali 110 0x1BC0 | LIMITED_USE, // Batk 111 0, 112 0x11005 | EXCLUSION, // Brah 113 0xAA00 | LIMITED_USE, // Cham 114 0, 115 0, 116 0, 117 0, 118 0x13153 | EXCLUSION, // Egyp 119 0, 120 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans 121 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant 122 0x16B1C | EXCLUSION, // Hmng 123 0x10CA1 | EXCLUSION | RTL | CASED, // Hung 124 0, 125 0xA984 | LIMITED_USE, // Java 126 0xA90A | LIMITED_USE, // Kali 127 0, 128 0, 129 0x1C00 | LIMITED_USE, // Lepc 130 0x10647 | EXCLUSION, // Lina 131 0x0840 | LIMITED_USE | RTL, // Mand 132 0, 133 0x10980 | EXCLUSION | RTL, // Mero 134 0x07CA | LIMITED_USE | RTL, // Nkoo 135 0x10C00 | EXCLUSION | RTL, // Orkh 136 0x1036B | EXCLUSION, // Perm 137 0xA840 | EXCLUSION, // Phag 138 0x10900 | EXCLUSION | RTL, // Phnx 139 0x16F00 | LIMITED_USE, // Plrd 140 0, 141 0, 142 0, 143 0, 144 0, 145 0, 146 0xA549 | LIMITED_USE, // Vaii 147 0, 148 0x12000 | EXCLUSION, // Xsux 149 0, 150 0xFDD0 | UNKNOWN, // Zzzz 151 0x102A0 | EXCLUSION, // Cari 152 0x304B | RECOMMENDED | LB_LETTERS, // Jpan 153 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana 154 0x10280 | EXCLUSION, // Lyci 155 0x10920 | EXCLUSION | RTL, // Lydi 156 0x1C5A | LIMITED_USE, // Olck 157 0xA930 | EXCLUSION, // Rjng 158 0xA882 | LIMITED_USE, // Saur 159 0x1D850 | EXCLUSION, // Sgnw 160 0x1B83 | LIMITED_USE, // Sund 161 0, 162 0xABC0 | LIMITED_USE, // Mtei 163 0x10840 | EXCLUSION | RTL, // Armi 164 0x10B00 | EXCLUSION | RTL, // Avst 165 0x11103 | LIMITED_USE, // Cakm 166 0xAC00 | RECOMMENDED, // Kore 167 0x11083 | EXCLUSION, // Kthi 168 0x10AD8 | EXCLUSION | RTL, // Mani 169 0x10B60 | EXCLUSION | RTL, // Phli 170 0x10B8F | EXCLUSION | RTL, // Phlp 171 0, 172 0x10B40 | EXCLUSION | RTL, // Prti 173 0x0800 | EXCLUSION | RTL, // Samr 174 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt 175 0, 176 0, 177 0xA6A0 | LIMITED_USE, // Bamu 178 0xA4D0 | LIMITED_USE, // Lisu 179 0, 180 0x10A60 | EXCLUSION | RTL, // Sarb 181 0x16AE6 | EXCLUSION, // Bass 182 0x1BC20 | EXCLUSION, // Dupl 183 0x10500 | EXCLUSION, // Elba 184 0x11315 | EXCLUSION, // Gran 185 0, 186 0, 187 0x1E802 | EXCLUSION | RTL, // Mend 188 0x109A0 | EXCLUSION | RTL, // Merc 189 0x10A95 | EXCLUSION | RTL, // Narb 190 0x10896 | EXCLUSION | RTL, // Nbat 191 0x10873 | EXCLUSION | RTL, // Palm 192 0x112BE | EXCLUSION, // Sind 193 0x118B4 | EXCLUSION | CASED, // Wara 194 0, 195 0, 196 0x16A4F | EXCLUSION, // Mroo 197 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu 198 0x11183 | EXCLUSION, // Shrd 199 0x110D0 | EXCLUSION, // Sora 200 0x11680 | EXCLUSION, // Takr 201 0x18229 | EXCLUSION | LB_LETTERS, // Tang 202 0, 203 0x14400 | EXCLUSION, // Hluw 204 0x11208 | EXCLUSION, // Khoj 205 0x11484 | EXCLUSION, // Tirh 206 0x10537 | EXCLUSION, // Aghb 207 0x11152 | EXCLUSION, // Mahj 208 0x11717 | EXCLUSION | LB_LETTERS, // Ahom 209 0x108F4 | EXCLUSION | RTL, // Hatr 210 0x1160E | EXCLUSION, // Modi 211 0x1128F | EXCLUSION, // Mult 212 0x11AC0 | EXCLUSION, // Pauc 213 0x1158E | EXCLUSION, // Sidd 214 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm 215 0x11C0E | EXCLUSION, // Bhks 216 0x11C72 | EXCLUSION, // Marc 217 0x11412 | LIMITED_USE, // Newa 218 0x104B5 | LIMITED_USE | CASED, // Osge 219 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb 220 0x1112 | RECOMMENDED, // Jamo 221 0, 222 0x11D10 | EXCLUSION, // Gonm 223 0x11A5C | EXCLUSION, // Soyo 224 0x11A0B | EXCLUSION, // Zanb 225 0x1180B | EXCLUSION, // Dogr 226 0x11D71 | EXCLUSION, // Gong 227 0x11EE5 | EXCLUSION, // Maka 228 0x16E40 | EXCLUSION | CASED, // Medf 229 0x10D12 | LIMITED_USE | RTL, // Rohg 230 0x10F42 | EXCLUSION | RTL, // Sogd 231 0x10F19 | EXCLUSION | RTL, // Sogo 232 0x10FF1 | EXCLUSION | RTL, // Elym 233 0x1E108 | LIMITED_USE, // Hmnp 234 0x119CE | EXCLUSION, // Nand 235 0x1E2E1 | LIMITED_USE, // Wcho 236 0x10FBF | EXCLUSION | RTL, // Chrs 237 0x1190C | EXCLUSION, // Diak 238 0x18C65 | EXCLUSION | LB_LETTERS, // Kits 239 0x10E88 | EXCLUSION | RTL, // Yezi 240 0x12FE5 | EXCLUSION, // Cpmn 241 0x10F7C | EXCLUSION | RTL, // Ougr 242 0x16ABC | EXCLUSION, // Tnsa 243 0x1E290 | EXCLUSION, // Toto 244 0x10582 | EXCLUSION | CASED, // Vith 245 0x11F1B | EXCLUSION | LB_LETTERS, // Kawi 246 0x1E4E6 | EXCLUSION, // Nagm 247 0, 248 0x10D5D | EXCLUSION | RTL | CASED, // Gara 249 0x1611C | EXCLUSION, // Gukh 250 0x16D45 | EXCLUSION, // Krai 251 0x1E5D0 | EXCLUSION, // Onao 252 0x11BC4 | EXCLUSION, // Sunu 253 0x105C2 | EXCLUSION, // Todr 254 0x11392 | EXCLUSION, // Tutg 255 0x16EA1 | EXCLUSION | CASED, // Berf 256 0x10950 | EXCLUSION | RTL, // Sidt 257 0x1E6D5 | EXCLUSION | LB_LETTERS, // Tayo 258 0x11DC6 | EXCLUSION, // Tols 259 // End copy-paste from parsescriptmetadata.py 260 }; 261 262 int32_t getScriptProps(UScriptCode script) { 263 if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) { 264 return SCRIPT_PROPS[script]; 265 } else { 266 return 0; 267 } 268 } 269 270 } // namespace 271 272 U_CAPI int32_t U_EXPORT2 273 uscript_getSampleString(UScriptCode script, char16_t *dest, int32_t capacity, UErrorCode *pErrorCode) { 274 if(U_FAILURE(*pErrorCode)) { return 0; } 275 if(capacity < 0 || (capacity > 0 && dest == nullptr)) { 276 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 277 return 0; 278 } 279 int32_t sampleChar = getScriptProps(script) & 0x1fffff; 280 int32_t length; 281 if(sampleChar == 0) { 282 length = 0; 283 } else { 284 length = U16_LENGTH(sampleChar); 285 if(length <= capacity) { 286 int32_t i = 0; 287 U16_APPEND_UNSAFE(dest, i, sampleChar); 288 } 289 } 290 return u_terminateUChars(dest, capacity, length, pErrorCode); 291 } 292 293 U_COMMON_API icu::UnicodeString U_EXPORT2 294 uscript_getSampleUnicodeString(UScriptCode script) { 295 icu::UnicodeString sample; 296 int32_t sampleChar = getScriptProps(script) & 0x1fffff; 297 if(sampleChar != 0) { 298 sample.append(sampleChar); 299 } 300 return sample; 301 } 302 303 U_CAPI UScriptUsage U_EXPORT2 304 uscript_getUsage(UScriptCode script) { 305 return (UScriptUsage)((getScriptProps(script) >> 21) & 7); 306 } 307 308 U_CAPI UBool U_EXPORT2 309 uscript_isRightToLeft(UScriptCode script) { 310 return (getScriptProps(script) & RTL) != 0; 311 } 312 313 U_CAPI UBool U_EXPORT2 314 uscript_breaksBetweenLetters(UScriptCode script) { 315 return (getScriptProps(script) & LB_LETTERS) != 0; 316 } 317 318 U_CAPI UBool U_EXPORT2 319 uscript_isCased(UScriptCode script) { 320 return (getScriptProps(script) & CASED) != 0; 321 }