tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uscript_props.cpp (10246B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2013-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  uscript_props.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2013feb16
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/unistr.h"
     19 #include "unicode/uscript.h"
     20 #include "unicode/utf16.h"
     21 #include "ustr_imp.h"
     22 #include "cmemory.h"
     23 
     24 namespace {
     25 
     26 // Script metadata (script properties).
     27 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
     28 
     29 // 0 = NOT_ENCODED, no sample character, default false script properties.
     30 // Bits 20.. 0: sample character
     31 
     32 // Bits 23..21: usage
     // Usage values, pre-shifted into bits 23..21 of a SCRIPT_PROPS entry.
     // uscript_getUsage() recovers the value with (props >> 21) & 7.
     constexpr int32_t UNKNOWN = 1 << 21;
     constexpr int32_t EXCLUSION = 2 << 21;
     constexpr int32_t LIMITED_USE = 3 << 21;
     // const int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10
     constexpr int32_t RECOMMENDED = 5 << 21;

     // Bits 31..24: single-bit flags, each tested with a plain bitwise AND.
     constexpr int32_t RTL = 1 << 24;         // read by uscript_isRightToLeft()
     constexpr int32_t LB_LETTERS = 1 << 25;  // read by uscript_breaksBetweenLetters()
     constexpr int32_t CASED = 1 << 26;       // read by uscript_isCased()
     43 
     44 const int32_t SCRIPT_PROPS[] = {
           // Packed per-script property words. getScriptProps() indexes this
           // array directly with the UScriptCode value, so entry order matters.
           // Layout of each entry:
           //   bits 20..0   sample character (code point)
           //   bits 23..21  usage value (UNKNOWN, EXCLUSION, LIMITED_USE, RECOMMENDED)
           //   bits 24..26  single-bit flags (RTL, LB_LETTERS, CASED)
           // A bare 0 entry means: not encoded, no sample character, and all
           // flag properties false.
     45    // Begin copy-paste output from
     46    // tools/trunk/unicode/py/parsescriptmetadata.py
     47    0x0040 | RECOMMENDED,  // Zyyy
     48    0x030F | RECOMMENDED,  // Zinh
     49    0x0628 | RECOMMENDED | RTL,  // Arab
     50    0x0531 | RECOMMENDED | CASED,  // Armn
     51    0x0995 | RECOMMENDED,  // Beng
     52    0x3105 | LIMITED_USE | LB_LETTERS,  // Bopo
     53    0x13C4 | LIMITED_USE | CASED,  // Cher
     54    0x03E2 | EXCLUSION | CASED,  // Copt
     55    0x042F | RECOMMENDED | CASED,  // Cyrl
     56    0x10414 | EXCLUSION | CASED,  // Dsrt
     57    0x0905 | RECOMMENDED,  // Deva
     58    0x12A0 | RECOMMENDED,  // Ethi
     59    0x10D3 | RECOMMENDED,  // Geor
     60    0x10330 | EXCLUSION,  // Goth
     61    0x03A9 | RECOMMENDED | CASED,  // Grek
     62    0x0A95 | RECOMMENDED,  // Gujr
     63    0x0A15 | RECOMMENDED,  // Guru
     64    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
     65    0xAC00 | RECOMMENDED,  // Hang
     66    0x05D0 | RECOMMENDED | RTL,  // Hebr
     67    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
     68    0x0C95 | RECOMMENDED,  // Knda
     69    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
     70    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
     71    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
     72    0x004C | RECOMMENDED | CASED,  // Latn
     73    0x0D15 | RECOMMENDED,  // Mlym
     74    0x1826 | EXCLUSION,  // Mong
     75    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
     76    0x168F | EXCLUSION,  // Ogam
     77    0x10300 | EXCLUSION,  // Ital
     78    0x0B15 | RECOMMENDED,  // Orya
     79    0x16A0 | EXCLUSION,  // Runr
     80    0x0D85 | RECOMMENDED,  // Sinh
     81    0x0710 | LIMITED_USE | RTL,  // Syrc
     82    0x0B95 | RECOMMENDED,  // Taml
     83    0x0C15 | RECOMMENDED,  // Telu
     84    0x078C | RECOMMENDED | RTL,  // Thaa
     85    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
     86    0x0F40 | RECOMMENDED,  // Tibt
     87    0x14C0 | LIMITED_USE,  // Cans
     88    0xA288 | LIMITED_USE | LB_LETTERS,  // Yiii
     89    0x1703 | EXCLUSION,  // Tglg
     90    0x1723 | EXCLUSION,  // Hano
     91    0x1743 | EXCLUSION,  // Buhd
     92    0x1763 | EXCLUSION,  // Tagb
     93    0x280E | UNKNOWN,  // Brai
     94    0x10800 | EXCLUSION | RTL,  // Cprt
     95    0x1900 | LIMITED_USE,  // Limb
     96    0x10000 | EXCLUSION,  // Linb
     97    0x10480 | EXCLUSION,  // Osma
     98    0x10450 | EXCLUSION,  // Shaw
     99    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
    100    0x10380 | EXCLUSION,  // Ugar
    101    0,
    102    0x1A00 | EXCLUSION,  // Bugi
    103    0x2C00 | EXCLUSION | CASED,  // Glag
    104    0x10A00 | EXCLUSION | RTL,  // Khar
    105    0xA800 | LIMITED_USE,  // Sylo
    106    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
    107    0x2D30 | LIMITED_USE,  // Tfng
    108    0x103A0 | EXCLUSION,  // Xpeo
    109    0x1B05 | LIMITED_USE,  // Bali
    110    0x1BC0 | LIMITED_USE,  // Batk
    111    0,
    112    0x11005 | EXCLUSION,  // Brah
    113    0xAA00 | LIMITED_USE,  // Cham
    114    0,
    115    0,
    116    0,
    117    0,
    118    0x13153 | EXCLUSION,  // Egyp
    119    0,
    120    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
    121    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
    122    0x16B1C | EXCLUSION,  // Hmng
    123    0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
    124    0,
    125    0xA984 | LIMITED_USE,  // Java
    126    0xA90A | LIMITED_USE,  // Kali
    127    0,
    128    0,
    129    0x1C00 | LIMITED_USE,  // Lepc
    130    0x10647 | EXCLUSION,  // Lina
    131    0x0840 | LIMITED_USE | RTL,  // Mand
    132    0,
    133    0x10980 | EXCLUSION | RTL,  // Mero
    134    0x07CA | LIMITED_USE | RTL,  // Nkoo
    135    0x10C00 | EXCLUSION | RTL,  // Orkh
    136    0x1036B | EXCLUSION,  // Perm
    137    0xA840 | EXCLUSION,  // Phag
    138    0x10900 | EXCLUSION | RTL,  // Phnx
    139    0x16F00 | LIMITED_USE,  // Plrd
    140    0,
    141    0,
    142    0,
    143    0,
    144    0,
    145    0,
    146    0xA549 | LIMITED_USE,  // Vaii
    147    0,
    148    0x12000 | EXCLUSION,  // Xsux
    149    0,
    150    0xFDD0 | UNKNOWN,  // Zzzz
    151    0x102A0 | EXCLUSION,  // Cari
    152    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
    153    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
    154    0x10280 | EXCLUSION,  // Lyci
    155    0x10920 | EXCLUSION | RTL,  // Lydi
    156    0x1C5A | LIMITED_USE,  // Olck
    157    0xA930 | EXCLUSION,  // Rjng
    158    0xA882 | LIMITED_USE,  // Saur
    159    0x1D850 | EXCLUSION,  // Sgnw
    160    0x1B83 | LIMITED_USE,  // Sund
    161    0,
    162    0xABC0 | LIMITED_USE,  // Mtei
    163    0x10840 | EXCLUSION | RTL,  // Armi
    164    0x10B00 | EXCLUSION | RTL,  // Avst
    165    0x11103 | LIMITED_USE,  // Cakm
    166    0xAC00 | RECOMMENDED,  // Kore
    167    0x11083 | EXCLUSION,  // Kthi
    168    0x10AD8 | EXCLUSION | RTL,  // Mani
    169    0x10B60 | EXCLUSION | RTL,  // Phli
    170    0x10B8F | EXCLUSION | RTL,  // Phlp
    171    0,
    172    0x10B40 | EXCLUSION | RTL,  // Prti
    173    0x0800 | EXCLUSION | RTL,  // Samr
    174    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
    175    0,
    176    0,
    177    0xA6A0 | LIMITED_USE,  // Bamu
    178    0xA4D0 | LIMITED_USE,  // Lisu
    179    0,
    180    0x10A60 | EXCLUSION | RTL,  // Sarb
    181    0x16AE6 | EXCLUSION,  // Bass
    182    0x1BC20 | EXCLUSION,  // Dupl
    183    0x10500 | EXCLUSION,  // Elba
    184    0x11315 | EXCLUSION,  // Gran
    185    0,
    186    0,
    187    0x1E802 | EXCLUSION | RTL,  // Mend
    188    0x109A0 | EXCLUSION | RTL,  // Merc
    189    0x10A95 | EXCLUSION | RTL,  // Narb
    190    0x10896 | EXCLUSION | RTL,  // Nbat
    191    0x10873 | EXCLUSION | RTL,  // Palm
    192    0x112BE | EXCLUSION,  // Sind
    193    0x118B4 | EXCLUSION | CASED,  // Wara
    194    0,
    195    0,
    196    0x16A4F | EXCLUSION,  // Mroo
    197    0x1B1C4 | EXCLUSION | LB_LETTERS,  // Nshu
    198    0x11183 | EXCLUSION,  // Shrd
    199    0x110D0 | EXCLUSION,  // Sora
    200    0x11680 | EXCLUSION,  // Takr
    201    0x18229 | EXCLUSION | LB_LETTERS,  // Tang
    202    0,
    203    0x14400 | EXCLUSION,  // Hluw
    204    0x11208 | EXCLUSION,  // Khoj
    205    0x11484 | EXCLUSION,  // Tirh
    206    0x10537 | EXCLUSION,  // Aghb
    207    0x11152 | EXCLUSION,  // Mahj
    208    0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
    209    0x108F4 | EXCLUSION | RTL,  // Hatr
    210    0x1160E | EXCLUSION,  // Modi
    211    0x1128F | EXCLUSION,  // Mult
    212    0x11AC0 | EXCLUSION,  // Pauc
    213    0x1158E | EXCLUSION,  // Sidd
    214    0x1E909 | LIMITED_USE | RTL | CASED,  // Adlm
    215    0x11C0E | EXCLUSION,  // Bhks
    216    0x11C72 | EXCLUSION,  // Marc
    217    0x11412 | LIMITED_USE,  // Newa
    218    0x104B5 | LIMITED_USE | CASED,  // Osge
    219    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hanb
    220    0x1112 | RECOMMENDED,  // Jamo
    221    0,
    222    0x11D10 | EXCLUSION,  // Gonm
    223    0x11A5C | EXCLUSION,  // Soyo
    224    0x11A0B | EXCLUSION,  // Zanb
    225    0x1180B | EXCLUSION,  // Dogr
    226    0x11D71 | EXCLUSION,  // Gong
    227    0x11EE5 | EXCLUSION,  // Maka
    228    0x16E40 | EXCLUSION | CASED,  // Medf
    229    0x10D12 | LIMITED_USE | RTL,  // Rohg
    230    0x10F42 | EXCLUSION | RTL,  // Sogd
    231    0x10F19 | EXCLUSION | RTL,  // Sogo
    232    0x10FF1 | EXCLUSION | RTL,  // Elym
    233    0x1E108 | LIMITED_USE,  // Hmnp
    234    0x119CE | EXCLUSION,  // Nand
    235    0x1E2E1 | LIMITED_USE,  // Wcho
    236    0x10FBF | EXCLUSION | RTL,  // Chrs
    237    0x1190C | EXCLUSION,  // Diak
    238    0x18C65 | EXCLUSION | LB_LETTERS,  // Kits
    239    0x10E88 | EXCLUSION | RTL,  // Yezi
    240    0x12FE5 | EXCLUSION,  // Cpmn
    241    0x10F7C | EXCLUSION | RTL,  // Ougr
    242    0x16ABC | EXCLUSION,  // Tnsa
    243    0x1E290 | EXCLUSION,  // Toto
    244    0x10582 | EXCLUSION | CASED,  // Vith
    245    0x11F1B | EXCLUSION | LB_LETTERS,  // Kawi
    246    0x1E4E6 | EXCLUSION,  // Nagm
    247    0,
    248    0x10D5D | EXCLUSION | RTL | CASED,  // Gara
    249    0x1611C | EXCLUSION,  // Gukh
    250    0x16D45 | EXCLUSION,  // Krai
    251    0x1E5D0 | EXCLUSION,  // Onao
    252    0x11BC4 | EXCLUSION,  // Sunu
    253    0x105C2 | EXCLUSION,  // Todr
    254    0x11392 | EXCLUSION,  // Tutg
    255    0x16EA1 | EXCLUSION | CASED,  // Berf
    256    0x10950 | EXCLUSION | RTL,  // Sidt
    257    0x1E6D5 | EXCLUSION | LB_LETTERS,  // Tayo
    258    0x11DC6 | EXCLUSION,  // Tols
    259    // End copy-paste from parsescriptmetadata.py
    260 };
    261 
    262 int32_t getScriptProps(UScriptCode script) {
    263    if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) {
    264        return SCRIPT_PROPS[script];
    265    } else {
    266        return 0;
    267    }
    268 }
    269 
    270 }  // namespace
    271 
    272 U_CAPI int32_t U_EXPORT2
    273 uscript_getSampleString(UScriptCode script, char16_t *dest, int32_t capacity, UErrorCode *pErrorCode) {
    274    if(U_FAILURE(*pErrorCode)) { return 0; }
    275    if(capacity < 0 || (capacity > 0 && dest == nullptr)) {
    276        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    277        return 0;
    278    }
    279    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
    280    int32_t length;
    281    if(sampleChar == 0) {
    282        length = 0;
    283    } else {
    284        length = U16_LENGTH(sampleChar);
    285        if(length <= capacity) {
    286            int32_t i = 0;
    287            U16_APPEND_UNSAFE(dest, i, sampleChar);
    288        }
    289    }
    290    return u_terminateUChars(dest, capacity, length, pErrorCode);
    291 }
    292 
    293 U_COMMON_API icu::UnicodeString U_EXPORT2
    294 uscript_getSampleUnicodeString(UScriptCode script) {
    295    icu::UnicodeString sample;
    296    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
    297    if(sampleChar != 0) {
    298        sample.append(sampleChar);
    299    }
    300    return sample;
    301 }
    302 
    303 U_CAPI UScriptUsage U_EXPORT2
    304 uscript_getUsage(UScriptCode script) {
    305    return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
    306 }
    307 
    308 U_CAPI UBool U_EXPORT2
    309 uscript_isRightToLeft(UScriptCode script) {
    310    return (getScriptProps(script) & RTL) != 0;
    311 }
    312 
    313 U_CAPI UBool U_EXPORT2
    314 uscript_breaksBetweenLetters(UScriptCode script) {
    315    return (getScriptProps(script) & LB_LETTERS) != 0;
    316 }
    317 
    318 U_CAPI UBool U_EXPORT2
    319 uscript_isCased(UScriptCode script) {
    320    return (getScriptProps(script) & CASED) != 0;
    321 }