tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

locale2lcid.h (18368B)


      1 /*  GRAPHITE2 LICENSING
      2 
      3    Copyright 2010, SIL International
      4    All rights reserved.
      5 
      6    This library is free software; you can redistribute it and/or modify
      7    it under the terms of the GNU Lesser General Public License as published
      8    by the Free Software Foundation; either version 2.1 of License, or
      9    (at your option) any later version.
     10 
     11    This program is distributed in the hope that it will be useful,
     12    but WITHOUT ANY WARRANTY; without even the implied warranty of
     13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14    Lesser General Public License for more details.
     15 
     16    You should also have received a copy of the GNU Lesser General Public
     17    License along with this library in the file named "LICENSE".
     18    If not, write to the Free Software Foundation, 51 Franklin Street,
     19    Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
     20    internet at http://www.fsf.org/licenses/lgpl.html.
     21 
     22 Alternatively, the contents of this file may be used under the terms of the
     23 Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
     24 License, as published by the Free Software Foundation, either version 2
     25 of the License or (at your option) any later version.
     26 */
     27 #pragma once
     28 #include <cstring>
     29 #include <cassert>
     30 
     31 #include "inc/Main.h"
     32 
     33 
     34 namespace graphite2 {
     35 
     36 struct IsoLangEntry
     37 {
     38    unsigned short mnLang;
     39    char maLangStr[4];
     40    char maCountry[3];
     41 };
     42 
     43 // Windows Language ID, Locale ISO-639 language, country code as used in
     44 // naming table of OpenType fonts
     45 const IsoLangEntry LANG_ENTRIES[] = {
     46    { 0x0401, "ar","SA" }, // Arabic Saudi Arabia
     47    { 0x0402, "bg","BG" }, // Bulgarian Bulgaria
     48    { 0x0403, "ca","ES" }, // Catalan Catalan
     49    { 0x0404, "zh","TW" }, // Chinese Taiwan
     50    { 0x0405, "cs","CZ" }, // Czech Czech Republic
     51    { 0x0406, "da","DK" }, // Danish Denmark
     52    { 0x0407, "de","DE" }, // German Germany
     53    { 0x0408, "el","GR" }, // Greek Greece
     54    { 0x0409, "en","US" }, // English United States
     55    { 0x040A, "es","ES" }, // Spanish (Traditional Sort) Spain
     56    { 0x040B, "fi","FI" }, // Finnish Finland
     57    { 0x040C, "fr","FR" }, // French France
     58    { 0x040D, "he","IL" }, // Hebrew Israel
     59    { 0x040E, "hu","HU" }, // Hungarian Hungary
     60    { 0x040F, "is","IS" }, // Icelandic Iceland
     61    { 0x0410, "it","IT" }, // Italian Italy
     62    { 0x0411, "jp","JP" }, // Japanese Japan
     63    { 0x0412, "ko","KR" }, // Korean Korea
     64    { 0x0413, "nl","NL" }, // Dutch Netherlands
     65    { 0x0414, "no","NO" }, // Norwegian (Bokmal) Norway
     66    { 0x0415, "pl","PL" }, // Polish Poland
     67    { 0x0416, "pt","BR" }, // Portuguese Brazil
     68    { 0x0417, "rm","CH" }, // Romansh Switzerland
     69    { 0x0418, "ro","RO" }, // Romanian Romania
     70    { 0x0419, "ru","RU" }, // Russian Russia
     71    { 0x041A, "hr","HR" }, // Croatian Croatia
     72    { 0x041B, "sk","SK" }, // Slovak Slovakia
     73    { 0x041C, "sq","AL" }, // Albanian Albania
     74    { 0x041D, "sv","SE" }, // Swedish Sweden
     75    { 0x041E, "th","TH" }, // Thai Thailand
     76    { 0x041F, "tr","TR" }, // Turkish Turkey
     77    { 0x0420, "ur","PK" }, // Urdu Islamic Republic of Pakistan
     78    { 0x0421, "id","ID" }, // Indonesian Indonesia
     79    { 0x0422, "uk","UA" }, // Ukrainian Ukraine
     80    { 0x0423, "be","BY" }, // Belarusian Belarus
     81    { 0x0424, "sl","SI" }, // Slovenian Slovenia
     82    { 0x0425, "et","EE" }, // Estonian Estonia
     83    { 0x0426, "lv","LV" }, // Latvian Latvia
     84    { 0x0427, "lt","LT" }, // Lithuanian Lithuania
     85    { 0x0428, "tg","TJ" }, // Tajik (Cyrillic) Tajikistan
     86    { 0x042A, "vi","VN" }, // Vietnamese Vietnam
     87    { 0x042B, "hy","AM" }, // Armenian Armenia
     88    { 0x042C, "az","AZ" }, // Azeri (Latin) Azerbaijan
     89    { 0x042D, "eu","" }, // Basque Basque
     90    { 0x042E, "hsb","DE" }, // Upper Sorbian Germany
     91    { 0x042F, "mk","MK" }, // Macedonian (FYROM) Former Yugoslav Republic of Macedonia
     92    { 0x0432, "tn","ZA" }, // Setswana South Africa
     93    { 0x0434, "xh","ZA" }, // isiXhosa South Africa
     94    { 0x0435, "zu","ZA" }, // isiZulu South Africa
     95    { 0x0436, "af","ZA" }, // Afrikaans South Africa
     96    { 0x0437, "ka","GE" }, // Georgian Georgia
     97    { 0x0438, "fo","FO" }, // Faroese Faroe Islands
     98    { 0x0439, "hi","IN" }, // Hindi India
     99    { 0x043A, "mt","MT" }, // Maltese Malta
    100    { 0x043B, "se","NO" }, // Sami (Northern) Norway
    101    { 0x043E, "ms","MY" }, // Malay Malaysia
    102    { 0x043F, "kk","KZ" }, // Kazakh Kazakhstan
    103    { 0x0440, "ky","KG" }, // Kyrgyz Kyrgyzstan
    104    { 0x0441, "sw","KE" }, // Kiswahili Kenya
    105    { 0x0442, "tk","TM" }, // Turkmen Turkmenistan
    106    { 0x0443, "uz","UZ" }, // Uzbek (Latin) Uzbekistan
    107    { 0x0444, "tt","RU" }, // Tatar Russia
    108    { 0x0445, "bn","IN" }, // Bengali India
    109    { 0x0446, "pa","IN" }, // Punjabi India
    110    { 0x0447, "gu","IN" }, // Gujarati India
    111    { 0x0448, "or","IN" }, // Oriya India
    112    { 0x0448, "wo","SN" }, // Wolof Senegal
    113    { 0x0449, "ta","IN" }, // Tamil India
    114    { 0x044A, "te","IN" }, // Telugu India
    115    { 0x044B, "kn","IN" }, // Kannada India
    116    { 0x044C, "ml","IN" }, // Malayalam India
    117    { 0x044D, "as","IN" }, // Assamese India
    118    { 0x044E, "mr","IN" }, // Marathi India
    119    { 0x044F, "sa","IN" }, // Sanskrit India
    120    { 0x0450, "mn","MN" }, // Mongolian (Cyrillic) Mongolia
    121    { 0x0451, "bo","CN" }, // Tibetan PRC
    122    { 0x0452, "cy","GB" }, // Welsh United Kingdom
    123    { 0x0453, "km","KH" }, // Khmer Cambodia
    124    { 0x0454, "lo","LA" }, // Lao Lao P.D.R.
    125    { 0x0455, "my","MM" }, // Burmese Myanmar - not listed in Microsoft docs anymore
    126    { 0x0456, "gl","ES" }, // Galician Galician
    127    { 0x0457, "kok","IN" }, // Konkani India
    128    { 0x045A, "syr","TR" }, // Syriac Syria
    129    { 0x045B, "si","LK" }, // Sinhala Sri Lanka
    130    { 0x045D, "iu","CA" }, // Inuktitut Canada
    131    { 0x045E, "am","ET" }, // Amharic Ethiopia
    132    { 0x0461, "ne","NP" }, // Nepali Nepal
    133    { 0x0462, "fy","NL" }, // Frisian Netherlands
    134    { 0x0463, "ps","AF" }, // Pashto Afghanistan
    135    { 0x0464, "fil","PH" }, // Filipino Philippines
    136    { 0x0465, "dv","MV" }, // Divehi Maldives
    137    { 0x0468, "ha","NG" }, // Hausa (Latin) Nigeria
    138    { 0x046A, "yo","NG" }, // Yoruba Nigeria
    139    { 0x046B, "qu","BO" }, // Quechua Bolivia
    140    { 0x046C, "st","ZA" }, // Sesotho sa Leboa South Africa
    141    { 0x046D, "ba","RU" }, // Bashkir Russia
    142    { 0x046E, "lb","LU" }, // Luxembourgish Luxembourg
    143    { 0x046F, "kl","GL" }, // Greenlandic Greenland
    144    { 0x0470, "ig","NG" }, // Igbo Nigeria
    145    { 0x0478, "ii","CN" }, // Yi PRC
    146    { 0x047A, "arn","CL" }, // Mapudungun Chile
    147    { 0x047C, "moh","CA" }, // Mohawk Mohawk
    148    { 0x047E, "br","FR" }, // Breton France
    149    { 0x0480, "ug","CN" }, // Uighur PRC
    150    { 0x0481, "mi","NZ" }, // Maori New Zealand
    151    { 0x0482, "oc","FR" }, // Occitan France
    152    { 0x0483, "co","FR" }, // Corsican France
    153    { 0x0484, "gsw","FR" }, // Alsatian France
    154    { 0x0485, "sah","RU" }, // Yakut Russia
    155    { 0x0486, "qut","GT" }, // K'iche Guatemala
    156    { 0x0487, "rw","RW" }, // Kinyarwanda Rwanda
    157    { 0x048C, "gbz","AF" }, // Dari Afghanistan
    158    { 0x0801, "ar","IQ" }, // Arabic Iraq
    159    { 0x0804, "zn","CH" }, // Chinese People's Republic of China
    160    { 0x0807, "de","CH" }, // German Switzerland
    161    { 0x0809, "en","GB" }, // English United Kingdom
    162    { 0x080A, "es","MX" }, // Spanish Mexico
    163    { 0x080C, "fr","BE" }, // French Belgium
    164    { 0x0810, "it","CH" }, // Italian Switzerland
    165    { 0x0813, "nl","BE" }, // Dutch Belgium
    166    { 0x0814, "nn","NO" }, // Norwegian (Nynorsk) Norway
    167    { 0x0816, "pt","PT" }, // Portuguese Portugal
    168    { 0x081A, "sh","RS" }, // Serbian (Latin) Serbia
    169    { 0x081D, "sv","FI" }, // Sweden Finland
    170    { 0x082C, "az","AZ" }, // Azeri (Cyrillic) Azerbaijan
    171    { 0x082E, "dsb","DE" }, // Lower Sorbian Germany
    172    { 0x083B, "se","SE" }, // Sami (Northern) Sweden
    173    { 0x083C, "ga","IE" }, // Irish Ireland
    174    { 0x083E, "ms","BN" }, // Malay Brunei Darussalam
    175    { 0x0843, "uz","UZ" }, // Uzbek (Cyrillic) Uzbekistan
    176    { 0x0845, "bn","BD" }, // Bengali Bangladesh
    177    { 0x0850, "mn","MN" }, // Mongolian (Traditional) People's Republic of China
    178    { 0x085D, "iu","CA" }, // Inuktitut (Latin) Canada
    179    { 0x085F, "ber","DZ" }, // Tamazight (Latin) Algeria
    180    { 0x086B, "es","EC" }, // Quechua Ecuador
    181    { 0x0C01, "ar","EG" }, // Arabic Egypt
    182    { 0x0C04, "zh","HK" }, // Chinese Hong Kong S.A.R.
    183    { 0x0C07, "de","AT" }, // German Austria
    184    { 0x0C09, "en","AU" }, // English Australia
    185    { 0x0C0A, "es","ES" }, // Spanish (Modern Sort) Spain
    186    { 0x0C0C, "fr","CA" }, // French Canada
    187    { 0x0C1A, "sr","CS" }, // Serbian (Cyrillic) Serbia
    188    { 0x0C3B, "se","FI" }, // Sami (Northern) Finland
    189    { 0x0C6B, "qu","PE" }, // Quechua Peru
    190    { 0x1001, "ar","LY" }, // Arabic Libya
    191    { 0x1004, "zh","SG" }, // Chinese Singapore
    192    { 0x1007, "de","LU" }, // German Luxembourg
    193    { 0x1009, "en","CA" }, // English Canada
    194    { 0x100A, "es","GT" }, // Spanish Guatemala
    195    { 0x100C, "fr","CH" }, // French Switzerland
    196    { 0x101A, "hr","BA" }, // Croatian (Latin) Bosnia and Herzegovina
    197    { 0x103B, "smj","NO" }, // Sami (Lule) Norway
    198    { 0x1401, "ar","DZ" }, // Arabic Algeria
    199    { 0x1404, "zh","MO" }, // Chinese Macao S.A.R.
    200    { 0x1407, "de","LI" }, // German Liechtenstein
    201    { 0x1409, "en","NZ" }, // English New Zealand
    202    { 0x140A, "es","CR" }, // Spanish Costa Rica
    203    { 0x140C, "fr","LU" }, // French Luxembourg
    204    { 0x141A, "bs","BA" }, // Bosnian (Latin) Bosnia and Herzegovina
    205    { 0x143B, "smj","SE" }, // Sami (Lule) Sweden
    206    { 0x1801, "ar","MA" }, // Arabic Morocco
    207    { 0x1809, "en","IE" }, // English Ireland
    208    { 0x180A, "es","PA" }, // Spanish Panama
    209    { 0x180C, "fr","MC" }, // French Principality of Monoco
    210    { 0x181A, "sh","BA" }, // Serbian (Latin) Bosnia and Herzegovina
    211    { 0x183B, "sma","NO" }, // Sami (Southern) Norway
    212    { 0x1C01, "ar","TN" }, // Arabic Tunisia
    213    { 0x1C09, "en","ZA" }, // English South Africa
    214    { 0x1C0A, "es","DO" }, // Spanish Dominican Republic
    215    { 0x1C1A, "sr","BA" }, // Serbian (Cyrillic) Bosnia and Herzegovina
    216    { 0x1C3B, "sma","SE" }, // Sami (Southern) Sweden
    217    { 0x2001, "ar","OM" }, // Arabic Oman
    218    { 0x2009, "en","JM" }, // English Jamaica
    219    { 0x200A, "es","VE" }, // Spanish Venezuela
    220    { 0x201A, "bs","BA" }, // Bosnian (Cyrillic) Bosnia and Herzegovina
    221    { 0x203B, "sms","FI" }, // Sami (Skolt) Finland
    222    { 0x2401, "ar","YE" }, // Arabic Yemen
    223    { 0x2409, "en","BS" }, // English Caribbean
    224    { 0x240A, "es","CO" }, // Spanish Colombia
    225    { 0x243B, "smn","FI" }, // Sami (Inari) Finland
    226    { 0x2801, "ar","SY" }, // Arabic Syria
    227    { 0x2809, "en","BZ" }, // English Belize
    228    { 0x280A, "es","PE" }, // Spanish Peru
    229    { 0x2C01, "ar","JO" }, // Arabic Jordan
    230    { 0x2C09, "en","TT" }, // English Trinidad and Tobago
    231    { 0x2C0A, "es","AR" }, // Spanish Argentina
    232    { 0x3001, "ar","LB" }, // Arabic Lebanon
    233    { 0x3009, "en","ZW" }, // English Zimbabwe
    234    { 0x300A, "es","EC" }, // Spanish Ecuador
    235    { 0x3401, "ar","KW" }, // Arabic Kuwait
    236    { 0x3409, "en","PH" }, // English Republic of the Philippines
    237    { 0x340A, "es","CL" }, // Spanish Chile
    238    { 0x3801, "ar","AE" }, // Arabic U.A.E.
    239    { 0x380A, "es","UY" }, // Spanish Uruguay
    240    { 0x3C01, "ar","BH" }, // Arabic Bahrain
    241    { 0x3C0A, "es","PY" }, // Spanish Paraguay
    242    { 0x4001, "ar","QA" }, // Arabic Qatar
    243    { 0x4009, "en","IN" }, // English India
    244    { 0x400A, "es","BO" }, // Spanish Bolivia
    245    { 0x4409, "en","MY" }, // English Malaysia
    246    { 0x440A, "es","SV" }, // Spanish El Salvador
    247    { 0x4809, "en","SG" }, // English Singapore
    248    { 0x480A, "es","HN" }, // Spanish Honduras
    249    { 0x4C0A, "es","NI" }, // Spanish Nicaragua
    250    { 0x500A, "es","PR" }, // Spanish Puerto Rico
    251    { 0x540A, "es","US" } // Spanish United States
    252 };
    253 
    254 class Locale2Lang
    255 {
    256    Locale2Lang(const Locale2Lang &);
    257    Locale2Lang & operator = (const Locale2Lang &);
    258 
    259 public:
    260    Locale2Lang() : mSeedPosition(128)
    261    {
    262        memset((void*)mLangLookup, 0, sizeof(mLangLookup));
    263        // create a tri lookup on first 2 letters of language code
    264        static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
    265        for (int i = 0; i < maxIndex; i++)
    266        {
    267            size_t a = LANG_ENTRIES[i].maLangStr[0] - 'a';
    268            size_t b = LANG_ENTRIES[i].maLangStr[1] - 'a';
    269            if (mLangLookup[a][b])
    270            {
    271                const IsoLangEntry ** old = mLangLookup[a][b];
    272                int len = 1;
    273                while (old[len]) len++;
    274                len += 2;
    275                mLangLookup[a][b] = gralloc<const IsoLangEntry *>(len);
    276                if (!mLangLookup[a][b])
    277                {
    278                    mLangLookup[a][b] = old;
    279                    continue;
    280                }
    281                mLangLookup[a][b][--len] = NULL;
    282                mLangLookup[a][b][--len] = &LANG_ENTRIES[i];
    283                while (--len >= 0)
    284                {
    285                    assert(len >= 0);
    286                    mLangLookup[a][b][len] = old[len];
    287                }
    288                free(old);
    289            }
    290            else
    291            {
    292                mLangLookup[a][b] = gralloc<const IsoLangEntry *>(2);
    293                if (!mLangLookup[a][b]) continue;
    294                mLangLookup[a][b][1] = NULL;
    295                mLangLookup[a][b][0] = &LANG_ENTRIES[i];
    296            }
    297        }
    298        while (2 * mSeedPosition < maxIndex)
    299            mSeedPosition *= 2;
    300    };
    301    ~Locale2Lang()
    302    {
    303        for (int i = 0; i != 26; ++i)
    304            for (int j = 0; j != 26; ++j)
    305                free(mLangLookup[i][j]);
    306    }
    307    unsigned short getMsId(const char * locale) const
    308    {
    309        size_t length = strlen(locale);
    310        size_t langLength = length;
    311        const char * language = locale;
    312        const char * script = NULL;
    313        const char * region = NULL;
    314        size_t regionLength = 0;
    315        const char * dash = strchr(locale, '-');
    316        if (dash && (dash != locale))
    317        {
    318            langLength = (dash - locale);
    319            size_t nextPartLength = length - langLength - 1;
    320            if (nextPartLength >= 2)
    321            {
    322                script = ++dash;
    323                dash = strchr(dash, '-');
    324                if (dash)
    325                {
    326                    nextPartLength = (dash - script);
    327                    region = ++dash;
    328                }
    329                if (nextPartLength == 2 &&
    330                    (locale[langLength+1] > 0x40) && (locale[langLength+1] < 0x5B) &&
    331                    (locale[langLength+2] > 0x40) && (locale[langLength+2] < 0x5B))
    332                {
    333                    region = script;
    334                    regionLength = nextPartLength;
    335                    script = NULL;
    336                }
    337                else if (nextPartLength == 4)
    338                {
    339                    if (dash)
    340                    {
    341                        dash = strchr(dash, '-');
    342                        if (dash)
    343                        {
    344                            nextPartLength = (dash - region);
    345                        }
    346                        else
    347                        {
    348                            nextPartLength = langLength - (region - locale);
    349                        }
    350                        regionLength = nextPartLength;
    351                    }
    352                }
    353            }
    354        }
    355        size_t a = 'e' - 'a';
    356        size_t b = 'n' - 'a';
    357        unsigned short langId = 0;
    358        int i = 0;
    359        switch (langLength)
    360        {
    361            case 2:
    362            {
    363                a = language[0] - 'a';
    364                b = language[1] - 'a';
    365                if ((a < 26) && (b < 26) && mLangLookup[a][b])
    366                {
    367                    while (mLangLookup[a][b][i])
    368                    {
    369                        if (mLangLookup[a][b][i]->maLangStr[2] != '\0')
    370                        {
    371                            ++i;
    372                            continue;
    373                        }
    374                        if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0))
    375                        {
    376                            langId = mLangLookup[a][b][i]->mnLang;
    377                            break;
    378                        }
    379                        else if (langId == 0)
    380                        {
    381                            // possible fallback code
    382                            langId = mLangLookup[a][b][i]->mnLang;
    383                        }
    384                        ++i;
    385                    }
    386                }
    387            }
    388            break;
    389            case 3:
    390            {
    391                a = language[0] - 'a';
    392                b = language[1] - 'a';
    393                if (mLangLookup[a][b])
    394                {
    395                    while (mLangLookup[a][b][i])
    396                    {
    397                        if (mLangLookup[a][b][i]->maLangStr[2] != language[2])
    398                        {
    399                            ++i;
    400                            continue;
    401                        }
    402                        if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0))
    403                        {
    404                            langId = mLangLookup[a][b][i]->mnLang;
    405                            break;
    406                        }
    407                        else if (langId == 0)
    408                        {
    409                            // possible fallback code
    410                            langId = mLangLookup[a][b][i]->mnLang;
    411                        }
    412                        ++i;
    413                    }
    414                }
    415            }
    416            break;
    417            default:
    418                break;
    419        }
    420        if (langId == 0) langId = 0x409;
    421        return langId;
    422    }
    423    const IsoLangEntry * findEntryById(unsigned short langId) const
    424    {
    425        static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry);
    426        int window = mSeedPosition;
    427        int guess = mSeedPosition - 1;
    428        while (LANG_ENTRIES[guess].mnLang != langId)
    429        {
    430            window /= 2;
    431            if (window == 0) return NULL;
    432            guess += (LANG_ENTRIES[guess].mnLang > langId)? -window : window;
    433            while (guess >= maxIndex)
    434            {
    435                window /= 2;
    436                guess -= window;
    437                assert(window);
    438            }
    439        }
    440        return &LANG_ENTRIES[guess];
    441    }
    442 
    443    CLASS_NEW_DELETE;
    444 
    445 private:
    446    const IsoLangEntry ** mLangLookup[26][26];
    447    int mSeedPosition;
    448 };
    449 
    450 } // namespace graphite2