tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

csrsbcs.cpp (56686B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2005-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #include "cmemory.h"
     13 
     14 #if !UCONFIG_NO_CONVERSION
     15 #include "csrsbcs.h"
     16 #include "csmatch.h"
     17 
// Number of bytes that make up one n-gram (not referenced in the code visible here).
#define N_GRAM_SIZE 3
// Keeps the low 24 bits (three bytes); NGramParser::addByte applies it after
// shifting a new byte into the running n-gram.
#define N_GRAM_MASK 0xFFFFFF
     20 
     21 U_NAMESPACE_BEGIN
     22 
     23 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
     24 : ngram(0), byteIndex(0)
     25 {
     26    ngramList = theNgramList;
     27    charMap   = theCharMap;
     28 
     29    ngramCount = hitCount = 0;
     30 }
     31 
     32 NGramParser::~NGramParser()
     33 {
     34 }
     35 
     36 /*
     37 * Binary search for value in table, which must have exactly 64 entries.
     38 */
     39 
     40 int32_t NGramParser::search(const int32_t *table, int32_t value)
     41 {
     42    int32_t index = 0;
     43 
     44    if (table[index + 32] <= value) {
     45        index += 32;
     46    }
     47 
     48    if (table[index + 16] <= value) {
     49        index += 16;
     50    }
     51 
     52    if (table[index + 8] <= value) {
     53        index += 8;
     54    }
     55 
     56    if (table[index + 4] <= value) {
     57        index += 4;
     58    }
     59 
     60    if (table[index + 2] <= value) {
     61        index += 2;
     62    }
     63 
     64    if (table[index + 1] <= value) {
     65        index += 1;
     66    }
     67 
     68    if (table[index] > value) {
     69        index -= 1;
     70    }
     71 
     72    if (index < 0 || table[index] != value) {
     73        return -1;
     74    }
     75 
     76    return index;
     77 }
     78 
     79 void NGramParser::lookup(int32_t thisNgram)
     80 {
     81    ngramCount += 1;
     82 
     83    if (search(ngramList, thisNgram) >= 0) {
     84        hitCount += 1;
     85    }
     86 
     87 }
     88 
     89 void NGramParser::addByte(int32_t b)
     90 {
     91    ngram = ((ngram << 8) + b) & N_GRAM_MASK;
     92    lookup(ngram);
     93 }
     94 
     95 int32_t NGramParser::nextByte(InputText *det)
     96 {
     97    if (byteIndex >= det->fInputLen) {
     98        return -1;
     99    }
    100 
    101    return det->fInputBytes[byteIndex++];
    102 }
    103 
    104 void NGramParser::parseCharacters(InputText *det)
    105 {
    106    int32_t b;
    107    bool ignoreSpace = false;
    108 
    109    while ((b = nextByte(det)) >= 0) {
    110        uint8_t mb = charMap[b];
    111 
    112        // TODO: 0x20 might not be a space in all character sets...
    113        if (mb != 0) {
    114            if (!(mb == 0x20 && ignoreSpace)) {
    115                addByte(mb);
    116            }
    117 
    118            ignoreSpace = (mb == 0x20);
    119        }
    120    }
    121 }
    122 
    123 int32_t NGramParser::parse(InputText *det)
    124 {
    125    parseCharacters(det);
    126 
    127    // TODO: Is this OK? The buffer could have ended in the middle of a word...
    128    addByte(0x20);
    129 
    130    double rawPercent = static_cast<double>(hitCount) / static_cast<double>(ngramCount);
    131 
    132    //            if (rawPercent <= 2.0) {
    133    //                return 0;
    134    //            }
    135 
    136    // TODO - This is a bit of a hack to take care of a case
    137    // were we were getting a confidence of 135...
    138    if (rawPercent > 0.33) {
    139        return 98;
    140    }
    141 
    142    return static_cast<int32_t>(rawPercent * 300.0);
    143 }
    144 
    145 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * 256-entry byte-to-byte map, one row per high nibble. It is indexed by the
 * raw input byte in NGramParser_IBM420::nextByte() for every byte that
 * isLamAlef() does not claim. Many entries map a byte to itself; the others
 * redirect it to a different byte (e.g. 0x43 -> 0x42, 0x51 -> 0x49), and
 * everything below 0x42 becomes 0x40.
 * NOTE(review): the name suggests this collapses shaped Arabic presentation
 * forms onto a base form - confirm against the IBM-420 code chart.
 */
static const uint8_t unshapeMap_IBM420[] = {
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
/* 4- */    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 
/* 5- */    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 
/* 6- */    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
/* 7- */    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 
/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F, 
/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E, 
/* A- */    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF, 
/* B- */    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF, 
/* C- */    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF, 
/* D- */    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF, 
/* E- */    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
/* F- */    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
};
    165 
    166 NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap)
    167 {
    168 alef = 0x00;
    169 }
    170 
    171 NGramParser_IBM420::~NGramParser_IBM420() {}
    172 
    173 int32_t NGramParser_IBM420::isLamAlef(int32_t b)
    174 {
    175 if(b == 0xB2 || b == 0xB3){
    176         	return 0x47;        		
    177        }else if(b == 0xB4 || b == 0xB5){
    178         	return 0x49;
    179        }else if(b == 0xB8 || b == 0xB9){
    180         	return 0x56;
    181        }else
    182         	return 0x00;
    183 }
    184 
    185 /*
    186 * Arabic shaping needs to be done manually. Cannot call ArabicShaping class
    187 * because CharsetDetector is dealing with bytes not Unicode code points. We could
    188 * convert the bytes to Unicode code points but that would leave us dependent
    189 * on CharsetICU which we try to avoid. IBM420 converter amongst different versions
    190 * of JDK can produce different results and therefore is also avoided.
    191 */ 
    192 int32_t NGramParser_IBM420::nextByte(InputText *det)
    193 {
    194 
    195    if (byteIndex >= det->fInputLen || det->fInputBytes[byteIndex] == 0) {
    196        return -1;
    197    }              
    198    int next;
    199             
    200    alef = isLamAlef(det->fInputBytes[byteIndex]);
    201    if(alef != 0x00)
    202        next = 0xB1 & 0xFF;
    203    else
    204        next = unshapeMap_IBM420[det->fInputBytes[byteIndex]& 0xFF] & 0xFF;
    205            
    206    byteIndex++;
    207             
    208    return next;
    209 }
    210 
    211 void NGramParser_IBM420::parseCharacters(InputText *det)
    212 {
    213 int32_t b;
    214    bool ignoreSpace = false;
    215 
    216    while ((b = nextByte(det)) >= 0) {
    217        uint8_t mb = charMap[b];
    218 
    219        // TODO: 0x20 might not be a space in all character sets...
    220        if (mb != 0) {
    221            if (!(mb == 0x20 && ignoreSpace)) {
    222                addByte(mb);
    223            }
    224            ignoreSpace = (mb == 0x20);
    225        }
    226 	
    227 	if(alef != 0x00){
    228            mb = charMap[alef & 0xFF];
    229                     
    230            // TODO: 0x20 might not be a space in all character sets...
    231            if (mb != 0) {
    232                if (!(mb == 0x20 && ignoreSpace)) {
    233                    addByte(mb);                    
    234                }
    235                         
    236                ignoreSpace = (mb == 0x20);
    237            }
    238                	 
    239        }
    240    }
    241 }
    242 #endif
    243 
    244 CharsetRecog_sbcs::CharsetRecog_sbcs()
    245 {
    246    // nothing else to do
    247 }
    248 
    249 CharsetRecog_sbcs::~CharsetRecog_sbcs()
    250 {
    251    // nothing to do
    252 }
    253 
    254 int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
    255 {
    256    NGramParser parser(ngrams, byteMap);
    257    int32_t result;
    258 
    259    result = parser.parse(det);
    260 
    261    return result;
    262 }
    263 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, 0xC0-0xDE map onto
 * 0xE0-0xFE (0xD7 and 0xF7 give 0x20), most remaining bytes give 0x20,
 * and index 0x27 gives 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably this is
 * passed as the charMap of an NGramParser for ISO-8859-1, where a 0x00 entry
 * makes parseCharacters() drop the byte.
 */
static const uint8_t charMap_8859_1[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
   0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
};
    298 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, rows 0xA0-0xAF and
 * 0xB0-0xBF share their letter targets in the 0xB1-0xBF range, 0xC0-0xDE map
 * onto 0xE0-0xFE, most remaining bytes give 0x20, and index 0x27 gives 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * charMap of an NGramParser for ISO-8859-2.
 */
static const uint8_t charMap_8859_2[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, 
   0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 
   0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, 
   0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 
};
    333 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A. In the upper half,
 * 0xA1-0xAF and 0xF1-0xFF share targets in 0xF1-0xFF, and 0xB0-0xCF and
 * 0xD0-0xEF share targets in 0xD0-0xEF. Most remaining bytes give 0x20 and
 * index 0x27 gives 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * charMap of an NGramParser for ISO-8859-5.
 */
static const uint8_t charMap_8859_5[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 
   0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
   0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
   0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 
};
    368 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A; 0xC1-0xDA and 0xE0-0xEA
 * map to themselves; every other byte gives 0x20 except index 0x27, which
 * gives 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * charMap of an NGramParser for ISO-8859-6.
 */
static const uint8_t charMap_8859_6[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
   0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
   0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
   0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
};
    403 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A. In the upper half most of
 * 0xC1-0xDB is redirected onto 0xE1-0xFB, while a handful of bytes in
 * 0xB6-0xBF are redirected onto 0xDC-0xDF and 0xFC-0xFE. Most remaining
 * bytes give 0x20 and index 0x27 gives 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * charMap of an NGramParser for ISO-8859-7.
 */
static const uint8_t charMap_8859_7[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, 
   0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE, 
   0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, 
};
    438 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A; 0xB5 and 0xE0-0xFA map to
 * themselves; every other byte gives 0x20 except index 0x27, which gives
 * 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * charMap of an NGramParser for ISO-8859-8.
 */
static const uint8_t charMap_8859_8[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
   0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20, 
};
    473 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, 0xC0-0xDE map onto
 * 0xE0-0xFE with two exceptions (0xD7 gives 0x20, 0xDD gives 0x69), most
 * remaining bytes give 0x20, and index 0x27 gives 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * charMap of an NGramParser for ISO-8859-9.
 */
static const uint8_t charMap_8859_9[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
   0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
};
    508 
/*
 * 64 n-grams, each three bytes packed into the low 24 bits, listed in
 * ascending order. 64 is the table size NGramParser::search() requires.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * frequent trigrams of a windows-1251 language model - confirm the language.
 */
static const int32_t ngrams_windows_1251[] = {
   0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, 
   0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, 
   0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, 
   0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, 
};
    515 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, 0xC0-0xDF and 0xE0-0xFF
 * both map to 0xE0-0xFF, a scattering of bytes in 0x80-0xBF map to letter
 * codes in that same range, most remaining bytes give 0x20, and index 0x27
 * gives 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably paired
 * with ngrams_windows_1251 as the charMap of an NGramParser.
 */
static const uint8_t charMap_windows_1251[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 
   0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 
   0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, 
   0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, 
   0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, 
   0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 
   0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 
};
    550 
/*
 * 64 n-grams, each three bytes packed into the low 24 bits, listed in
 * ascending order. 64 is the table size NGramParser::search() requires.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * frequent trigrams of a windows-1256 language model - confirm the language.
 */
static const int32_t ngrams_windows_1256[] = {
   0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, 
   0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, 
   0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, 
   0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, 
};
    557 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A; 0xC0-0xEF map to themselves
 * except 0xD7, which gives 0x20; a few bytes in 0x80-0xBF and 0xF0-0xFF map
 * to themselves or to a neighbour (0x8C gives 0x9C); most remaining bytes
 * give 0x20; index 0x27 gives 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably paired
 * with ngrams_windows_1256 as the charMap of an NGramParser.
 */
static const uint8_t charMap_windows_1256[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 
   0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, 
   0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
   0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
   0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, 
   0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
   0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 
   0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 
   0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, 
   0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, 
};
    592 
/*
 * 64 n-grams, each three bytes packed into the low 24 bits, listed in
 * ascending order. 64 is the table size NGramParser::search() requires.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * frequent trigrams of a KOI8-R language model - confirm the language.
 */
static const int32_t ngrams_KOI8_R[] = {
   0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, 
   0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, 
   0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, 
   0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, 
};
    599 
/*
 * 256-entry byte map, eight entries per row, indexed by input byte.
 * 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, 0xC0-0xDF and 0xE0-0xFF
 * both map to 0xC0-0xDF, 0xA3 and 0xB3 both give 0xA3, every other byte
 * gives 0x20 except index 0x27, which gives 0x00.
 * NOTE(review): the consumer is not visible in this chunk; presumably paired
 * with ngrams_KOI8_R as the charMap of an NGramParser.
 */
static const uint8_t charMap_KOI8_R[] = {
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
   0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 
   0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
   0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 
   0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
   0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
   0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
   0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
   0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 
   0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 
   0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 
   0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
};
    634 
    635 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * 64 n-grams, each three bytes packed into the low 24 bits, listed in
 * ascending order. 64 is the table size NGramParser::search() requires.
 * Here 0x40, not 0x20, is the byte that appears at n-gram boundaries.
 * NOTE(review): the consumer is not visible in this chunk; the name suggests
 * Hebrew text in IBM-424 stored right-to-left - confirm.
 */
static const int32_t ngrams_IBM424_he_rtl[] = {
   0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 
   0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 
   0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, 
   0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, 
};
    642 
    643 static const int32_t ngrams_IBM424_he_ltr[] = {
    644    0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
    645    0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
    646    0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
    647    0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
    648 };
    649 
    650 static const uint8_t charMap_IBM424_he[] = {
    651 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
    652 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    653 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    654 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    655 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    656 /* 4- */    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    657 /* 5- */    0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    658 /* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    659 /* 7- */    0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, 
    660 /* 8- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    661 /* 9- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    662 /* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    663 /* B- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    664 /* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    665 /* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    666 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    667 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    668 };
    669 
    670 static const int32_t ngrams_IBM420_ar_rtl[] = {
    671    0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
    672    0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
    673    0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
    674    0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
    675 };
    676 
    677 static const int32_t ngrams_IBM420_ar_ltr[] = {
    678    0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, 
    679    0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
    680    0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
    681    0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
    682 };
    683 
    684 static const uint8_t charMap_IBM420_ar[]= {
    685 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
    686 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    687 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    688 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    689 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    690 /* 4- */    0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    691 /* 5- */    0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    692 /* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    693 /* 7- */    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
    694 /* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 
    695 /* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 
    696 /* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 
    697 /* B- */    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 
    698 /* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, 
    699 /* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
    700 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, 
    701 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, 
    702 };
    703 #endif
    704 
    705 //ISO-8859-1,2,5,6,7,8,9 Ngrams
    706 
    707 struct NGramsPlusLang {
    708    const int32_t ngrams[64];
    709    const char *  lang;
    710 };
    711 
    712 static const NGramsPlusLang ngrams_8859_1[] =  {
    713  { 
    714    {
    715    0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, 
    716    0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, 
    717    0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, 
    718    0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, 
    719    },
    720    "en"
    721  },
    722  { 
    723    {
    724    0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, 
    725    0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, 
    726    0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, 
    727    0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, 
    728    },
    729    "da"
    730  },
    731  { 
    732    {
    733    0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, 
    734    0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, 
    735    0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, 
    736    0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, 
    737    },
    738    "de"
    739  },
    740  {
    741    {
    742    0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, 
    743    0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, 
    744    0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, 
    745    0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, 
    746    },
    747    "es"
    748  },
    749  {
    750    {
    751    0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
    752    0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
    753    0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
    754    0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
    755    },
    756    "fr"
    757  },
    758  {
    759    {
    760    0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
    761    0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
    762    0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
    763    0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
    764    },
    765    "it"
    766  },
    767  {
    768    {
    769    0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
    770    0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
    771    0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
    772    0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
    773    },
    774    "nl"
    775  },
    776  {
    777    {
    778    0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
    779    0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
    780    0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
    781    0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
    782    },
    783    "no"
    784  },
    785  {
    786    {
    787    0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    788    0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
    789    0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
    790    0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
    791    },
    792    "pt"
    793  },
    794  {
    795    {
    796    0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
    797    0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
    798    0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
    799    0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
    800    },
    801    "sv"
    802  }
    803 };
    804 
    805 
    806 static const NGramsPlusLang ngrams_8859_2[] =  {
    807  {
    808    {
    809    0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
    810    0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
    811    0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
    812    0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
    813    },
    814    "cs"
    815  },
    816  {
    817    {
    818    0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
    819    0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
    820    0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
    821    0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
    822    },
    823    "hu"
    824  },
    825  {
    826    {
    827    0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
    828    0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
    829    0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
    830    0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
    831    },
    832    "pl"
    833  },
    834  {
    835    {
    836    0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
    837    0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
    838    0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
    839    0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
    840    },
    841    "ro"
    842  }
    843 };
    844 
    845 static const int32_t ngrams_8859_5_ru[] = {
    846    0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
    847    0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
    848    0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
    849    0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
    850 };
    851 
    852 static const int32_t ngrams_8859_6_ar[] = {
    853    0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
    854    0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
    855    0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
    856    0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
    857 };
    858 
    859 static const int32_t ngrams_8859_7_el[] = {
    860    0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
    861    0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
    862    0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
    863    0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
    864 };
    865 
    866 static const int32_t ngrams_8859_8_I_he[] = {
    867    0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
    868    0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
    869    0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
    870    0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
    871 };
    872 
    873 static const int32_t ngrams_8859_8_he[] = {
    874    0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
    875    0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
    876    0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
    877    0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
    878 };
    879 
    880 static const int32_t ngrams_8859_9_tr[] = {
    881    0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
    882    0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
    883    0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
    884    0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
    885 };
    886 
    887 CharsetRecog_8859_1::~CharsetRecog_8859_1()
    888 {
    889    // nothing to do
    890 }
    891 
    892 UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
    893    const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1";
    894    uint32_t i;
    895    int32_t bestConfidenceSoFar = -1;
    896    for (i=0; i < UPRV_LENGTHOF(ngrams_8859_1) ; i++) {
    897        const int32_t *ngrams = ngrams_8859_1[i].ngrams;
    898        const char    *lang   = ngrams_8859_1[i].lang;
    899        int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1);
    900        if (confidence > bestConfidenceSoFar) {
    901            results->set(textIn, this, confidence, name, lang);
    902            bestConfidenceSoFar = confidence;
    903        }
    904    }
    905    return (bestConfidenceSoFar > 0);
    906 }
    907 
    908 const char *CharsetRecog_8859_1::getName() const
    909 {
    910    return "ISO-8859-1";
    911 }
    912 
    913 
    914 CharsetRecog_8859_2::~CharsetRecog_8859_2()
    915 {
    916    // nothing to do
    917 }
    918 
    919 UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
    920    const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2";
    921    uint32_t i;
    922    int32_t bestConfidenceSoFar = -1;
    923    for (i=0; i < UPRV_LENGTHOF(ngrams_8859_2) ; i++) {
    924        const int32_t *ngrams = ngrams_8859_2[i].ngrams;
    925        const char    *lang   = ngrams_8859_2[i].lang;
    926        int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2);
    927        if (confidence > bestConfidenceSoFar) {
    928            results->set(textIn, this, confidence, name, lang);
    929            bestConfidenceSoFar = confidence;
    930        }
    931    }
    932    return (bestConfidenceSoFar > 0);
    933 }
    934 
    935 const char *CharsetRecog_8859_2::getName() const
    936 {
    937    return "ISO-8859-2";
    938 }
    939 
    940 
    941 CharsetRecog_8859_5::~CharsetRecog_8859_5()
    942 {
    943    // nothing to do
    944 }
    945 
    946 const char *CharsetRecog_8859_5::getName() const
    947 {
    948    return "ISO-8859-5";
    949 }
    950 
    951 CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
    952 {
    953    // nothing to do
    954 }
    955 
    956 const char *CharsetRecog_8859_5_ru::getLanguage() const
    957 {
    958    return "ru";
    959 }
    960 
    961 UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const
    962 {
    963    int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
    964    results->set(textIn, this, confidence);
    965    return (confidence > 0);
    966 }
    967 
    968 CharsetRecog_8859_6::~CharsetRecog_8859_6()
    969 {
    970    // nothing to do
    971 }
    972 
    973 const char *CharsetRecog_8859_6::getName() const
    974 {
    975    return "ISO-8859-6";
    976 }
    977 
    978 CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
    979 {
    980    // nothing to do
    981 }
    982 
    983 const char *CharsetRecog_8859_6_ar::getLanguage() const
    984 {
    985    return "ar";
    986 }
    987 
    988 UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const
    989 {
    990    int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
    991    results->set(textIn, this, confidence);
    992    return (confidence > 0);
    993 }
    994 
    995 CharsetRecog_8859_7::~CharsetRecog_8859_7()
    996 {
    997    // nothing to do
    998 }
    999 
   1000 const char *CharsetRecog_8859_7::getName() const
   1001 {
   1002    return "ISO-8859-7";
   1003 }
   1004 
   1005 CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
   1006 {
   1007    // nothing to do
   1008 }
   1009 
   1010 const char *CharsetRecog_8859_7_el::getLanguage() const
   1011 {
   1012    return "el";
   1013 }
   1014 
   1015 UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const
   1016 {
   1017    const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7";
   1018    int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
   1019    results->set(textIn, this, confidence, name, "el");
   1020    return (confidence > 0);
   1021 }
   1022 
   1023 CharsetRecog_8859_8::~CharsetRecog_8859_8()
   1024 {
   1025    // nothing to do
   1026 }
   1027 
   1028 const char *CharsetRecog_8859_8::getName() const 
   1029 {
   1030    return "ISO-8859-8";
   1031 }
   1032 
   1033 CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
   1034 {
   1035    // nothing to do
   1036 }
   1037 
   1038 const char *CharsetRecog_8859_8_I_he::getName() const
   1039 {
   1040    return "ISO-8859-8-I";
   1041 }
   1042 
   1043 const char *CharsetRecog_8859_8_I_he::getLanguage() const
   1044 {
   1045    return "he";
   1046 }
   1047 
   1048 UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const
   1049 {
   1050    const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I";
   1051    int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
   1052    results->set(textIn, this, confidence, name, "he");
   1053    return (confidence > 0);
   1054 }
   1055 
   1056 CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
   1057 {
   1058    // od ot gnihton
   1059 }
   1060 
   1061 const char *CharsetRecog_8859_8_he::getLanguage() const
   1062 {
   1063    return "he";
   1064 }
   1065 
   1066 UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const
   1067 {
   1068    const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8";
   1069    int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
   1070    results->set(textIn, this, confidence, name, "he");
   1071    return (confidence > 0);
   1072 }
   1073 
   1074 CharsetRecog_8859_9::~CharsetRecog_8859_9()
   1075 {
   1076    // nothing to do
   1077 }
   1078 
   1079 const char *CharsetRecog_8859_9::getName() const
   1080 {
   1081    return "ISO-8859-9";
   1082 }
   1083 
   1084 CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
   1085 {
   1086    // nothing to do
   1087 }
   1088 
   1089 const char *CharsetRecog_8859_9_tr::getLanguage() const
   1090 {
   1091    return "tr";
   1092 }
   1093 
   1094 UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const
   1095 {
   1096    const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9";
   1097    int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
   1098    results->set(textIn, this, confidence, name, "tr");
   1099    return (confidence > 0);
   1100 }
   1101 
   1102 CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
   1103 {
   1104    // nothing to do
   1105 }
   1106 
   1107 const char *CharsetRecog_windows_1256::getName() const
   1108 {
   1109    return  "windows-1256";
   1110 }
   1111 
   1112 const char *CharsetRecog_windows_1256::getLanguage() const
   1113 {
   1114    return "ar";
   1115 }
   1116 
   1117 UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const
   1118 {
   1119    int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
   1120    results->set(textIn, this, confidence);
   1121    return (confidence > 0);
   1122 }
   1123 
   1124 CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
   1125 {
   1126    // nothing to do
   1127 }
   1128 
   1129 const char *CharsetRecog_windows_1251::getName() const
   1130 {
   1131    return  "windows-1251";
   1132 }
   1133 
   1134 const char *CharsetRecog_windows_1251::getLanguage() const
   1135 {
   1136    return "ru";
   1137 }
   1138 
   1139 UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const
   1140 {
   1141    int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
   1142    results->set(textIn, this, confidence);
   1143    return (confidence > 0);
   1144 }
   1145 
   1146 CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
   1147 {
   1148    // nothing to do
   1149 }
   1150 
   1151 const char *CharsetRecog_KOI8_R::getName() const
   1152 {
   1153    return  "KOI8-R";
   1154 }
   1155 
   1156 const char *CharsetRecog_KOI8_R::getLanguage() const
   1157 {
   1158    return "ru";
   1159 }
   1160 
   1161 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
   1162 {
   1163    int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
   1164    results->set(textIn, this, confidence);
   1165    return (confidence > 0);
   1166 }
   1167 
   1168 #if !UCONFIG_ONLY_HTML_CONVERSION
   1169 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
   1170 {
   1171    // nothing to do
   1172 }
   1173 
   1174 const char *CharsetRecog_IBM424_he::getLanguage() const
   1175 {
   1176    return "he";
   1177 }
   1178 
   1179 CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
   1180 {
   1181    // nothing to do
   1182 }
   1183 
   1184 const char *CharsetRecog_IBM424_he_rtl::getName() const
   1185 {
   1186    return  "IBM424_rtl";
   1187 }
   1188 
   1189 UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const
   1190 {
   1191    int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
   1192    results->set(textIn, this, confidence);
   1193    return (confidence > 0);
   1194 }
   1195 
   1196 CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
   1197 {
   1198    // nothing to do
   1199 }
   1200 
   1201 const char *CharsetRecog_IBM424_he_ltr::getName() const
   1202 {
   1203    return  "IBM424_ltr";
   1204 }
   1205 
   1206 UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const
   1207 {
   1208    int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
   1209    results->set(textIn, this, confidence);
   1210    return (confidence > 0);
   1211 }
   1212 
   1213 CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
   1214 {
   1215    // nothing to do
   1216 }
   1217 
   1218 const char *CharsetRecog_IBM420_ar::getLanguage() const
   1219 {
   1220    return "ar";
   1221 }
   1222 
   1223    
   1224 int32_t CharsetRecog_IBM420_ar::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
   1225 {
   1226    NGramParser_IBM420 parser(ngrams, byteMap);
   1227    int32_t result;
   1228    
   1229    result = parser.parse(det);
   1230        
   1231    return result;
   1232 }
   1233 
   1234 CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
   1235 {
   1236    // nothing to do
   1237 }
   1238 
   1239 const char *CharsetRecog_IBM420_ar_rtl::getName() const
   1240 {
   1241    return  "IBM420_rtl";
   1242 }
   1243 
   1244 UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const
   1245 {
   1246    int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
   1247    results->set(textIn, this, confidence);
   1248    return (confidence > 0);
   1249 }
   1250 
   1251 CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
   1252 {
   1253    // nothing to do
   1254 }
   1255 
   1256 const char *CharsetRecog_IBM420_ar_ltr::getName() const
   1257 {
   1258    return  "IBM420_ltr";
   1259 }
   1260 
   1261 UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
   1262 {
   1263    int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
   1264    results->set(textIn, this, confidence);
   1265    return (confidence > 0);
   1266 }
   1267 #endif
   1268 
   1269 U_NAMESPACE_END
   1270 #endif