tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

csrsbcs.h (6978B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2005-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 */
      9 
     10 #ifndef __CSRSBCS_H
     11 #define __CSRSBCS_H
     12 
     13 #include "unicode/uobject.h"
     14 
     15 #if !UCONFIG_NO_CONVERSION
     16 
     17 #include "csrecog.h"
     18 
     19 U_NAMESPACE_BEGIN
     20 
     21 class NGramParser : public UMemory
     22 {
     23 private:
     24    int32_t ngram;
     25    const int32_t *ngramList;    
     26 
     27    int32_t ngramCount;
     28    int32_t hitCount;
     29 
     30 protected:
     31 int32_t byteIndex;
     32    const uint8_t *charMap;
     33 
     34 void addByte(int32_t b);
     35 
     36 public:
     37    NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
     38    virtual ~NGramParser();
     39 
     40 private:
     41    /*
     42    * Binary search for value in table, which must have exactly 64 entries.
     43    */
     44    int32_t search(const int32_t *table, int32_t value);
     45 
     46    void lookup(int32_t thisNgram);
     47    
     48    virtual int32_t nextByte(InputText *det);
     49 virtual void parseCharacters(InputText *det);
     50 
     51 public:
     52    int32_t parse(InputText *det);
     53 
     54 };
     55 
     56 #if !UCONFIG_ONLY_HTML_CONVERSION
     57 class NGramParser_IBM420 : public NGramParser
     58 {
     59 public:
     60    NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
     61    ~NGramParser_IBM420();
     62 
     63 private:
     64    int32_t alef;
     65    int32_t isLamAlef(int32_t b);
     66    int32_t nextByte(InputText *det) override;
     67    void parseCharacters(InputText *det) override;
     68 };
     69 #endif
     70 
     71 
     72 class CharsetRecog_sbcs : public CharsetRecognizer
     73 {
     74 public:
     75    CharsetRecog_sbcs();
     76    virtual ~CharsetRecog_sbcs();
     77    virtual const char *getName() const override = 0;
     78    virtual UBool match(InputText *det, CharsetMatch *results) const override = 0;
     79    virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
     80 };
     81 
     82 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
     83 {
     84 public:
     85    virtual ~CharsetRecog_8859_1();
     86    const char *getName() const override;
     87    virtual UBool match(InputText *det, CharsetMatch *results) const override;
     88 };
     89 
     90 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
     91 {
     92 public:
     93    virtual ~CharsetRecog_8859_2();
     94    const char *getName() const override;
     95    virtual UBool match(InputText *det, CharsetMatch *results) const override;
     96 };
     97 
     98 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
     99 {
    100 public:
    101    virtual ~CharsetRecog_8859_5();
    102    const char *getName() const override;
    103 };
    104 
    105 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
    106 {
    107 public:
    108    virtual ~CharsetRecog_8859_6();
    109 
    110    const char *getName() const override;
    111 };
    112 
    113 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
    114 {
    115 public:
    116    virtual ~CharsetRecog_8859_7();
    117 
    118    const char *getName() const override;
    119 };
    120 
    121 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
    122 {
    123 public:
    124    virtual ~CharsetRecog_8859_8();
    125 
    126    virtual const char *getName() const override;
    127 };
    128 
    129 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
    130 {
    131 public:
    132    virtual ~CharsetRecog_8859_9();
    133 
    134    const char *getName() const override;
    135 };
    136 
    137 
    138 
    139 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
    140 {
    141 public:
    142    virtual ~CharsetRecog_8859_5_ru();
    143 
    144    const char *getLanguage() const override;
    145 
    146    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    147 };
    148 
    149 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
    150 {
    151 public:
    152    virtual ~CharsetRecog_8859_6_ar();
    153 
    154    const char *getLanguage() const override;
    155 
    156    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    157 };
    158 
    159 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
    160 {
    161 public:
    162    virtual ~CharsetRecog_8859_7_el();
    163 
    164    const char *getLanguage() const override;
    165 
    166    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    167 };
    168 
    169 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
    170 {
    171 public:
    172    virtual ~CharsetRecog_8859_8_I_he();
    173 
    174    const char *getName() const override;
    175 
    176    const char *getLanguage() const override;
    177 
    178    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    179 };
    180 
    181 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
    182 {
    183 public:
    184    virtual ~CharsetRecog_8859_8_he ();
    185 
    186    const char *getLanguage() const override;
    187 
    188    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    189 };
    190 
    191 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
    192 {
    193 public:
    194    virtual ~CharsetRecog_8859_9_tr ();
    195 
    196    const char *getLanguage() const override;
    197 
    198    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    199 };
    200 
    201 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
    202 {
    203 public:
    204    virtual ~CharsetRecog_windows_1256();
    205 
    206    const char *getName() const override;
    207 
    208    const char *getLanguage() const override;
    209 
    210    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    211 };
    212 
    213 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
    214 {
    215 public:
    216    virtual ~CharsetRecog_windows_1251();
    217 
    218    const char *getName() const override;
    219 
    220    const char *getLanguage() const override;
    221 
    222    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    223 };
    224 
    225 
    226 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
    227 {
    228 public:
    229    virtual ~CharsetRecog_KOI8_R();
    230 
    231    const char *getName() const override;
    232 
    233    const char *getLanguage() const override;
    234 
    235    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    236 };
    237 
    238 #if !UCONFIG_ONLY_HTML_CONVERSION
    239 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
    240 {
    241 public:
    242    virtual ~CharsetRecog_IBM424_he();
    243 
    244    const char *getLanguage() const override;
    245 };
    246 
    247 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
    248 public:
    249    virtual ~CharsetRecog_IBM424_he_rtl();
    250    
    251    const char *getName() const override;
    252    
    253    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    254 };
    255 
    256 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
    257    virtual ~CharsetRecog_IBM424_he_ltr();
    258    
    259    const char *getName() const override;
    260    
    261    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    262 };
    263 
    264 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
    265 {
    266 public:
    267    virtual ~CharsetRecog_IBM420_ar();
    268 
    269    const char *getLanguage() const override;
    270 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const override;
    271    
    272 };
    273 
    274 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
    275 public:
    276    virtual ~CharsetRecog_IBM420_ar_rtl();
    277    
    278    const char *getName() const override;
    279    
    280    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    281 };
    282 
    283 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
    284    virtual ~CharsetRecog_IBM420_ar_ltr();
    285    
    286    const char *getName() const override;
    287    
    288    virtual UBool match(InputText *det, CharsetMatch *results) const override;
    289 };
    290 #endif
    291 
    292 U_NAMESPACE_END
    293 
    294 #endif /* !UCONFIG_NO_CONVERSION */
    295 #endif /* __CSRSBCS_H */