tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucsdet.cpp (4889B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ********************************************************************************
      5 *   Copyright (C) 2005-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 ********************************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_CONVERSION
     13 #include "unicode/ucsdet.h"
     14 #include "csdetect.h"
     15 #include "csmatch.h"
     16 #include "csrsbcs.h"
     17 #include "csrmbcs.h"
     18 #include "csrutf8.h"
     19 #include "csrucode.h"
     20 #include "csr2022.h"
     21 
     22 #include "cmemory.h"
     23 
     24 U_NAMESPACE_USE
     25 
     // NEW_ARRAY(type, count): requests (count) * sizeof(type) bytes from
     // uprv_malloc and yields the result cast to `type *`. The expansion is
     // wrapped in parentheses so that it behaves as a single operand even when
     // the caller applies a postfix operator (indexing, ->) directly to it.
     #define NEW_ARRAY(type,count) ((type *) uprv_malloc((count) * sizeof(type)))
     // DELETE_ARRAY(array): passes the pointer to uprv_free.
     #define DELETE_ARRAY(array) uprv_free((void *) (array))
     28 
     29 U_CDECL_BEGIN
     30 
     31 U_CAPI UCharsetDetector * U_EXPORT2
     32 ucsdet_open(UErrorCode   *status)
     33 {
     34    if(U_FAILURE(*status)) {
     35        return nullptr;
     36    }
     37 
     38    CharsetDetector* csd = new CharsetDetector(*status);
     39 
     40    if (U_FAILURE(*status)) {
     41        delete csd;
     42        csd = nullptr;
     43    }
     44 
     45    return (UCharsetDetector *) csd;
     46 }
     47 
     48 U_CAPI void U_EXPORT2
     49 ucsdet_close(UCharsetDetector *ucsd)
     50 {
     51    CharsetDetector *csd = (CharsetDetector *) ucsd;
     52    delete csd;
     53 }
     54 
     55 U_CAPI void U_EXPORT2
     56 ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
     57 {
     58    if(U_FAILURE(*status)) {
     59        return;
     60    }
     61 
     62    ((CharsetDetector *) ucsd)->setText(textIn, len);
     63 }
     64 
     65 U_CAPI const char * U_EXPORT2
     66 ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
     67 {
     68    if(U_FAILURE(*status)) {
     69        return nullptr;
     70    }
     71 
     72    return ((CharsetMatch *) ucsm)->getName();
     73 }
     74 
     75 U_CAPI int32_t U_EXPORT2
     76 ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
     77 {
     78    if(U_FAILURE(*status)) {
     79        return 0;
     80    }
     81 
     82    return ((CharsetMatch *) ucsm)->getConfidence();
     83 }
     84 
     85 U_CAPI const char * U_EXPORT2
     86 ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
     87 {
     88    if(U_FAILURE(*status)) {
     89        return nullptr;
     90    }
     91 
     92    return ((CharsetMatch *) ucsm)->getLanguage();
     93 }
     94 
     95 U_CAPI const UCharsetMatch * U_EXPORT2
     96 ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
     97 {
     98    if(U_FAILURE(*status)) {
     99        return nullptr;
    100    }
    101 
    102    return (const UCharsetMatch *) ((CharsetDetector *) ucsd)->detect(*status);
    103 }
    104 
    105 U_CAPI void U_EXPORT2
    106 ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
    107 {
    108    if(U_FAILURE(*status)) {
    109        return;
    110    }
    111 
    112    ((CharsetDetector *) ucsd)->setDeclaredEncoding(encoding,length);
    113 }
    114 
    115 U_CAPI const UCharsetMatch**
    116 ucsdet_detectAll(UCharsetDetector *ucsd,
    117                 int32_t *maxMatchesFound, UErrorCode *status)
    118 {
    119    if(U_FAILURE(*status)) {
    120        return nullptr;
    121    }
    122 
    123    CharsetDetector *csd = (CharsetDetector *) ucsd;
    124 
    125    return (const UCharsetMatch**)csd->detectAll(*maxMatchesFound,*status);
    126 }
    127 
    128 // U_CAPI  const char * U_EXPORT2
    129 // ucsdet_getDetectableCharsetName(const UCharsetDetector *csd, int32_t index, UErrorCode *status)
    130 // {
    131 //     if(U_FAILURE(*status)) {
    132 //         return 0;
    133 //     }
    134 //     return csd->getCharsetName(index,*status);
    135 // }
    136 
    137 // U_CAPI  int32_t U_EXPORT2
    138 // ucsdet_getDetectableCharsetsCount(const UCharsetDetector *csd, UErrorCode *status)
    139 // {
    140 //     if(U_FAILURE(*status)) {
    141 //         return -1;
    142 //     }
    143 //     return UCharsetDetector::getDetectableCount();
    144 // }
    145 
    146 U_CAPI  UBool U_EXPORT2
    147 ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
    148 {
    149    // todo: could use an error return...
    150    if (ucsd == nullptr) {
    151        return false;
    152    }
    153 
    154    return ((CharsetDetector *) ucsd)->getStripTagsFlag();
    155 }
    156 
    157 U_CAPI  UBool U_EXPORT2
    158 ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
    159 {
    160    // todo: could use an error return...
    161    if (ucsd == nullptr) {
    162        return false;
    163    }
    164 
    165    CharsetDetector *csd = (CharsetDetector *) ucsd;
    166    UBool prev = csd->getStripTagsFlag();
    167 
    168    csd->setStripTagsFlag(filter);
    169 
    170    return prev;
    171 }
    172 
    173 U_CAPI  int32_t U_EXPORT2
    174 ucsdet_getUChars(const UCharsetMatch *ucsm,
    175                 char16_t *buf, int32_t cap, UErrorCode *status)
    176 {
    177    if(U_FAILURE(*status)) {
    178        return 0;
    179    }
    180 
    181    return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
    182 }
    183 
    184 U_CAPI void U_EXPORT2
    185 ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
    186 {
    187    ((CharsetDetector *)ucsd)->setDetectableCharset(encoding, enabled, *status);
    188 }
    189 
    190 U_CAPI  UEnumeration * U_EXPORT2
    191 ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
    192 {
    193    return CharsetDetector::getAllDetectableCharsets(*status);
    194 }
    195 
    196 U_CAPI UEnumeration * U_EXPORT2
    197 ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status)
    198 {
    199    return ((CharsetDetector *)ucsd)->getDetectableCharsets(*status);
    200 }
    201 
    202 U_CDECL_END
    203 
    204 
    205 #endif