tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

norm2allmodes.h (15445B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * norm2allmodes.h
      9 *
     10 * created on: 2014sep07
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #ifndef __NORM2ALLMODES_H__
     15 #define __NORM2ALLMODES_H__
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_NORMALIZATION
     20 
     21 #include "unicode/edits.h"
     22 #include "unicode/normalizer2.h"
     23 #include "unicode/stringoptions.h"
     24 #include "unicode/unistr.h"
     25 #include "cpputils.h"
     26 #include "normalizer2impl.h"
     27 
     28 U_NAMESPACE_BEGIN
     29 
     30 // Intermediate class:
     31 // Has Normalizer2Impl and does boilerplate argument checking and setup.
     32 class Normalizer2WithImpl : public Normalizer2 {
     33 public:
     34    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
     35    virtual ~Normalizer2WithImpl();
     36 
     37    // normalize
     38    virtual UnicodeString &
     39    normalize(const UnicodeString &src,
     40              UnicodeString &dest,
     41              UErrorCode &errorCode) const override {
     42        if(U_FAILURE(errorCode)) {
     43            dest.setToBogus();
     44            return dest;
     45        }
     46        const char16_t *sArray=src.getBuffer();
     47        if(&dest==&src || sArray==nullptr) {
     48            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     49            dest.setToBogus();
     50            return dest;
     51        }
     52        dest.remove();
     53        ReorderingBuffer buffer(impl, dest);
     54        if(buffer.init(src.length(), errorCode)) {
     55            normalize(sArray, sArray+src.length(), buffer, errorCode);
     56        }
     57        return dest;
     58    }
     59    virtual void
     60    normalize(const char16_t *src, const char16_t *limit,
     61              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
     62 
     63    // normalize and append
     64    virtual UnicodeString &
     65    normalizeSecondAndAppend(UnicodeString &first,
     66                             const UnicodeString &second,
     67                             UErrorCode &errorCode) const override {
     68        return normalizeSecondAndAppend(first, second, true, errorCode);
     69    }
     70    virtual UnicodeString &
     71    append(UnicodeString &first,
     72           const UnicodeString &second,
     73           UErrorCode &errorCode) const override {
     74        return normalizeSecondAndAppend(first, second, false, errorCode);
     75    }
     76    UnicodeString &
     77    normalizeSecondAndAppend(UnicodeString &first,
     78                             const UnicodeString &second,
     79                             UBool doNormalize,
     80                             UErrorCode &errorCode) const {
     81        uprv_checkCanGetBuffer(first, errorCode);
     82        if(U_FAILURE(errorCode)) {
     83            return first;
     84        }
     85        const char16_t *secondArray=second.getBuffer();
     86        if(&first==&second || secondArray==nullptr) {
     87            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     88            return first;
     89        }
     90        int32_t firstLength=first.length();
     91        UnicodeString safeMiddle;
     92        {
     93            ReorderingBuffer buffer(impl, first);
     94            if(buffer.init(firstLength+second.length(), errorCode)) {
     95                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
     96                                   safeMiddle, buffer, errorCode);
     97            }
     98        }  // The ReorderingBuffer destructor finalizes the first string.
     99        if(U_FAILURE(errorCode)) {
    100            // Restore the modified suffix of the first string.
    101            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
    102        }
    103        return first;
    104    }
    105    virtual void
    106    normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
    107                       UnicodeString &safeMiddle,
    108                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    109    virtual UBool
    110    getDecomposition(UChar32 c, UnicodeString &decomposition) const override {
    111        char16_t buffer[4];
    112        int32_t length;
    113        const char16_t *d=impl.getDecomposition(c, buffer, length);
    114        if(d==nullptr) {
    115            return false;
    116        }
    117        if(d==buffer) {
    118            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
    119        } else {
    120            decomposition.setTo(false, d, length);  // read-only alias
    121        }
    122        return true;
    123    }
    124    virtual UBool
    125    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override {
    126        char16_t buffer[30];
    127        int32_t length;
    128        const char16_t *d=impl.getRawDecomposition(c, buffer, length);
    129        if(d==nullptr) {
    130            return false;
    131        }
    132        if(d==buffer) {
    133            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
    134        } else {
    135            decomposition.setTo(false, d, length);  // read-only alias
    136        }
    137        return true;
    138    }
    139    virtual UChar32
    140    composePair(UChar32 a, UChar32 b) const override {
    141        return impl.composePair(a, b);
    142    }
    143 
    144    virtual uint8_t
    145    getCombiningClass(UChar32 c) const override {
    146        return impl.getCC(impl.getNorm16(c));
    147    }
    148 
    149    // quick checks
    150    virtual UBool
    151    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
    152        if(U_FAILURE(errorCode)) {
    153            return false;
    154        }
    155        const char16_t *sArray=s.getBuffer();
    156        if(sArray==nullptr) {
    157            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    158            return false;
    159        }
    160        const char16_t *sLimit=sArray+s.length();
    161        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
    162    }
    163    virtual UNormalizationCheckResult
    164    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
    165        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
    166    }
    167    virtual int32_t
    168    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override {
    169        if(U_FAILURE(errorCode)) {
    170            return 0;
    171        }
    172        const char16_t *sArray=s.getBuffer();
    173        if(sArray==nullptr) {
    174            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    175            return 0;
    176        }
    177        return static_cast<int32_t>(spanQuickCheckYes(sArray, sArray + s.length(), errorCode) - sArray);
    178    }
    179    virtual const char16_t *
    180    spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const = 0;
    181 
    182    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
    183        return UNORM_YES;
    184    }
    185 
    186    const Normalizer2Impl &impl;
    187 };
    188 
    189 class DecomposeNormalizer2 : public Normalizer2WithImpl {
    190 public:
    191    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    192    virtual ~DecomposeNormalizer2();
    193 
    194 private:
    195    virtual void
    196    normalize(const char16_t *src, const char16_t *limit,
    197              ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
    198        impl.decompose(src, limit, &buffer, errorCode);
    199    }
    200    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    201    virtual void
    202    normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
    203                       UnicodeString &safeMiddle,
    204                       ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
    205        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    206    }
    207 
    208    void
    209    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
    210                  Edits *edits, UErrorCode &errorCode) const override {
    211        if (U_FAILURE(errorCode)) {
    212            return;
    213        }
    214        if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
    215            edits->reset();
    216        }
    217        const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
    218        impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
    219        sink.Flush();
    220    }
    221    virtual UBool
    222    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
    223        if(U_FAILURE(errorCode)) {
    224            return false;
    225        }
    226        const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
    227        const uint8_t *sLimit = s + sp.length();
    228        return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode);
    229    }
    230 
    231    virtual const char16_t *
    232    spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
    233        return impl.decompose(src, limit, nullptr, errorCode);
    234    }
    235    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    236    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
    237        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
    238    }
    239    virtual UBool hasBoundaryBefore(UChar32 c) const override {
    240        return impl.hasDecompBoundaryBefore(c);
    241    }
    242    virtual UBool hasBoundaryAfter(UChar32 c) const override {
    243        return impl.hasDecompBoundaryAfter(c);
    244    }
    245    virtual UBool isInert(UChar32 c) const override {
    246        return impl.isDecompInert(c);
    247    }
    248 };
    249 
    250 class ComposeNormalizer2 : public Normalizer2WithImpl {
    251 public:
    252    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
    253        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
    254    virtual ~ComposeNormalizer2();
    255 
    256 private:
    257    virtual void
    258    normalize(const char16_t *src, const char16_t *limit,
    259              ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
    260        impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
    261    }
    262    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    263 
    264    void
    265    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
    266                  Edits *edits, UErrorCode &errorCode) const override {
    267        if (U_FAILURE(errorCode)) {
    268            return;
    269        }
    270        if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
    271            edits->reset();
    272        }
    273        const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
    274        impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
    275                         &sink, edits, errorCode);
    276        sink.Flush();
    277    }
    278 
    279    virtual void
    280    normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
    281                       UnicodeString &safeMiddle,
    282                       ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
    283        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
    284    }
    285 
    286    virtual UBool
    287    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
    288        if(U_FAILURE(errorCode)) {
    289            return false;
    290        }
    291        const char16_t *sArray=s.getBuffer();
    292        if(sArray==nullptr) {
    293            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    294            return false;
    295        }
    296        UnicodeString temp;
    297        ReorderingBuffer buffer(impl, temp);
    298        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
    299            return false;
    300        }
    301        return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode);
    302    }
    303    virtual UBool
    304    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
    305        if(U_FAILURE(errorCode)) {
    306            return false;
    307        }
    308        const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
    309        return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
    310    }
    311    virtual UNormalizationCheckResult
    312    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
    313        if(U_FAILURE(errorCode)) {
    314            return UNORM_MAYBE;
    315        }
    316        const char16_t *sArray=s.getBuffer();
    317        if(sArray==nullptr) {
    318            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    319            return UNORM_MAYBE;
    320        }
    321        UNormalizationCheckResult qcResult=UNORM_YES;
    322        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
    323        return qcResult;
    324    }
    325    virtual const char16_t *
    326    spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &) const override {
    327        return impl.composeQuickCheck(src, limit, onlyContiguous, nullptr);
    328    }
    329    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    330    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
    331        return impl.getCompQuickCheck(impl.getNorm16(c));
    332    }
    333    virtual UBool hasBoundaryBefore(UChar32 c) const override {
    334        return impl.hasCompBoundaryBefore(c);
    335    }
    336    virtual UBool hasBoundaryAfter(UChar32 c) const override {
    337        return impl.hasCompBoundaryAfter(c, onlyContiguous);
    338    }
    339    virtual UBool isInert(UChar32 c) const override {
    340        return impl.isCompInert(c, onlyContiguous);
    341    }
    342 
    343    const UBool onlyContiguous;
    344 };
    345 
    346 class FCDNormalizer2 : public Normalizer2WithImpl {
    347 public:
    348    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    349    virtual ~FCDNormalizer2();
    350 
    351 private:
    352    virtual void
    353    normalize(const char16_t *src, const char16_t *limit,
    354              ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
    355        impl.makeFCD(src, limit, &buffer, errorCode);
    356    }
    357    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    358    virtual void
    359    normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
    360                       UnicodeString &safeMiddle,
    361                       ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
    362        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    363    }
    364    virtual const char16_t *
    365    spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
    366        return impl.makeFCD(src, limit, nullptr, errorCode);
    367    }
    368    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    369    virtual UBool hasBoundaryBefore(UChar32 c) const override {
    370        return impl.hasFCDBoundaryBefore(c);
    371    }
    372    virtual UBool hasBoundaryAfter(UChar32 c) const override {
    373        return impl.hasFCDBoundaryAfter(c);
    374    }
    375    virtual UBool isInert(UChar32 c) const override {
    376        return impl.isFCDInert(c);
    377    }
    378 };
    379 
    380 struct Norm2AllModes : public UMemory {
    381    Norm2AllModes(Normalizer2Impl *i)
    382            : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {}
    383    ~Norm2AllModes();
    384 
    385    static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
    386    static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
    387    static Norm2AllModes *createInstance(const char *packageName,
    388                                         const char *name,
    389                                         UErrorCode &errorCode);
    390 
    391    static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
    392    static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
    393    static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
    394    static const Norm2AllModes *getNFKC_SCFInstance(UErrorCode &errorCode);
    395 
    396    Normalizer2Impl *impl;
    397    ComposeNormalizer2 comp;
    398    DecomposeNormalizer2 decomp;
    399    FCDNormalizer2 fcd;
    400    ComposeNormalizer2 fcc;
    401 };
    402 
    403 U_NAMESPACE_END
    404 
    405 #endif  // !UCONFIG_NO_NORMALIZATION
    406 #endif  // __NORM2ALLMODES_H__