tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uitercollationiterator.h (4841B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2012-2016, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * uitercollationiterator.h
      9 *
     10 * created on: 2012sep23 (from utf16collationiterator.h)
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #ifndef __UITERCOLLATIONITERATOR_H__
     15 #define __UITERCOLLATIONITERATOR_H__
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_COLLATION
     20 
     21 #include "unicode/uiter.h"
     22 #include "cmemory.h"
     23 #include "collation.h"
     24 #include "collationdata.h"
     25 #include "collationiterator.h"
     26 #include "normalizer2impl.h"
     27 
     28 U_NAMESPACE_BEGIN
     29 
     30 /**
     31 * UCharIterator-based collation element and character iterator.
     32 * Handles normalized text inline, with length or NUL-terminated.
     33 * Unnormalized text is handled by a subclass.
     34 */
     35 class U_I18N_API UIterCollationIterator : public CollationIterator {  // CollationIterator subclass that works on a caller-supplied UCharIterator
     36 public:
     37    UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
     38            : CollationIterator(d, numeric), iter(ui) {}  // d and numeric are forwarded to the base class; ui is bound by reference, not copied
     39 
     40    virtual ~UIterCollationIterator();  // only declared here; the definition is not in this header
     41 
     42    virtual void resetToOffset(int32_t newOffset) override;  // this and the three declarations below override inherited virtuals; bodies are not in this header
     43 
     44    virtual int32_t getOffset() const override;
     45 
     46    virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
     47 
     48    virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
     49 
     50 protected:  // overrides below are reachable only from this class hierarchy, not from outside callers
     51    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;  // c is a non-const reference, so the implementation can write to it
     52 
     53    virtual char16_t handleGetTrailSurrogate() override;
     54 
     55    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
     56 
     57    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
     58 
     59    UCharIterator &iter;  // reference to the constructor's ui argument; that object must outlive this iterator. Protected so subclasses can use it.
     60 };
     61 
     62 /**
     63 * Incrementally checks the input text for FCD and normalizes where necessary.
     64 */
     65 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {  // adds the State machine and start/pos/limit bookkeeping declared below
     66 public:
     67    FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
     68            : UIterCollationIterator(data, numeric, ui),
     69              state(ITER_CHECK_FWD), start(startIndex),  // pos and limit are intentionally not initialized: ITER_CHECK_FWD documents them as undefined
     70              nfcImpl(data->nfcImpl) {}  // data is dereferenced here, so it must be non-null; nfcImpl is bound by reference
     71 
     72    virtual ~FCDUIterCollationIterator();  // only declared here; the definition is not in this header
     73 
     74    virtual void resetToOffset(int32_t newOffset) override;  // this and the three declarations below override inherited virtuals; bodies are not in this header
     75 
     76    virtual int32_t getOffset() const override;
     77 
     78    virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
     79 
     80    virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
     81 
     82 protected:
     83    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;  // c is a non-const reference, so the implementation can write to it
     84 
     85    virtual char16_t handleGetTrailSurrogate() override;
     86 
     87 
     88    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
     89 
     90    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
     91 
     92 private:  // helpers and state below are internal to this class
     93    /**
     94     * Switches to forward checking if possible.
     95     */
     96    void switchToForward();
     97 
     98    /**
     99     * Extends the FCD text segment forward or normalizes around pos.
    100     * @return true if success
    101     */
    102    UBool nextSegment(UErrorCode &errorCode);
    103 
    104    /**
    105     * Switches to backward checking.
    106     */
    107    void switchToBackward();
    108 
    109    /**
    110     * Extends the FCD text segment backward or normalizes around pos.
    111     * @return true if success
    112     */
    113    UBool previousSegment(UErrorCode &errorCode);
    114 
    115    UBool normalize(const UnicodeString &s, UErrorCode &errorCode);  // NOTE(review): undocumented here; presumably normalizes s into `normalized` and returns true on success - confirm in the .cpp
    116 
    117    enum State {  // which of start/pos/limit are meaningful depends on the current value; see each enumerator
    118        /**
    119         * The input text [start..(iter index)[ passes the FCD check.
    120         * Moving forward checks incrementally.
    121         * pos & limit are undefined.
    122         */
    123        ITER_CHECK_FWD,
    124        /**
    125         * The input text [(iter index)..limit[ passes the FCD check.
    126         * Moving backward checks incrementally.
    127         * start & pos are undefined.
    128         */
    129        ITER_CHECK_BWD,
    130        /**
    131         * The input text [start..limit[ passes the FCD check.
    132         * pos tracks the current text index.
    133         */
    134        ITER_IN_FCD_SEGMENT,
    135        /**
    136         * The input text [start..limit[ failed the FCD check and was normalized.
    137         * pos tracks the current index in the normalized string.
    138         * The text iterator is at the limit index.
    139         */
    140        IN_NORM_ITER_AT_LIMIT,
    141        /**
    142         * The input text [start..limit[ failed the FCD check and was normalized.
    143         * pos tracks the current index in the normalized string.
    144         * The text iterator is at the start index.
    145         */
    146        IN_NORM_ITER_AT_START
    147    };
    148 
    149    State state;  // set to ITER_CHECK_FWD by the constructor
    150 
    151    int32_t start;  // set to startIndex by the constructor; undefined in ITER_CHECK_BWD
    152    int32_t pos;  // not set by the constructor; undefined in ITER_CHECK_FWD and ITER_CHECK_BWD
    153    int32_t limit;  // not set by the constructor; undefined in ITER_CHECK_FWD
    154 
    155    const Normalizer2Impl &nfcImpl;  // reference to data->nfcImpl from the constructor; the CollationData must outlive this iterator
    156    UnicodeString normalized;  // default-constructed; NOTE(review): presumably holds the normalized text that pos indexes in the IN_NORM_* states - confirm
    157 };
    158 
    159 U_NAMESPACE_END
    160 
    161 #endif  // !UCONFIG_NO_COLLATION
    162 #endif  // __UITERCOLLATIONITERATOR_H__