tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

utf16collationiterator.h (6413B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2010-2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * utf16collationiterator.h
      9 *
     10 * created on: 2010oct27
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #ifndef __UTF16COLLATIONITERATOR_H__
     15 #define __UTF16COLLATIONITERATOR_H__
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_COLLATION
     20 
     21 #include "cmemory.h"
     22 #include "collation.h"
     23 #include "collationdata.h"
     24 #include "collationiterator.h"
     25 #include "normalizer2impl.h"
     26 
     27 U_NAMESPACE_BEGIN
     28 
     29 /**
     30 * UTF-16 collation element and character iterator.
     31 * Handles normalized UTF-16 text inline, with length or NUL-terminated.
     32 * Unnormalized text is handled by a subclass.
     33 */
     34 class U_I18N_API UTF16CollationIterator : public CollationIterator {
     35 public:
           // Creates an iterator over UTF-16 text.
           // d and numeric are forwarded unchanged to the CollationIterator base.
           // s becomes start, p becomes the current position pos, and lim becomes limit;
           // lim may be nullptr for NUL-terminated text (see the member comment below).
           // The pointers are stored as given; nothing is copied here.
     36    UTF16CollationIterator(const CollationData *d, UBool numeric,
     37                           const char16_t *s, const char16_t *p, const char16_t *lim)
     38            : CollationIterator(d, numeric),
     39              start(s), pos(p), limit(lim) {}
     40 
           // Defined out of line.
           // NOTE(review): presumably clones other's state onto the text at newText -- confirm in the .cpp.
     41    UTF16CollationIterator(const UTF16CollationIterator &other, const char16_t *newText);
     42 
     43    virtual ~UTF16CollationIterator();
     44 
           // The following are overrides of the CollationIterator interface, defined out of line.
     45    virtual bool operator==(const CollationIterator &other) const override;
     46 
     47    virtual void resetToOffset(int32_t newOffset) override;
     48 
     49    virtual int32_t getOffset() const override;
     50 
           // Calls reset() and then re-points the iterator: start and pos are set to s, limit to lim.
           // NOTE(review): this function is not virtual and FCDUTF16CollationIterator does not
           // redeclare it, so it cannot update that subclass's private raw/segment pointers.
           // Verify that callers only use it on a plain UTF16CollationIterator.
     51    void setText(const char16_t *s, const char16_t *lim) {
     52        reset();
     53        start = pos = s;
     54        limit = lim;
     55    }
     56 
     57    virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
     58 
     59    virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
     60 
     61 protected:
     62    // Copy constructor only for subclasses which set the pointers.
           // Copy-constructs the CollationIterator base from other, but leaves
           // start, pos and limit as nullptr rather than copying them.
     63    UTF16CollationIterator(const UTF16CollationIterator &other)
     64            : CollationIterator(other),
     65              start(nullptr), pos(nullptr), limit(nullptr) {}
     66 
           // Protected overrides of CollationIterator hooks, defined out of line.
     67    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
     68 
     69    virtual char16_t handleGetTrailSurrogate() override;
     70 
     71    virtual UBool foundNULTerminator() override;
     72 
     73    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
     74 
     75    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
     76 
     77    // UTF-16 string pointers.
     78    // limit can be nullptr for NUL-terminated strings.
           // These are non-owning pointers into caller-provided text as far as this header shows:
           // they are only assigned here, never allocated.
     79    const char16_t *start, *pos, *limit;
     80 };
     81 
     82 /**
     83 * Incrementally checks the input text for FCD and normalizes where necessary.
     84 */
     85 class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
     86 public:
           // Creates an FCD-checking iterator over UTF-16 text.
           // The base class receives (data, numeric, s, p, lim) unchanged.
           // Initial state: rawStart = s, segmentStart = p, segmentLimit = nullptr, rawLimit = lim,
           // and checkDir = 1, i.e. the "checkDir > 0" (forward incremental check) state
           // described in the comment before rawStart.
           // data is dereferenced here (data->nfcImpl), so it must not be null.
     87    FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
     88                              const char16_t *s, const char16_t *p, const char16_t *lim)
     89            : UTF16CollationIterator(data, numeric, s, p, lim),
     90              rawStart(s), segmentStart(p), segmentLimit(nullptr), rawLimit(lim),
     91              nfcImpl(data->nfcImpl),
     92              checkDir(1) {}
     93 
           // Defined out of line.
           // NOTE(review): presumably clones other's state onto the text at newText -- confirm in the .cpp.
     94    FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const char16_t *newText);
     95 
     96    virtual ~FCDUTF16CollationIterator();
     97 
           // The following are overrides of the inherited iterator interface, defined out of line.
     98    virtual bool operator==(const CollationIterator &other) const override;
     99 
    100    virtual void resetToOffset(int32_t newOffset) override;
    101 
    102    virtual int32_t getOffset() const override;
    103 
    104    virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
    105 
    106    virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
    107 
    108 protected:
           // Protected overrides of the inherited hooks, defined out of line.
           // Unlike the base class, handleGetTrailSurrogate() is not overridden here.
    109    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
    110 
    111    virtual UBool foundNULTerminator() override;
    112 
    113    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
    114 
    115    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
    116 
    117 private:
    118    /**
    119     * Switches to forward checking if possible.
    120     * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
    121     * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
    122     */
    123    void switchToForward();
    124 
    125    /**
    126     * Extend the FCD text segment forward or normalize around pos.
    127     * To be called when checkDir > 0 && pos != limit.
    128     * @return true if success, checkDir == 0 and pos != limit
    129     */
    130    UBool nextSegment(UErrorCode &errorCode);
    131 
    132    /**
    133     * Switches to backward checking.
    134     * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
    135     * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
    136     */
    137    void switchToBackward();
    138 
    139    /**
    140     * Extend the FCD text segment backward or normalize around pos.
    141     * To be called when checkDir < 0 && pos != start.
    142     * @return true if success, checkDir == 0 and pos != start
    143     */
    144    UBool previousSegment(UErrorCode &errorCode);
    145 
           // Defined out of line.
           // NOTE(review): presumably normalizes the text range [from..to[ into the
           // `normalized` buffer and returns false on failure -- confirm in the .cpp.
    146    UBool normalize(const char16_t *from, const char16_t *to, UErrorCode &errorCode);
    147 
    148    // Text pointers: The input text is [rawStart..rawLimit[
    149    // where rawLimit can be nullptr for NUL-terminated text.
    150    //
    151    // checkDir > 0:
    152    //
    153    // The input text [segmentStart..pos[ passes the FCD check.
    154    // Moving forward checks incrementally.
    155    // segmentLimit is undefined. limit == rawLimit.
    156    //
    157    // checkDir < 0:
    158    // The input text [pos..segmentLimit[ passes the FCD check.
    159    // Moving backward checks incrementally.
    160    // segmentStart is undefined, start == rawStart.
    161    //
    162    // checkDir == 0:
    163    //
    164    // The input text [segmentStart..segmentLimit[ is being processed.
    165    // These pointers are at FCD boundaries.
    166    // Either this text segment already passes the FCD check
    167    // and segmentStart==start<=pos<=limit==segmentLimit,
    168    // or the current segment had to be normalized so that
    169    // [segmentStart..segmentLimit[ turned into the normalized string,
    170    // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
    171    const char16_t *rawStart;
    172    const char16_t *segmentStart;
    173    const char16_t *segmentLimit;
    174    // rawLimit==nullptr for a NUL-terminated string.
    175    const char16_t *rawLimit;
    176 
           // Reference member bound to data->nfcImpl in the constructor above;
           // being a reference, it cannot be re-bound afterwards.
    177    const Normalizer2Impl &nfcImpl;
           // Holds the normalized form of the current segment when normalization was needed;
           // in that case start/pos/limit point into this buffer (see the checkDir == 0 case above).
    178    UnicodeString normalized;
    179    // Direction of incremental FCD check. See comments before rawStart.
    180    int8_t checkDir;
    181 };
    182 
    183 U_NAMESPACE_END
    184 
    185 #endif  // !UCONFIG_NO_COLLATION
    186 #endif  // __UTF16COLLATIONITERATOR_H__