tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

coleitr.h (14114B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *   Copyright (C) 1997-2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 ******************************************************************************
      8 */
      9 
     10 /**
     11 * \file 
     12 * \brief C++ API: Collation Element Iterator.
     13 */
     14 
     15 /**
     16 * File coleitr.h
     17 *
     18 * Created by: Helena Shih
     19 *
     20 * Modification History:
     21 *
     22 *  Date       Name        Description
     23 *
     24 *  8/18/97    helena      Added internal API documentation.
     25 * 08/03/98    erm         Synched with 1.2 version CollationElementIterator.java
     26 * 12/10/99    aliu        Ported Thai collation support from Java.
     27 * 01/25/01    swquek      Modified into a C++ wrapper calling C APIs (ucoliter.h)
     28 * 02/19/01    swquek      Removed CollationElementsIterator() since it is 
     29 *                         private constructor and no calls are made to it
     30 * 2012-2014   markus      Rewritten in C++ again.
     31 */
     32 
     33 #ifndef COLEITR_H
     34 #define COLEITR_H
     35 
     36 #include "unicode/utypes.h"
     37 
     38 #if U_SHOW_CPLUSPLUS_API
     39 
     40 #if !UCONFIG_NO_COLLATION
     41 
     42 #include "unicode/unistr.h"
     43 #include "unicode/uobject.h"
     44 
     45 struct UCollationElements;  // pointer type cast to/from by fromUCollationElements()/toUCollationElements()
     46 struct UHashtable;  // used by computeMaxExpansions() and the private static getMaxExpansion() overload
     47 
     48 U_NAMESPACE_BEGIN
     49 
     50 struct CollationData;  // parameter type of computeMaxExpansions()
     51 
     52 class CharacterIterator;  // accepted by setText() and by a private constructor
     53 class CollationIterator;  // type of the owned iter_ member
     54 class RuleBasedCollator;  // friend class; also the type of the aliased rbc_ member
     55 class UCollationPCE;  // friend class
     56 class UVector32;  // type of the offsets_ member
     57 
     58 /**
     59 * The CollationElementIterator class is used as an iterator to walk through     
     60 * each character of an international string. Use the iterator to return the
     61 * ordering priority of the positioned character. The ordering priority of a 
     62 * character, which we refer to as a key, defines how a character is collated in 
     63 * the given collation object.
     64 * For example, consider the following in Slovak and in traditional Spanish collation:
     65 * <pre>
     66 *        "ca" -> the first key is key('c') and second key is key('a').
     67 *        "cha" -> the first key is key('ch') and second key is key('a').</pre>
     68 * And in German phonebook collation,
     69 * <pre> \htmlonly       "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
     70 *        the third key is key('b'). \endhtmlonly </pre>
     71 * The key of a character is an integer composed of primary order(short),
     72 * secondary order(char), and tertiary order(char). Java strictly defines the 
     73 * size and signedness of its primitive data types. Therefore, the static
     74 * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return 
     75 * int32_t to ensure the correctness of the key value.
     76 * <p>Example of the iterator usage: (without error checking)
     77 * <pre>
     78 * \code
     79 *   void CollationElementIterator_Example()
     80 *   {
     81 *       UnicodeString str = "This is a test";
     82 *       UErrorCode success = U_ZERO_ERROR;
     83 *       RuleBasedCollator* rbc =
     84 *           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
     85 *       CollationElementIterator* c =
     86 *           rbc->createCollationElementIterator( str );
     87 *       int32_t order = c->next(success);
     88 *       c->reset();
     89 *       order = c->previous(success);
     90 *       delete c;
     91 *       delete rbc;
     92 *   }
     93 * \endcode
     94 * </pre>
     95 * <p>
     96 * The method next() returns the collation order of the next character based on
     97 * the comparison level of the collator. The method previous() returns the
     98 * collation order of the previous character based on the comparison level of
     99 * the collator. The Collation Element Iterator moves only in one direction
    100 * between calls to reset(), setOffset(), or setText(). That is, calls to next()
    101 * and previous() cannot be interleaved. Whenever previous() is to be called after
    102 * next() or vice versa, reset(), setOffset() or setText() has to be called first
    103 * to reset the status, shifting pointers to either the end or the start of
    104 * the string (reset() or setText()), or the specified position (setOffset()).
    105 * Hence at the next call of next() or previous(), the first or last collation order,
    106 * or collation order at the specified position will be returned. If a change of
    107 * direction is done without one of these calls, the result is undefined.
    108 * <p>
    109 * The result of a forward iteration (next()) and the reversed result of a backward
    110 * iteration (previous()) on the same string are equivalent, if collation orders
    111 * with the value 0 are ignored.
    112 * Each collation order is based on the comparison level of the collator.  A collation order
    113 * consists of primary order, secondary order and tertiary order.  The data 
    114 * type of the collation order is <strong>int32_t</strong>. 
    115 *
    116 * Note, CollationElementIterator should not be subclassed.
    117 * @see     Collator
    118 * @see     RuleBasedCollator
    119 * @version 1.8 Jan 16 2001
    120 */
    121 class U_I18N_API CollationElementIterator final : public UObject {
    122 public: 
    123 
    124    // CollationElementIterator public data member ------------------------------
    125 
    126    enum {
    127        /**
    128         * NULLORDER is returned by next()/previous() on error or when the end/start of the string is reached
    129         * @stable ICU 2.0
    130         */
    131        NULLORDER = static_cast<int32_t>(0xffffffff)
    132    };
    133 
    134    // CollationElementIterator public constructor/destructor -------------------
    135 
    136    /**
    137    * Copy constructor.
    138    *
    139    * @param other    the object to be copied from
    140    * @stable ICU 2.0
    141    */
    142    CollationElementIterator(const CollationElementIterator& other);
    143 
    144    /**
    145    * Destructor
    146    * @stable ICU 2.0
    147    */
    148    virtual ~CollationElementIterator();
    149 
    150    // CollationElementIterator public methods ----------------------------------
    151 
    152    /**
    153    * Returns true if "other" is the same as "this".
    154    *
    155    * @param other    the object to be compared
    156    * @return         true if "other" is the same as "this"
    157    * @stable ICU 2.0
    158    */
    159    bool operator==(const CollationElementIterator& other) const;
    160 
    161    /**
    162    * Returns true if "other" is not the same as "this".
    163    *
    164    * @param other    the object to be compared
    165    * @return         true if "other" is not the same as "this"
    166    * @stable ICU 2.0
    167    */
    168    bool operator!=(const CollationElementIterator& other) const;
    169 
    170    /**
    171    * Resets the cursor to the beginning of the string; a following previous() begins from the end.
    172    * @stable ICU 2.0
    173    */
    174    void reset();
    175 
    176    /**
    177    * Gets the ordering priority of the next character in the string.
    178    * @param status the error code status.
    179    * @return the next character's ordering, or NULLORDER if an error has
    180    *         occurred or if the end of the string has been reached
    181    * @stable ICU 2.0
    182    */
    183    int32_t next(UErrorCode& status);
    184 
    185    /**
    186    * Gets the ordering priority of the previous collation element in the string.
    187    * @param status the error code status.
    188    * @return the previous element's ordering, or NULLORDER if an error has
    189    *         occurred or if the start of the string has been reached
    190    * @stable ICU 2.0
    191    */
    192    int32_t previous(UErrorCode& status);
    193 
    194    /**
    195    * Gets the primary order of a collation order.
    196    * @param order the collation order
    197    * @return the primary order of a collation order (bits 16..31 of order).
    198    * @stable ICU 2.0
    199    */
    200    static inline int32_t primaryOrder(int32_t order);
    201 
    202    /**
    203    * Gets the secondary order of a collation order.
    204    * @param order the collation order
    205    * @return the secondary order of a collation order (bits 8..15 of order).
    206    * @stable ICU 2.0
    207    */
    208    static inline int32_t secondaryOrder(int32_t order);
    209 
    210    /**
    211    * Gets the tertiary order of a collation order.
    212    * @param order the collation order
    213    * @return the tertiary order of a collation order (the low 8 bits of order).
    214    * @stable ICU 2.0
    215    */
    216    static inline int32_t tertiaryOrder(int32_t order);
    217 
    218    /**
    219    * Returns the maximum length of any expansion sequence that ends with the
    220    * specified comparison order.
    221    * @param order a collation order returned by previous() or next().
    222    * @return maximum size of the expansion sequences ending with the collation
    223    *         element, or 1 if the collation element does not occur at the end of any
    224    *         expansion sequence
    225    * @stable ICU 2.0
    226    */
    227    int32_t getMaxExpansion(int32_t order) const;
    228 
    229    /**
    230    * Gets the comparison order in the desired strength, ignoring the other differences.
    231    * @param order The order value
    232    * @return the comparison order in the desired strength
    233    * @stable ICU 2.0
    234    */
    235    int32_t strengthOrder(int32_t order) const;
    236 
    237    /**
    238    * Sets the source string.
    239    * @param str the source string.
    240    * @param status the error code status.
    241    * @stable ICU 2.0
    242    */
    243    void setText(const UnicodeString& str, UErrorCode& status);
    244 
    245    /**
    246    * Sets the source text from a character iterator.
    247    * @param str the source character iterator.
    248    * @param status the error code status.
    249    * @stable ICU 2.0
    250    */
    251    void setText(CharacterIterator& str, UErrorCode& status);
    252 
    253    /**
    254    * Checks if a comparison order is ignorable, i.e. bits 16..31 of the order are all zero.
    255    * @param order the collation order.
    256    * @return true if a character is ignorable, false otherwise.
    257    * @stable ICU 2.0
    258    */
    259    static inline UBool isIgnorable(int32_t order);
    260 
    261    /**
    262    * Gets the offset of the currently processed character in the source string.
    263    * @return the offset of the character.
    264    * @stable ICU 2.0
    265    */
    266    int32_t getOffset() const;
    267 
    268    /**
    269    * Sets the offset of the currently processed character in the source string.
    270    * The next call to next() or previous() returns the collation order at this position.
    271    * @param newOffset the new offset.
    272    * @param status the error code status.
    273    * @stable ICU 2.0
    274    */
    275    void setOffset(int32_t newOffset, UErrorCode& status);
    276 
    277    /**
    278    * ICU "poor man's RTTI", returns a UClassID for the actual class.
    279    *
    280    * @stable ICU 2.2
    281    */
    282    virtual UClassID getDynamicClassID() const override;
    283 
    284    /**
    285    * ICU "poor man's RTTI", returns a UClassID for this class.
    286    *
    287    * @stable ICU 2.2
    288    */
    289    static UClassID U_EXPORT2 getStaticClassID();
    290 
    291 #ifndef U_HIDE_INTERNAL_API
    292    /** Casts a UCollationElements pointer to a CollationElementIterator pointer. @internal */
    293    static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) {
    294        return reinterpret_cast<CollationElementIterator *>(uc);
    295    }
    296    /** Casts a const UCollationElements pointer to a const CollationElementIterator pointer. @internal */
    297    static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) {
    298        return reinterpret_cast<const CollationElementIterator *>(uc);
    299    }
    300    /** Casts this iterator to a UCollationElements pointer. @internal */
    301    inline UCollationElements *toUCollationElements() {
    302        return reinterpret_cast<UCollationElements *>(this);
    303    }
    304    /** Casts this const iterator to a const UCollationElements pointer. @internal */
    305    inline const UCollationElements *toUCollationElements() const {
    306        return reinterpret_cast<const UCollationElements *>(this);
    307    }
    308 #endif  // U_HIDE_INTERNAL_API
    309 
    310 private:
    311    friend class RuleBasedCollator;
    312    friend class UCollationPCE;
    313 
    314    /**
    315    * CollationElementIterator constructor. This takes the source string and the
    316    * collation object. The cursor will walk through the source string based on the
    317    * predefined collation rules. If the source string is empty, NULLORDER will
    318    * be returned on the calls to next().
    319    * @param sourceText    the source string.
    320    * @param order         the collation object.
    321    * @param status        the error code status.
    322    */
    323    CollationElementIterator(const UnicodeString& sourceText,
    324        const RuleBasedCollator* order, UErrorCode& status);
    325    // Note: The constructors should take settings & tailoring, not a collator,
    326    // to avoid circular dependencies.
    327    // However, for operator==() we would need to be able to compare tailoring data for equality
    328    // without making CollationData or CollationTailoring depend on TailoredSet.
    329    // (See the implementation of RuleBasedCollator::operator==().)
    330    // That might require creating an intermediate class that would be used
    331    // by both CollationElementIterator and RuleBasedCollator
    332    // but only contain the part of RBC== related to data and rules.
    333 
    334    /**
    335    * CollationElementIterator constructor. This takes the source character iterator and the
    336    * collation object.  The cursor will walk through the source text based on the
    337    * predefined collation rules.  If the source text is empty, NULLORDER will
    338    * be returned on the calls to next().
    339    * @param sourceText    the source character iterator.
    340    * @param order         the collation object.
    341    * @param status        the error code status.
    342    */
    343    CollationElementIterator(const CharacterIterator& sourceText,
    344        const RuleBasedCollator* order, UErrorCode& status);
    345 
    346    /**
    347    * Assignment operator. Private, so only this class and its friends can assign.
    348    *
    349    * @param other    the object to be copied
    350    */
    351    const CollationElementIterator&
    352        operator=(const CollationElementIterator& other);
    353 
    354    CollationElementIterator() = delete; // default construction is disallowed
    355 
    356    /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()); other values are returned unchanged. */
    357    inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; }
    358 
    359    static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode);  // NOTE(review): presumably builds the table passed to the static getMaxExpansion() - confirm in the implementation
    360 
    361    static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order);  // static overload taking an explicit maxExpansions table
    362 
    363    // CollationElementIterator private data members ----------------------------
    364 
    365    CollationIterator *iter_;  // owned
    366    const RuleBasedCollator *rbc_;  // aliased
    367    uint32_t otherHalf_;  // NOTE(review): undocumented here; presumably a pending second half of a collation element - confirm in the implementation
    368    /**
    369     * <0: backwards; 0: just after reset() (previous() begins from end);
    370     * 1: just after setOffset(); >1: forward
    371     */
    372    int8_t dir_;
    373    /**
    374     * Stores offsets from expansions and from unsafe-backwards iteration,
    375     * so that getOffset() returns intermediate offsets for the CEs
    376     * that are consistent with forward iteration.
    377     */
    378    UVector32 *offsets_;
    379 
    380    UnicodeString string_;  // NOTE(review): presumably the text being iterated over - confirm in the implementation
    381 };
    382 
    383 // CollationElementIterator inline method definitions --------------------------
    384 
    385 // Primary order: bits 16..31 of the collation order, returned as a value in 0..0xffff.
    386 inline int32_t CollationElementIterator::primaryOrder(int32_t order) {
    387    return 0xffff & (order >> 16);
    388 }
    389 
    390 // Secondary order: bits 8..15 of the collation order, returned as a value in 0..0xff.
    391 inline int32_t CollationElementIterator::secondaryOrder(int32_t order) {
    392    return 0xff & (order >> 8);
    393 }
    394 
    395 // Tertiary order: the low 8 bits of the collation order.
    396 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) {
    397    return 0xff & order;
    398 }
    399 
    400 // An order is ignorable when its upper 16 bits (the primary order) are all zero.
    401 inline UBool CollationElementIterator::isIgnorable(int32_t order) {
    402    return primaryOrder(order) == 0;
    403 }
    404 
    405 U_NAMESPACE_END
    406 
    407 #endif /* #if !UCONFIG_NO_COLLATION */
    408 
    409 #endif /* U_SHOW_CPLUSPLUS_API */
    410 
    411 #endif