tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

collationsets.h (4664B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2013-2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * collationsets.h
      9 *
     10 * created on: 2013feb09
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #ifndef __COLLATIONSETS_H__
     15 #define __COLLATIONSETS_H__
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_COLLATION
     20 
     21 #include "unicode/uniset.h"
     22 #include "collation.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 struct CollationData;  // Forward declaration only: this header refers to CollationData solely through pointers.
     27 
     28 /**
     29 * Finds the set of characters and strings that sort differently in the tailoring
     30 * from the base data.
     31 *
     32 * Every mapping in the tailoring needs to be compared to the base,
     33 * because some mappings are copied for optimization, and
     34 * all contractions for a character are copied if any contractions for that character
     35 * are added, modified or removed.
     36 *
     37 * It might be simpler to re-parse the rule string, but:
     38 * - That would require duplicating some of the from-rules builder code.
     39 * - That would make the runtime code depend on the builder.
     40 * - That would only work if we have the rule string, and we allow users to
     41 *   omit the rule string from data files.
     42 */
     43 class TailoredSet : public UMemory {
     44 public:
     45    TailoredSet(UnicodeSet *t)
     46            : data(nullptr), baseData(nullptr),
     47              tailored(t),
     48              suffix(nullptr),
     49              errorCode(U_ZERO_ERROR) {}
     50 
     51    void forData(const CollationData *d, UErrorCode &errorCode);
     52 
     53    /**
     54     * @return U_SUCCESS(errorCode) in C++, void in Java
     55     * @internal only public for access by callback
     56     */
     57    UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
     58 
     59 private:
     60    void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
     61    void comparePrefixes(UChar32 c, const char16_t *p, const char16_t *q);
     62    void compareContractions(UChar32 c, const char16_t *p, const char16_t *q);
     63 
     64    void addPrefixes(const CollationData *d, UChar32 c, const char16_t *p);
     65    void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
     66    void addContractions(UChar32 c, const char16_t *p);
     67    void addSuffix(UChar32 c, const UnicodeString &sfx);
     68    void add(UChar32 c);
     69 
     70    /** Prefixes are reversed in the data structure. */
     71    void setPrefix(const UnicodeString &pfx) {
     72        unreversedPrefix = pfx;
     73        unreversedPrefix.reverse();
     74    }
     75    void resetPrefix() {
     76        unreversedPrefix.remove();
     77    }
     78 
     79    const CollationData *data;
     80    const CollationData *baseData;
     81    UnicodeSet *tailored;
     82    UnicodeString unreversedPrefix;
     83    const UnicodeString *suffix;
     84    UErrorCode errorCode;
     85 };
     86 
     87 class ContractionsAndExpansions : public UMemory {
     88 public:
     89    class CESink : public UMemory {
     90    public:
     91        virtual ~CESink();
     92        virtual void handleCE(int64_t ce) = 0;
     93        virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
     94    };
     95 
     96    ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
     97            : data(nullptr),
     98              contractions(con), expansions(exp),
     99              sink(s),
    100              addPrefixes(prefixes),
    101              checkTailored(0),
    102              suffix(nullptr),
    103              errorCode(U_ZERO_ERROR) {}
    104 
    105    void forData(const CollationData *d, UErrorCode &errorCode);
    106    void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
    107 
    108    // all following: @internal, only public for access by callback
    109 
    110    void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
    111 
    112    void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
    113    void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
    114 
    115    void addExpansions(UChar32 start, UChar32 end);
    116    void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
    117 
    118    /** Prefixes are reversed in the data structure. */
    119    void setPrefix(const UnicodeString &pfx) {
    120        unreversedPrefix = pfx;
    121        unreversedPrefix.reverse();
    122    }
    123    void resetPrefix() {
    124        unreversedPrefix.remove();
    125    }
    126 
    127    const CollationData *data;
    128    UnicodeSet *contractions;
    129    UnicodeSet *expansions;
    130    CESink *sink;
    131    UBool addPrefixes;
    132    int8_t checkTailored;  // -1: collected tailored  +1: exclude tailored
    133    UnicodeSet tailored;
    134    UnicodeSet ranges;
    135    UnicodeString unreversedPrefix;
    136    const UnicodeString *suffix;
    137    int64_t ces[Collation::MAX_EXPANSION_LENGTH];
    138    UErrorCode errorCode;
    139 };
    140 
    141 U_NAMESPACE_END
    142 
    143 #endif  // !UCONFIG_NO_COLLATION
    144 #endif  // __COLLATIONSETS_H__