tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rbt_set.h (5747B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 * Copyright (C) 1999-2007, International Business Machines Corporation
      6 * and others. All Rights Reserved.
      7 **********************************************************************
      8 *   Date        Name        Description
      9 *   11/17/99    aliu        Creation.
     10 **********************************************************************
     11 */
     12 #ifndef RBT_SET_H
     13 #define RBT_SET_H
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_TRANSLITERATION
     18 
     19 #include "unicode/uobject.h"
     20 #include "unicode/utrans.h"
     21 #include "uvector.h"
     22 
     23 U_NAMESPACE_BEGIN
     24 
     25 class Replaceable;
     26 class TransliterationRule;
     27 class TransliterationRuleData;
     28 class UnicodeFilter;
     29 class UnicodeString;
     30 class UnicodeSet;
     31 
     32 /**
     33 * A set of rules for a <code>RuleBasedTransliterator</code>.
     34 * @author Alan Liu
     35 */
     36 class TransliterationRuleSet : public UMemory {
     37    /**
     38     * Vector of rules, in the order added.  This is used while the
     39     * rule set is getting built.  After that, freeze() reorders and
     40     * indexes the rules into rules[].  Any given rule is stored once
     41     * in ruleVector, and one or more times in rules[].  ruleVector
     42     * owns and deletes the rules.
     43     */
     44    UVector* ruleVector;
     45 
     46    /**
     47     * Sorted and indexed table of rules.  This is created by freeze()
     48     * from the rules in ruleVector.  It contains alias (non-owning)
     49     * pointers to the rules in ruleVector.  It is zero before freeze()
     50     * is called and non-zero thereafter.
     51     */
     52    TransliterationRule** rules;
     53 
     54    /**
     55     * Index table.  For text having a first character c, compute x = c&0xFF.
     56     * Now use rules[index[x]..index[x+1]-1].  This index table is created by
     57     * freeze().  Before freeze() is called it contains garbage.  257 entries so index[x+1] exists for x=255.
     58     */
     59    int32_t index[257];
     60 
     61    /**
     62     * Length of the longest preceding context
     63     */
     64    int32_t maxContextLength;
     65 
     66 public:
     67 
     68    /**
     69     * Construct a new empty rule set.
     70     * @param status    Output parameter filled in with success or failure status.
     71     */
     72    TransliterationRuleSet(UErrorCode& status);
     73 
     74    /**
     75     * Copy constructor.
     76     */
     77    TransliterationRuleSet(const TransliterationRuleSet&);
     78 
     79    /**
     80     * Destructor.
     81     */
     82    virtual ~TransliterationRuleSet();
     83 
     84    /**
     85     * Change the data object that this rule belongs to.  Used
     86     * internally by the TransliterationRuleData copy constructor.
     87     * @param data    the new data value to be set.
     88     */
     89    void setData(const TransliterationRuleData* data);
     90 
     91    /**
     92     * Return the maximum context length.
     93     * @return the length of the longest preceding context.
     94     */
     95    virtual int32_t getMaximumContextLength() const;
     96 
     97    /**
     98     * Add a rule to this set.  Rules are added in order, and order is
     99     * significant.  The last call to this method must be followed by
    100     * a call to <code>freeze()</code> before the rule set is used.
    101     * This method must <em>not</em> be called after freeze() has been
    102     * called.  NOTE(review): the freeze() documentation says otherwise; confirm.
    103     * @param adoptedRule the rule to add; ruleVector owns and deletes the rules
    104     * @param status      error code; presumably set on failure -- confirm in the implementation
    105     */
    106    virtual void addRule(TransliterationRule* adoptedRule,
    107                         UErrorCode& status);
    108 
    109    /**
    110     * Check this for masked rules and index it to optimize performance.
    111     * The sequence of operations is: (1) add rules to a set using
    112     * <code>addRule()</code>; (2) freeze the set using
    113     * <code>freeze()</code>; (3) use the rule set.  If
    114     * <code>addRule()</code> is called after calling this method, it
    115     * invalidates this object, and this method must be called again.
    116     * That is, <code>freeze()</code> may be called multiple times,
    117     * although for optimal performance it shouldn't be.
    118     * @param parseError A reference to a UParseError that receives information
    119     *                   about any errors that occurred.
    120     * @param status     Output parameter filled in with success or failure status.
    121     */
    122    virtual void freeze(UParseError& parseError, UErrorCode& status);
    123    
    124    /**
    125     * Transliterate the given text with the given UTransPosition
    126     * indices.  Return true if the transliteration should continue
    127     * or false if it should halt (because of a U_PARTIAL_MATCH match).
    128     * Note that false is only ever returned if isIncremental is true.
    129     * @param text the text to be transliterated
    130     * @param index the position indices, which will be updated
    131     * @param isIncremental if true, assume new text may be inserted
    132     * at index.limit, and return false if there is a partial match.
    133     * @return true unless a U_PARTIAL_MATCH has been obtained,
    134     * indicating that transliteration should stop until more text
    135     * arrives.
    136     */
    137    UBool transliterate(Replaceable& text,
    138                        UTransPosition& index,
    139                        UBool isIncremental);
    140 
    141    /**
    142     * Create rule strings that represent this rule set.
    143     * @param result string to receive the rule strings.  Current
    144     * contents will be deleted.
    145     * @param escapeUnprintable  if true, unprintable characters are escaped
    146     * @return    A reference to 'result'.
    147     */
    148    virtual UnicodeString& toRules(UnicodeString& result,
    149                                   UBool escapeUnprintable) const;
    150 
    151    /**
    152     * Return the set of all characters that may be modified
    153     * (getTarget=false) or emitted (getTarget=true) by this set.
    154     */
    155    UnicodeSet& getSourceTargetSet(UnicodeSet& result,
    156                   UBool getTarget) const;
    157 
    158 private:
    159 
    160    TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // private: forbids assignment (the copy constructor above is public)
    161 };
    162 
    163 U_NAMESPACE_END
    164 
    165 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    166 
    167 #endif