tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

transreg.h (17421B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (c) 2001-2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   Date        Name        Description
      9 *   08/10/2001  aliu        Creation.
     10 **********************************************************************
     11 */
     12 #ifndef _TRANSREG_H
     13 #define _TRANSREG_H
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_TRANSLITERATION
     18 
     19 #include "unicode/uobject.h"
     20 #include "unicode/translit.h"
     21 #include "hash.h"
     22 #include "uvector.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 class TransliteratorEntry;
     27 class TransliteratorSpec;
     28 class UnicodeString;
     29 
     30 //------------------------------------------------------------------
     31 // TransliteratorAlias
     32 //------------------------------------------------------------------
     33 
     34 /**
     35 * A TransliteratorAlias object is returned by get() if the given ID
     36 * actually translates into something else.  The caller then invokes
     37 * the create() method on the alias to create the actual
     38 * transliterator, and deletes the alias.
     39 *
     40 * Why all the shenanigans?  To prevent circular calls between
     41 * the registry code and the transliterator code, which would deadlock.
     42 */
     43 class TransliteratorAlias : public UMemory {
     44 public:
     45    /**
     46     * Construct a simple alias (type == SIMPLE)
     47     * @param aliasID the given id.
     48     */
     49    TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
     50 
     51    /**
     52     * Construct a compound RBT alias (type == COMPOUND)
     53     */
     54    TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
     55                        UVector* adoptedTransliterators,
     56                        const UnicodeSet* compoundFilter);
     57 
     58    /**
     59     * Construct a rules alias (type == RULES)
     60     */
     61    TransliteratorAlias(const UnicodeString& theID,
     62                        const UnicodeString& rules,
     63                        UTransDirection dir);
     64 
     65    ~TransliteratorAlias();
     66 
     67    /**
     68     * The whole point of create() is that the caller must invoke
     69     * it when the registry mutex is NOT held, to prevent deadlock.
     70     * It may only be called once.
     71     *
     72     * Note: Only call create() if isRuleBased() returns false.
     73     *
     74     * This method must be called *outside* of the TransliteratorRegistry
     75     * mutex.
     76     */
     77    Transliterator* create(UParseError&, UErrorCode&);
     78 
     79    /**
     80     * Return true if this alias is rule-based.  If so, the caller
     81     * must call parse() on it, then call TransliteratorRegistry::reget().
     82     */
     83    UBool isRuleBased() const;
     84 
     85    /**
     86     * If isRuleBased() returns true, then the caller must call this
     87     * method, followed by TransliteratorRegistry::reget().  The latter
     88     * method must be called inside the TransliteratorRegistry mutex.
     89     *
     90     * Note: Only call parse() if isRuleBased() returns true.
     91     *
     92     * This method must be called *outside* of the TransliteratorRegistry
     93     * mutex, because it can instantiate Transliterators embedded in
     94     * the rules via the "&Latin-Arabic()" syntax.
     95     */
     96    void parse(TransliteratorParser& parser,
     97               UParseError& pe, UErrorCode& ec) const;
     98 
     99 private:
    100    // We actually come in three flavors (see the `type` member):
    101    // 1. Simple alias (SIMPLE)
    102    //    Here aliasesOrRules is the alias string.  The constructor also
    103    //    takes a compoundFilter; other members are presumably unused.
    104    // 2. CompoundRBT (COMPOUND)
    105    //    Here ID is the ID, aliasesOrRules is the idBlocks string, and
    106    //    transes holds the adopted transliterators (owned).
    107    //    NOTE(review): confirm how idBlocks marks where each one goes.
    108    //    compoundFilter is the compound filter, and it is _not_ owned.
    109    // 3. Rules (RULES)
    110    //    Here ID is the ID, aliasesOrRules is the rules string, and
    111    //    direction is the UTransDirection.
    112    UnicodeString ID;
    113    UnicodeString aliasesOrRules;
    114    UVector* transes; // owned
    115    const UnicodeSet* compoundFilter; // alias
    116    UTransDirection direction;
    117    enum { SIMPLE, COMPOUND, RULES } type;
    118 
    119    TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
    120    TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
    121 };
    122 
    123 
    124 /**
    125 * A registry of system transliterators.  This is the data structure
    126 * that implements the mapping between transliterator IDs and the data
    127 * or function pointers used to create the corresponding
    128 * transliterators.  There is one instance of the registry that is
    129 * created statically.
    130 *
    131 * The registry consists of a dynamic component -- a hashtable -- and
    132 * a static component -- locale resource bundles.  The dynamic store
    133 * is semantically overlaid on the static store, so the static mapping
    134 * can be dynamically overridden.
    135 *
    136 * This is an internal class that is only used by Transliterator.
    137 * Transliterator maintains one static instance of this class and
    138 * delegates all registry-related operations to it.
    139 *
    140 * @author Alan Liu
    141 */
    142 class TransliteratorRegistry : public UMemory {
    143 
    144 public:
    145 
    146    /**
    147     * Constructor
    148     * @param status Output param set to success/failure code.
    149     */
    150    TransliteratorRegistry(UErrorCode& status);
    151 
    152    /**
    153     * Nonvirtual destructor -- this class is not subclassable.
    154     */
    155    ~TransliteratorRegistry();
    156 
    157    //------------------------------------------------------------------
    158    // Basic public API
    159    //------------------------------------------------------------------
    160 
    161    /**
    162     * Given a simple ID (forward direction, no inline filter, not
    163     * compound) attempt to instantiate it from the registry.  Return
    164     * nullptr on failure.
    165     *
    166     * Return a non-nullptr aliasReturn value if the ID points to an alias.
    167     * We cannot instantiate it ourselves because the alias may contain
    168     * filters or compounds, which we do not understand.  Caller should
    169     * make aliasReturn nullptr before calling.
    170     * @param ID          the given ID
    171     * @param aliasReturn output param to receive TransliteratorAlias;
    172     *                    should be nullptr on entry
    173     * @param status      Output param set to success/failure code.
    174     * @return            the instantiated transliterator, or nullptr on
    175     *                    failure
    176     */
    177    Transliterator* get(const UnicodeString& ID,
    178                        TransliteratorAlias*& aliasReturn,
    179                        UErrorCode& status);
    180 
    181    /**
    182     * The caller must call this after calling get(), if [a] calling get()
    183     * returns an alias, and [b] the alias is rule based.  In that
    184     * situation the caller must call alias->parse() to do the parsing
    185     * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
    186     * instantiating the transliterator.
    187     *
    188     * Note: Another alias might be returned by this method.
    189     *
    190     * This method (like all public methods of this class) must be called
    191     * from within the TransliteratorRegistry mutex.
    192     *
    193     * @param aliasReturn output param to receive TransliteratorAlias;
    194     *                    should be nullptr on entry
    195     */
    196    Transliterator* reget(const UnicodeString& ID,
    197                          TransliteratorParser& parser,
    198                          TransliteratorAlias*& aliasReturn,
    199                          UErrorCode& status);
    200 
    201    /**
    202     * Register a prototype (adopted).  This adds an entry to the
    203     * dynamic store, or replaces an existing entry.  Any entry in the
    204     * underlying static locale resource store is masked.
    205     */
    206    void put(Transliterator* adoptedProto,
    207             UBool visible,
    208             UErrorCode& ec);
    209 
    210    /**
    211     * Register an ID and a factory function pointer.  This adds an
    212     * entry to the dynamic store, or replaces an existing entry.  Any
    213     * entry in the underlying static locale resource store is masked.
    214     */
    215    void put(const UnicodeString& ID,
    216             Transliterator::Factory factory,
    217             Transliterator::Token context,
    218             UBool visible,
    219             UErrorCode& ec);
    220 
    221    /**
    222     * Register an ID and a resource name.  This adds an entry to the
    223     * dynamic store, or replaces an existing entry.  Any entry in the
    224     * underlying static locale resource store is masked.
    225     */
    226    void put(const UnicodeString& ID,
    227             const UnicodeString& resourceName,
    228             UTransDirection dir,
    229             UBool readonlyResourceAlias,
    230             UBool visible,
    231             UErrorCode& ec);
    232 
    233    /**
    234     * Register an ID and an alias ID.  This adds an entry to the
    235     * dynamic store, or replaces an existing entry.  Any entry in the
    236     * underlying static locale resource store is masked.
    237     */
    238    void put(const UnicodeString& ID,
    239             const UnicodeString& alias,
    240             UBool readonlyAliasAlias,
    241             UBool visible,
    242             UErrorCode& ec);
    243 
    244    /**
    245     * Unregister an ID.  This removes an entry from the dynamic store
    246     * if there is one.  The static locale resource store is
    247     * unaffected.
    248     * @param ID    the given ID.
    249     */
    250    void remove(const UnicodeString& ID);
    251 
    252    //------------------------------------------------------------------
    253    // Public ID and spec management
    254    //------------------------------------------------------------------
    255 
    256    /**
    257     * Return a StringEnumeration over the IDs currently registered
    258     * with the system.
    259     * @internal
    260     */
    261    StringEnumeration* getAvailableIDs() const;
    262 
    263    /**
    264     * == OBSOLETE - remove in ICU 3.4 ==
    265     * Return the number of IDs currently registered with the system.
    266     * To retrieve the actual IDs, call getAvailableID(i) with
    267     * i from 0 to countAvailableIDs() - 1.
    268     * @return the number of IDs currently registered with the system.
    269     * @internal
    270     */
    271    int32_t countAvailableIDs() const;
    272 
    273    /**
    274     * == OBSOLETE - remove in ICU 3.4 ==
    275     * Return the index-th available ID.  index must be between 0
    276     * and countAvailableIDs() - 1, inclusive.  If index is out of
    277     * range, the result of getAvailableID(0) is returned.
    278     * @param index the given index.
    279     * @return the index-th available ID.  index must be between 0
    280     *         and countAvailableIDs() - 1, inclusive.  If index is out of
    281     *         range, the result of getAvailableID(0) is returned.
    282     * @internal
    283     */
    284    const UnicodeString& getAvailableID(int32_t index) const;
    285 
    286    /**
    287     * Return the number of registered source specifiers.
    288     * @return the number of registered source specifiers.
    289     */
    290    int32_t countAvailableSources() const;
    291 
    292    /**
    293     * Return a registered source specifier.
    294     * @param index which specifier to return, from 0 to n-1, where
    295     * n = countAvailableSources()
    296     * @param result fill-in parameter to receive the source specifier.
    297     * If index is out of range, result will be empty.
    298     * @return reference to result
    299     */
    300    UnicodeString& getAvailableSource(int32_t index,
    301                                      UnicodeString& result) const;
    302 
    303    /**
    304     * Return the number of registered target specifiers for a given
    305     * source specifier.
    306     * @param source the given source specifier.
    307     * @return the number of registered target specifiers for a given
    308     *         source specifier.
    309     */
    310    int32_t countAvailableTargets(const UnicodeString& source) const;
    311 
    312    /**
    313     * Return a registered target specifier for a given source.
    314     * @param index which specifier to return, from 0 to n-1, where
    315     * n = countAvailableTargets(source)
    316     * @param source the source specifier
    317     * @param result fill-in parameter to receive the target specifier.
    318     * If source is invalid or if index is out of range, result will
    319     * be empty.
    320     * @return reference to result
    321     */
    322    UnicodeString& getAvailableTarget(int32_t index,
    323                                      const UnicodeString& source,
    324                                      UnicodeString& result) const;
    325 
    326    /**
    327     * Return the number of registered variant specifiers for a given
    328     * source-target pair.  There is always at least one variant: If
    329     * just source-target is registered, then the single variant
    330     * NO_VARIANT is returned.  If source-target/variant is registered
    331     * then that variant is returned.
    332     * @param source the source specifier
    333     * @param target the target specifier
    334     * @return the number of registered variant specifiers for a given
    335     *         source-target pair.
    336     */
    337    int32_t countAvailableVariants(const UnicodeString& source,
    338                                   const UnicodeString& target) const;
    339 
    340    /**
    341     * Return a registered variant specifier for a given source-target
    342     * pair.  If NO_VARIANT is one of the variants, then it will be
    343     * at index 0.
    344     * @param index which specifier to return, from 0 to n-1, where
    345     * n = countAvailableVariants(source, target)
    346     * @param source the source specifier
    347     * @param target the target specifier
    348     * @param result fill-in parameter to receive the variant
    349     * specifier.  If source is invalid or if target is invalid or if
    350     * index is out of range, result will be empty.
    351     * @return reference to result
    352     */
    353    UnicodeString& getAvailableVariant(int32_t index,
    354                                       const UnicodeString& source,
    355                                       const UnicodeString& target,
    356                                       UnicodeString& result) const;
    357 
    358 private:
    359 
    360    //----------------------------------------------------------------
    361    // Private implementation
    362    //----------------------------------------------------------------
    363 
    364    TransliteratorEntry* find(const UnicodeString& ID);
    365 
    366    TransliteratorEntry* find(UnicodeString& source,
    367                UnicodeString& target,
    368                UnicodeString& variant);
    369 
    370    TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
    371                              const TransliteratorSpec& trg,
    372                              const UnicodeString& variant) const;
    373 
    374    TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
    375                             const TransliteratorSpec& trg,
    376                             const UnicodeString& variant);
    377 
    378    static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
    379                               const TransliteratorSpec& specToFind,
    380                               const UnicodeString& variant,
    381                               UTransDirection direction);
    382 
    383    void registerEntry(const UnicodeString& source,
    384                       const UnicodeString& target,
    385                       const UnicodeString& variant,
    386                       TransliteratorEntry* adopted,
    387                       UBool visible);
    388 
    389    void registerEntry(const UnicodeString& ID,
    390                       TransliteratorEntry* adopted,
    391                       UBool visible);
    392 
    393    void registerEntry(const UnicodeString& ID,
    394                       const UnicodeString& source,
    395                       const UnicodeString& target,
    396                       const UnicodeString& variant,
    397                       TransliteratorEntry* adopted,
    398                       UBool visible);
    399 
    400    void registerSTV(const UnicodeString& source,
    401                     const UnicodeString& target,
    402                     const UnicodeString& variant);
    403 
    404    void removeSTV(const UnicodeString& source,
    405                   const UnicodeString& target,
    406                   const UnicodeString& variant);
    407 
    408    Transliterator* instantiateEntry(const UnicodeString& ID,
    409                                     TransliteratorEntry *entry,
    410                                     TransliteratorAlias*& aliasReturn,
    411                                     UErrorCode& status);
    412 
    413    /**
    414     * A StringEnumeration over the registered IDs in this object.
    415     */
    416    class Enumeration : public StringEnumeration {
    417    public:
    418        Enumeration(const TransliteratorRegistry& reg);
    419        virtual ~Enumeration();
    420        virtual int32_t count(UErrorCode& status) const override;
    421        virtual const UnicodeString* snext(UErrorCode& status) override;
    422        virtual void reset(UErrorCode& status) override;
    423        static UClassID U_EXPORT2 getStaticClassID();
    424        virtual UClassID getDynamicClassID() const override;
    425    private:
    426        int32_t pos;
    427        int32_t size;
    428        const TransliteratorRegistry& reg;
    429    };
    430    friend class Enumeration;
    431 
    432 private:
    433 
    434    /**
    435     * Dynamic registry mapping full IDs to Entry objects.  This
    436     * contains both public and internal entities.  The visibility is
    437     * controlled by whether an entry is listed in availableIDs and
    438     * specDAG or not.
    439     */
    440    Hashtable registry;
    441 
    442    /**
    443     * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
    444     * target => variant bitmask)
    445     */
    446    Hashtable specDAG;
    447 
    448    /**
    449     * Vector of all variant names
    450     */
    451    UVector variantList;
    452 
    453    /**
    454     * Hashtable of public full IDs.
    455     */
    456    Hashtable availableIDs;
    457 
    458    TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
    459    TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
    460 };
    461 
    462 U_NAMESPACE_END
    463 
    464 U_CFUNC UBool utrans_transliterator_cleanup(); // NOTE(review): only declared here; presumably the cleanup hook for transliterator data -- confirm at the definition
    465 
    466 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    467 
    468 #endif
    469 //eof