tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

brkeng.h (9981B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /**
      4 ************************************************************************************
      5 * Copyright (C) 2006-2012, International Business Machines Corporation and others. *
      6 * All Rights Reserved.                                                             *
      7 ************************************************************************************
      8 */
      9 
     10 #ifndef BRKENG_H
     11 #define BRKENG_H
     12 
     13 #include "unicode/umisc.h"
     14 #include "unicode/utypes.h"
     15 #include "unicode/uobject.h"
     16 #include "unicode/utext.h"
     17 #include "unicode/uscript.h"
     18 
     19 U_NAMESPACE_BEGIN
     20 
     21 class UnicodeSet;
     22 class UStack;
     23 class UVector32;
     24 class DictionaryMatcher;
     25 class ExternalBreakEngine;
     26 
     27 /*******************************************************************
     28 * LanguageBreakEngine
     29 */
     30 
     31 /**
     32 * <p>LanguageBreakEngines implement language-specific knowledge for
     33 * finding text boundaries within a run of characters belonging to a
     34 * specific set. The boundaries will be of a specific kind, e.g. word,
     35 * line, etc.</p>
     36 *
     37 * <p>LanguageBreakEngines should normally be implemented so as to
     38 * be shared between threads without locking.</p>
     39 */
     40 class LanguageBreakEngine : public UObject {
     41 public:
     42 
     43  /**
     44   * <p>Default constructor.</p>
     45   *
     46   */
     47  LanguageBreakEngine();
     48 
     49  /**
     50   * <p>Virtual destructor.</p>
     51   */
     52  virtual ~LanguageBreakEngine();
     53 
     54 /**
     55  * <p>Indicate whether this engine handles a particular character for
     56  * a particular kind of break. Pure virtual: subclasses must implement.</p>
     57  *
     58  * @param c A character which begins a run that the engine might handle.
     59  * @param locale The locale.
     60  * @return true if this engine handles the particular character and break
     61  * type.
     62  */
     63  virtual UBool handles(UChar32 c, const char* locale) const = 0;
     64 
     65 /**
     66  * <p>Find any breaks within a run in the supplied text. Pure virtual.</p>
     67  *
     68  * @param text A UText representing the text. The iterator is left at the
     69  * end of the run of characters which the engine is capable of handling.
     70  * @param startPos The start of the run within the supplied text.
     71  * @param endPos The end of the run within the supplied text.
     72  * @param foundBreaks A UVector32 (vector of int32_t) to receive the breaks.
     73  * @param isPhraseBreaking Phrase-breaking flag (NOTE(review): exact semantics are not visible in this header - confirm).
     74  * @param status Information on any errors encountered.
     75  * @return The number of breaks found.
     76  */
     77  virtual int32_t findBreaks( UText *text,
     78                              int32_t startPos,
     79                              int32_t endPos,
     80                              UVector32 &foundBreaks,
     81                              UBool isPhraseBreaking,
     82                              UErrorCode &status) const = 0;
     83 
     84 };
     85 
     86 /*******************************************************************
     87 * BreakEngineWrapper
     88 */
     89 
     90 /**
     91 * <p>BreakEngineWrapper implements LanguageBreakEngine as
     92 * a thin wrapper that delegates the task to an ExternalBreakEngine.
     93 * </p>
     94 */
     95 class BreakEngineWrapper : public  LanguageBreakEngine {
     96 public:
     97  /** Construct a wrapper around engine. NOTE(review): presumably adopts engine into the LocalPointer member - confirm in the implementation. */
     98  BreakEngineWrapper(ExternalBreakEngine* engine, UErrorCode &status);
     99  /** Virtual destructor. */
    100  virtual ~BreakEngineWrapper();
    101  /** Override of LanguageBreakEngine::handles for the wrapped engine. */
    102  virtual UBool handles(UChar32 c, const char* locale) const override;
    103  /** Override of LanguageBreakEngine::findBreaks for the wrapped engine. */
    104  virtual int32_t findBreaks( UText *text,
    105                              int32_t startPos,
    106                              int32_t endPos,
    107                              UVector32 &foundBreaks,
    108                              UBool isPhraseBreaking,
    109                              UErrorCode &status) const override;
    110 
    111 private:
    112  LocalPointer<ExternalBreakEngine> delegate;  // The wrapped ExternalBreakEngine.
    113 };
    114 
    115 /*******************************************************************
    116 * LanguageBreakFactory
    117 */
    118 
    119 /**
    120 * <p>LanguageBreakFactorys find and return a LanguageBreakEngine
    121 * that can determine breaks for characters in a specific set, if
    122 * such an object can be found.</p>
    123 *
    124 * <p>If a LanguageBreakFactory is to be shared between threads,
    125 * appropriate synchronization must be used; there is none internal
    126 * to the factory.</p>
    127 *
    128 * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
    129 * normally be shared between threads without synchronization, unless
    130 * the specific subclass of LanguageBreakFactory indicates otherwise.</p>
    131 *
    132 * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
    133 * it returns when it itself is deleted, unless the specific subclass of
    134 * LanguageBreakFactory indicates otherwise. Naturally, the factory should
    135 * not be deleted until the LanguageBreakEngines it has returned are no
    136 * longer needed.</p>
    137 */
    138 class LanguageBreakFactory : public UMemory {
    139 public:
    140 
    141  /**
    142   * <p>Default constructor.</p>
    143   *
    144   */
    145  LanguageBreakFactory();
    146 
    147  /**
    148   * <p>Virtual destructor.</p>
    149   */
    150  virtual ~LanguageBreakFactory();
    151 
    152 /**
    153  * <p>Find and return a LanguageBreakEngine that can find the desired
    154  * kind of break for the set of characters to which the supplied
    155  * character belongs. It is up to the set of available engines to
    156  * determine what the sets of characters are. Pure virtual.</p>
    157  *
    158  * @param c A character that begins a run for which a LanguageBreakEngine is
    159  * sought.
    160  * @param locale The locale.
    161  * @return A LanguageBreakEngine with the desired characteristics, or nullptr.
    162  */
    163  virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) = 0;
    164 
    165 };
    166 
    167 /*******************************************************************
    168 * UnhandledEngine
    169 */
    170 
    171 /**
    172 * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
    173 * handles characters that no other LanguageBreakEngine is available to
    174 * handle. It is told the character and the type of break; at its
    175 * discretion it may handle more than the specified character (e.g.,
    176 * the entire script to which that character belongs).</p>
    177 *
    178 * <p>UnhandledEngines may not be shared between threads without
    179 * external synchronization.</p>
    180 */
    181 
    182 class UnhandledEngine : public LanguageBreakEngine {
    183 private:
    184 
    185    /**
    186     * The set of characters handled.
    187     * @internal
    188     */
    189 
    190  UnicodeSet    *fHandled;
    191 
    192 public:
    193 
    194  /**
    195   * <p>Constructor.</p>
    196   * @param status Information on any errors encountered.
    197   */
    198  UnhandledEngine(UErrorCode &status);
    199 
    200  /**
    201   * <p>Virtual destructor.</p>
    202   */
    203  virtual ~UnhandledEngine();
    204 
    205 /**
    206  * <p>Indicate whether this engine handles a particular character for
    207  * a particular kind of break.</p>
    208  *
    209  * @param c A character which begins a run that the engine might handle.
    210  * @param locale The locale.
    211  * @return true if this engine handles the particular character and break
    212  * type.
    213  */
    214  virtual UBool handles(UChar32 c, const char* locale) const override;
    215 
    216 /**
    217  * <p>Find any breaks within a run in the supplied text.</p>
    218  *
    219  * @param text A UText representing the text. The iterator is left at the
    220  * end of the run of characters which the engine is capable of handling.
    221  * @param startPos The start of the run within the supplied text.
    222  * @param endPos The end of the run within the supplied text.
    223  * @param foundBreaks A UVector32 for the breaks found, if any.
    224  * @param isPhraseBreaking Phrase-breaking flag (NOTE(review): exact semantics are not visible in this header - confirm).
    225  * @param status Information on any errors encountered.
    226  * @return The number of breaks found.
    227  */
    228  virtual int32_t findBreaks( UText *text,
    229                              int32_t startPos,
    230                              int32_t endPos,
    231                              UVector32 &foundBreaks,
    232                              UBool isPhraseBreaking,
    233                              UErrorCode &status) const override;
    234 
    235 /**
    236  * <p>Tell the engine to handle a particular character.</p>
    237  *
    238  * @param c A character which the engine should handle.
    239  */
    240  virtual void handleCharacter(UChar32 c);
    241 
    242 };
    243 
    244 /*******************************************************************
    245 * ICULanguageBreakFactory
    246 */
    247 
    248 /**
    249 * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
    250 * ICU. It creates dictionary-based LanguageBreakEngines from dictionary
    251 * data in the ICU data file.</p>
    252 */
    253 class ICULanguageBreakFactory : public LanguageBreakFactory {
    254 private:
    255 
    256    /**
    257     * The stack of break engines created by this factory.
    258     * @internal
    259     */
    260 
    261  UStack    *fEngines;
    262 
    263 public:
    264 
    265  /**
    266   * <p>Standard constructor.</p>
    267   * @param status Information on any errors encountered.
    268   */
    269  ICULanguageBreakFactory(UErrorCode &status);
    270 
    271  /**
    272   * <p>Virtual destructor.</p>
    273   */
    274  virtual ~ICULanguageBreakFactory();
    275 
    276 /**
    277  * <p>Find and return a LanguageBreakEngine that can find the desired
    278  * kind of break for the set of characters to which the supplied
    279  * character belongs. It is up to the set of available engines to
    280  * determine what the sets of characters are.</p>
    281  *
    282  * @param c A character that begins a run for which a LanguageBreakEngine is
    283  * sought.
    284  * @param locale The locale.
    285  * @return A LanguageBreakEngine with the desired characteristics, or nullptr.
    286  */
    287  virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) override;
    288 
    289  /**
    290   * Add and adopt the engine. Nothing is returned; errors are reported via status.
    291   * @param engine The ExternalBreakEngine to be added and adopted. The caller
    292   *     passes ownership and should not release the memory after this.
    293   * @param status The error code.
    294   */
    295  virtual void addExternalEngine(ExternalBreakEngine* engine, UErrorCode& status);
    296 
    297 protected:
    298 /**
    299  * <p>Create a LanguageBreakEngine for the set of characters to which
    300  * the supplied character belongs.</p>
    301  *
    302  * @param c A character that begins a run for which a LanguageBreakEngine is
    303  * sought.
    304  * @param locale The locale.
    305  * @return A LanguageBreakEngine with the desired characteristics, or nullptr.
    306  */
    307  virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, const char* locale);
    308 
    309  /**
    310   * <p>Create a DictionaryMatcher for the specified script.</p>
    311   * @param script An ISO 15924 script code that identifies the dictionary to be
    312   * created.
    313   * @return A DictionaryMatcher with the desired characteristics, or nullptr.
    314   */
    315  virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
    316 
    317 private:
    318  void ensureEngines(UErrorCode& status);  // NOTE(review): presumably creates fEngines on first use - confirm in the implementation.
    319 };
    320 
    321 U_NAMESPACE_END
    322 
    323    /* BRKENG_H */
    324 #endif