tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rbbitblb.h (8722B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 //
      4 //  rbbitblb.h
      5 //
      6 
      7 /*
      8 **********************************************************************
      9 *   Copyright (c) 2002-2016, International Business Machines
     10 *   Corporation and others.  All Rights Reserved.
     11 **********************************************************************
     12 */
     13 
     14 #ifndef RBBITBLB_H
     15 #define RBBITBLB_H
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_BREAK_ITERATION
     20 
     21 #include "unicode/uobject.h"
     22 #include "unicode/rbbi.h"
     23 #include "rbbidata.h"
     24 #include "rbbirb.h"
     25 #include "rbbinode.h"
     26 
     27 
     28 U_NAMESPACE_BEGIN
     29 
     30 class RBBIRuleScanner;
     31 class RBBIRuleBuilder;
     32 class UVector32;
     33 
     34 //
     35 //  class RBBITableBuilder is part of the RBBI rule compiler.
     36 //                         It builds the state transition table used by the RBBI runtime
     37 //                         from the expression syntax tree generated by the rule scanner.
     38 //
     39 //                         This class is part of the RBBI implementation only.
     40 //                         There is no user-visible public API here.
     41 //
     42 
     43 class RBBITableBuilder : public UMemory {
     44 public:
     45    RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status);
     46    ~RBBITableBuilder();
     47 
     48    void     buildForwardTable();
     49 
     50    /** Return the runtime size in bytes of the built state table.  */
     51    int32_t  getTableSize() const;
     52 
     53    /** Fill in the runtime state table. Sufficient memory must exist at the specified location.
     54     */
     55    void     exportTable(void *where);
     56 
     57    /** Use 8 bits to encode the forward table */
     58    bool     use8BitsForTable() const;
     59 
     60    /**
     61     *  Find duplicate (redundant) character classes. Begin looking with categories.first.
     62     *  Duplicate, if found are returned in the categories parameter.
     63     *  This is an iterator-like function, used to identify character classes
     64     *  (state table columns) that can be eliminated.
     65     *  @param categories in/out parameter, specifies where to start looking for duplicates,
     66     *                and returns the first pair of duplicates found, if any.
     67     *  @return true if duplicate char classes were found, false otherwise.
     68     */
     69    bool     findDuplCharClassFrom(IntPair *categories);
     70 
     71    /** Remove a column from the state table. Used when two character categories
     72     *  have been found equivalent, and merged together, to eliminate the unneeded table column.
     73     */
     74    void     removeColumn(int32_t column);
     75 
     76    /**
     77     * Check for, and remove duplicate states (table rows).
     78     * @return the number of states removed.
     79     */
     80    int32_t  removeDuplicateStates();
     81 
     82    /** Build the safe reverse table from the already-constructed forward table. */
     83    void     buildSafeReverseTable(UErrorCode &status);
     84 
     85    /** Return the runtime size in bytes of the built safe reverse state table. */
     86    int32_t  getSafeTableSize() const;
     87 
     88    /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
     89     */
     90    void     exportSafeTable(void *where);
     91 
     92    /** Use 8 bits to encode the safe reverse table */
     93    bool     use8BitsForSafeTable() const;
     94 
     95 private:
     96    void     calcNullable(RBBINode *n);
     97    void     calcFirstPos(RBBINode *n);
     98    void     calcLastPos(RBBINode  *n);
     99    void     calcFollowPos(RBBINode *n);
    100    void     calcChainedFollowPos(RBBINode *n, RBBINode *endMarkNode);
    101    void     bofFixup();
    102    void     buildStateTable();
    103    void     mapLookAheadRules();
    104    void     flagAcceptingStates();
    105    void     flagLookAheadStates();
    106    void     flagTaggedStates();
    107    void     mergeRuleStatusVals();
    108 
    109    /**
    110     * Merge redundant state table columns, eliminating character classes with identical behavior.
    111     * Done after the state tables are generated, just before converting to their run-time format.
    112     */
    113    int32_t  mergeColumns();
    114 
    115    void     addRuleRootNodes(UVector *dest, RBBINode *node);
    116 
    117    /**
    118     *  Find duplicate (redundant) states, beginning at the specified pair,
    119     *  within this state table. This is an iterator-like function, used to
    120     *  identify states (state table rows) that can be eliminated.
    121     *  @param states in/out parameter, specifies where to start looking for duplicates,
    122     *                and returns the first pair of duplicates found, if any.
    123     *  @return true if duplicate states were found, false otherwise.
    124     */
    125    bool findDuplicateState(IntPair *states);
    126 
    127    /** Remove a duplicate state.
    128     * @param duplStates The duplicate states. The first is kept, the second is removed.
    129     *                   All references to the second in the state table are retargeted
    130     *                   to the first.
    131     */
    132    void removeState(IntPair duplStates);
    133 
    134    /** Find the next duplicate state in the safe reverse table. An iterator function.
    135     *  @param states in/out parameter, specifies where to start looking for duplicates,
    136     *                and returns the first pair of duplicates found, if any.
    137     *  @return true if a duplicate pair of states was found.
    138     */
    139    bool findDuplicateSafeState(IntPair *states);
    140 
    141    /** Remove a duplicate state from the safe table.
    142     * @param duplStates The duplicate states. The first is kept, the second is removed.
    143     *                   All references to the second in the state table are retargeted
    144     *                   to the first.
    145     */
    146    void removeSafeState(IntPair duplStates);
    147 
    148    // Set functions for UVector.
    149    //   TODO:  make a USet subclass of UVector
    150 
    151    void     setAdd(UVector *dest, UVector *source);
    152    UBool    setEquals(UVector *a, UVector *b);
    153 
    154    void     sortedAdd(UVector **dest, int32_t val);
    155 
    156 public:
    157 #ifdef RBBI_DEBUG
    158    void     printSet(UVector *s);
    159    void     printPosSets(RBBINode *n /* = nullptr */);
    160    void     printStates();
    161    void     printRuleStatusTable();
    162    void     printReverseTable();
    163 #else
    164    #define  printSet(s)
    165    #define  printPosSets(n)
    166    #define  printStates()
    167    #define  printRuleStatusTable()
    168    #define  printReverseTable()
    169 #endif
    170 
    171 private:
    172    RBBIRuleBuilder  *fRB;
    173    RBBINode         *&fTree;              // The root node of the parse tree to build a
    174                                           //   table for.
    175    UErrorCode       *fStatus;
    176 
    177    /** State Descriptors, UVector<RBBIStateDescriptor> */
    178    UVector          *fDStates;            //  D states (Aho's terminology)
    179                                           //  Index is state number
    180                                           //  Contents are RBBIStateDescriptor pointers.
    181 
    182    /** Synthesized safe table, UVector of UnicodeString, one string per table row.   */
    183    UVector          *fSafeTable;
    184 
    185    /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */
    186    UVector32        *fLookAheadRuleMap = nullptr;
    187 
    188    /* Counter used when assigning lookahead rule numbers.
    189     * Contains the last look-ahead number already in use.
    190     * The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved
    191     * for non-lookahead accepting states. See the declarations of RBBIStateTableRowT.   */
    192    int32_t          fLASlotsInUse = ACCEPTING_UNCONDITIONAL;
    193 
    194 
    195    RBBITableBuilder(const RBBITableBuilder &other) = delete; // forbid copying of this class
    196    RBBITableBuilder &operator=(const RBBITableBuilder &other) = delete; // forbid copying of this class
    197 };
    198 
    199 //
    200 //  RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
    201 //                        one for each state.
    202 class RBBIStateDescriptor : public UMemory {
    203 public:
    204    UBool            fMarked;
    205    uint32_t         fAccepting;
    206    uint32_t         fLookAhead;
    207    UVector          *fTagVals;
    208    int32_t          fTagsIdx;
    209    UVector          *fPositions;          // Set of parse tree positions associated
    210                                           //   with this state.  Unordered (it's a set).
    211                                           //   UVector contents are RBBINode *
    212 
    213    UVector32        *fDtran;              // Transitions out of this state.
    214                                           //   indexed by input character
    215                                           //   contents is int index of dest state
    216                                           //   in RBBITableBuilder.fDStates
    217 
    218    RBBIStateDescriptor(int maxInputSymbol,  UErrorCode *fStatus);
    219    ~RBBIStateDescriptor();
    220 
    221 private:
    222    RBBIStateDescriptor(const RBBIStateDescriptor &other) = delete; // forbid copying of this class
    223    RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other) = delete; // forbid copying of this class
    224 };
    225 
    226 
    227 
    228 U_NAMESPACE_END
    229 
    230 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    231 
    232 #endif