tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rbbistbl.cpp (8945B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 //
      4 //  file:  rbbistbl.cpp    Implementation of the ICU RBBISymbolTable class
      5 //
      6 /*
      7 ***************************************************************************
      8 *   Copyright (C) 2002-2014 International Business Machines Corporation
      9 *   and others. All rights reserved.
     10 ***************************************************************************
     11 */
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_BREAK_ITERATION
     16 
     17 #include "unicode/unistr.h"
     18 #include "unicode/uniset.h"
     19 #include "unicode/uchar.h"
     20 #include "unicode/parsepos.h"
     21 
     22 #include "cstr.h"
     23 #include "rbbinode.h"
     24 #include "rbbirb.h"
     25 #include "umutex.h"
     26 
     27 
     28 //
     29 //  RBBISymbolTableEntry_deleter    Used by the UHashTable to delete the contents
     30 //                                  when the hash table is deleted.
     31 //
     32 U_CDECL_BEGIN
     33 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
     34    icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
     35    delete px;
     36 }
     37 U_CDECL_END
     38 
     39 
     40 
     41 U_NAMESPACE_BEGIN
     42 
     43 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
     44    : fRules(rules), fRuleScanner(rs), ffffString(static_cast<char16_t>(0xffff))
     45 {
     46    fHashTable       = nullptr;
     47    fCachedSetLookup = nullptr;
     48    
     49    fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, &status);
     50    // uhash_open checks status
     51    if (U_FAILURE(status)) {
     52        return;
     53    }
     54    uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
     55 }
     56 
     57 
     58 
     59 RBBISymbolTable::~RBBISymbolTable()
     60 {
     61    uhash_close(fHashTable);
     62 }
     63 
     64 
     65 //
     66 //  RBBISymbolTable::lookup       This function from the abstract symbol table interface
     67 //                                looks up a variable name and returns a UnicodeString
     68 //                                containing the substitution text.
     69 //
     70 //                                The variable name does NOT include the leading $.
     71 //
     72 const UnicodeString  *RBBISymbolTable::lookup(const UnicodeString& s) const
     73 {
     74    RBBISymbolTableEntry  *el;
     75    RBBINode              *varRefNode;
     76    RBBINode              *exprNode;
     77    RBBINode              *usetNode;
     78    const UnicodeString   *retString;
     79    RBBISymbolTable       *This = const_cast<RBBISymbolTable*>(this); // cast off const
     80 
     81    el = static_cast<RBBISymbolTableEntry*>(uhash_get(fHashTable, &s));
     82    if (el == nullptr) {
     83        return nullptr;
     84    }
     85 
     86    varRefNode = el->val;
     87    exprNode   = varRefNode->fLeftChild;     // Root node of expression for variable
     88    if (exprNode->fType == RBBINode::setRef) {
     89        // The $variable refers to a single UnicodeSet
     90        //   return the ffffString, which will subsequently be interpreted as a
     91        //   stand-in character for the set by RBBISymbolTable::lookupMatcher()
     92        usetNode = exprNode->fLeftChild;
     93        This->fCachedSetLookup = usetNode->fInputSet;
     94        retString = &ffffString;
     95    }
     96    else
     97    {
     98        // The variable refers to something other than just a set.
     99        // return the original source string for the expression
    100        retString = &exprNode->fText;
    101        This->fCachedSetLookup = nullptr;
    102    }
    103    return retString;
    104 }
    105 
    106 
    107 
    108 //
    109 //  RBBISymbolTable::lookupMatcher   This function from the abstract symbol table
    110 //                                   interface maps a single stand-in character to a
    111 //                                   pointer to a Unicode Set.   The Unicode Set code uses this
    112 //                                   mechanism to get all references to the same $variable
    113 //                                   name to refer to a single common Unicode Set instance.
    114 //
    115 //    This implementation cheats a little, and does not maintain a map of stand-in chars
    116 //    to sets.  Instead, it takes advantage of the fact that  the UnicodeSet
    117 //    constructor will always call this function right after calling lookup(),
    118 //    and we just need to remember what set to return between these two calls.
    119 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
    120 {
    121    UnicodeSet *retVal = nullptr;
    122    RBBISymbolTable *This = const_cast<RBBISymbolTable*>(this); // cast off const
    123    if (ch == 0xffff) {
    124        retVal = fCachedSetLookup;
    125        This->fCachedSetLookup = nullptr;
    126    }
    127    return retVal;
    128 }
    129 
    130 //
    131 // RBBISymbolTable::parseReference   This function from the abstract symbol table interface
    132 //                                   looks for a $variable name in the source text.
    133 //                                   It does not look it up, only scans for it.
    134 //                                   It is used by the UnicodeSet parser.
    135 //
    136 //                                   This implementation is lifted pretty much verbatim
    137 //                                   from the rules based transliterator implementation.
    138 //                                   I didn't see an obvious way of sharing it.
    139 //
    140 UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
    141                                                ParsePosition& pos, int32_t limit) const
    142 {
    143    int32_t start = pos.getIndex();
    144    int32_t i = start;
    145    UnicodeString result;
    146    while (i < limit) {
    147        char16_t c = text.charAt(i);
    148        if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
    149            break;
    150        }
    151        ++i;
    152    }
    153    if (i == start) { // No valid name chars
    154        return result; // Indicate failure with empty string
    155    }
    156    pos.setIndex(i);
    157    text.extractBetween(start, i, result);
    158    return result;
    159 }
    160 
    161 
    162 
    163 //
    164 // RBBISymbolTable::lookupNode      Given a key (a variable name), return the
    165 //                                  corresponding RBBI Node.  If there is no entry
    166 //                                  in the table for this name, return nullptr.
    167 //
    168 RBBINode       *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
    169 
    170    RBBINode             *retNode = nullptr;
    171    RBBISymbolTableEntry *el;
    172 
    173    el = static_cast<RBBISymbolTableEntry*>(uhash_get(fHashTable, &key));
    174    if (el != nullptr) {
    175        retNode = el->val;
    176    }
    177    return retNode;
    178 }
    179 
    180 
    181 //
    182 //    RBBISymbolTable::addEntry     Add a new entry to the symbol table.
    183 //                                  Indicate an error if the name already exists -
    184 //                                    this will only occur in the case of duplicate
    185 //                                    variable assignments.
    186 //
    187 void            RBBISymbolTable::addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
    188    RBBISymbolTableEntry *e;
    189    /* test for buffer overflows */
    190    if (U_FAILURE(err)) {
    191        return;
    192    }
    193    e = static_cast<RBBISymbolTableEntry*>(uhash_get(fHashTable, &key));
    194    if (e != nullptr) {
    195        err = U_BRK_VARIABLE_REDFINITION;
    196        return;
    197    }
    198 
    199    e = new RBBISymbolTableEntry;
    200    if (e == nullptr) {
    201        err = U_MEMORY_ALLOCATION_ERROR;
    202        return;
    203    }
    204    e->key = key;
    205    e->val = val;
    206    uhash_put( fHashTable, &e->key, e, &err);
    207 }
    208 
    209 
    210 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(nullptr) {}
    211 
    212 RBBISymbolTableEntry::~RBBISymbolTableEntry() {
    213    // The "val" of a symbol table entry is a variable reference node.
    214    // The l. child of the val is the rhs expression from the assignment.
    215    // Unlike other node types, children of variable reference nodes are not
    216    //    automatically recursively deleted.  We do it manually here.
    217    delete val->fLeftChild;
    218    val->fLeftChild = nullptr;
    219 
    220    delete  val;
    221 
    222    // Note: the key UnicodeString is destructed by virtue of being in the object by value.
    223 }
    224 
    225 
    //
    //  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol
    //                                        table contents.  Only compiled when
    //                                        RBBI_DEBUG is defined.
    //
    //      Two passes are made over the hash table: first a one-line summary per
    //      variable, then the parsed node tree of every variable definition.
    //
    #ifdef RBBI_DEBUG
    void RBBISymbolTable::rbbiSymtablePrint() const {
        RBBIDebugPrintf("Variable Definitions Symbol Table\n"
               "Name                  Node         serial  String Val\n"
               "-------------------------------------------------------------------\n");

        const UHashElement *e = nullptr;

        // Pass 1: name, node address, node serial number and expression source text.
        int32_t pos = UHASH_FIRST;
        while ((e = uhash_nextElement(fHashTable, &pos)) != nullptr) {
            const RBBISymbolTableEntry *s = static_cast<const RBBISymbolTableEntry *>(e->value.pointer);

            RBBIDebugPrintf("%-19s   %8p %7d ", CStr(s->key)(), static_cast<void *>(s->val), s->val->fSerialNum);
            RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)());
        }

        RBBIDebugPrintf("\nParsed Variable Definitions\n");

        // Pass 2: restart the iteration with the same UHASH_FIRST marker as pass 1
        // and print the variable reference node followed by its expression tree.
        pos = UHASH_FIRST;
        while ((e = uhash_nextElement(fHashTable, &pos)) != nullptr) {
            const RBBISymbolTableEntry *s = static_cast<const RBBISymbolTableEntry *>(e->value.pointer);

            RBBIDebugPrintf("%s\n", CStr(s->key)());
            RBBINode::printTree(s->val, true);
            RBBINode::printTree(s->val->fLeftChild, false);
            RBBIDebugPrintf("\n");
        }
    }
    #endif
    263 
    264 
    265 
    266 
    267 
    268 U_NAMESPACE_END
    269 
    270 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */