tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uspoof_conf.h (4837B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 2008-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *   file name:  uspoof_conf.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2009Jan05
     16 *   created by: Andy Heninger
     17 *
     18 *   Internal classes for compiling confusable data into its binary (runtime) form.
     19 */
     20 
     21 #ifndef __USPOOF_BUILDCONF_H__
     22 #define __USPOOF_BUILDCONF_H__
     23 
     24 #include "unicode/utypes.h"
     25 
     26 #if !UCONFIG_NO_NORMALIZATION
     27 
     28 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 
     29 
     30 #include "unicode/uregex.h"
     31 #include "uhash.h"
     32 #include "uspoof_impl.h"
     33 
     34 U_NAMESPACE_BEGIN
     35 
     36 // SPUString
     37 //              Holds one string that is the result of a mapping defined by the
     38 //              confusable mapping data (confusables.txt from Unicode.org).
     39 //              Instances of SPUString exist only during the compilation process.
     40 
     41 struct SPUString : public UMemory {
     42    LocalPointer<UnicodeString> fStr;     // The actual string, held through a LocalPointer.
     43    int32_t      fCharOrStrTableIndex;    // Index into the final runtime data for this
     44                                          // string. For a string of length 1 it is instead the
     45                                          // single char itself, which has no string table entry.
     46 
     47    SPUString(LocalPointer<UnicodeString> s);   // s is presumably stored in fStr -- confirm in the .cpp.
     48    ~SPUString();
     49 };
     50 
     51 
     52 //  String Pool   A utility class for holding the strings that are the result of
     53 //                the spoof mappings.  These strings will ultimately end up in the
     54 //                run-time String Table.
     55 //                This is sort of like a sorted set of strings. ICU's built-in
     56 //                collections do not provide one, so it is implemented with a
     57 //                combination of a uhash and a UVector.
     58 
     59 
     60 class SPUStringPool : public UMemory {
     61  public:
     62    SPUStringPool(UErrorCode &status);
     63    ~SPUStringPool();
     64    
     65    // Add a string and return the corresponding string from the table.
     66    // If the input parameter string is already in the table, the input
     67    //  parameter is deleted and the existing string is returned.
     68    SPUString *addString(UnicodeString *src, UErrorCode &status);
     69 
     70 
     71    // Get the n-th string in the collection (ordering is affected by sort()).
     72    SPUString *getByIndex(int32_t i);
     73 
     74    // Sort the contents; affects the ordering of getByIndex().
     75    void sort(UErrorCode &status);
     76 
     77    int32_t size();       // Presumably the number of strings in the pool -- confirm in the .cpp.
     78 
     79  private:
     80    UVector     *fVec;    // Elements are SPUString *
     81    UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
     82 };
     83 
     84 
     85 // class ConfusabledataBuilder
     86 //     An instance of this class exists only while the confusable data is being built
     87 //     from source. It encapsulates the intermediate data structures used for building.
     88 //     Its only public member is one static function, which does a confusable data build.
     89 
     90 class ConfusabledataBuilder : public UMemory {
     91  private:
     92    SpoofImpl  *fSpoofImpl;
     93    char16_t   *fInput;
     94    UHashtable *fTable;
     95    UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.
     96 
     97    // The binary data is first assembled into the following collections, then
     98    //   copied to its final raw-memory destination.
     99    UVector            *fKeyVec;
    100    UVector            *fValueVec;
    101    UnicodeString      *fStringTable;
    102    
    103    SPUStringPool      *stringPool;      // NOTE(review): the only data member without the 'f' prefix.
    104    URegularExpression *fParseLine;
    105    URegularExpression *fParseHexNum;
    106    int32_t             fLineNum;        // Presumably the current input line number -- confirm in the .cpp.
    107 
    108    ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
    109    ~ConfusabledataBuilder();
    110    void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
    111 
    112    // Add an entry to the key and value tables being built
    113    //   input:  data from SLTable, MATable, etc.
    114    //   output:  entry added to fKeyVec and fValueVec
    115    void addKeyEntry(UChar32     keyChar,     // The key character
    116                     UHashtable *table,       // The table, one of SATable, MATable, etc.
    117                     int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
    118                     UErrorCode &status);
    119 
    120    // Given an index into fKeyVec & fValueVec,
    121    //   return a UnicodeString with the corresponding mapping.
    122    UnicodeString getMapping(int32_t index);
    123 
    124    // Populate the final binary output data array with the compiled data.
    125    void outputData(UErrorCode &status);
    126 
    127  public:
    128    static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
    129        int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
    130 };
    131 U_NAMESPACE_END
    132 
    133 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
    134 #endif  // !UCONFIG_NO_NORMALIZATION
    135 #endif  // __USPOOF_BUILDCONF_H__