tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ppucd.h (5747B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2011-2013, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  ppucd.h
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2011dec11
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #ifndef __PPUCD_H__
     18 #define __PPUCD_H__
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/uniset.h"
     22 #include "unicode/unistr.h"
     23 
     24 #include <stdio.h>
     25 
     26 /** Additions to the uchar.h enum UProperty. */
     27 enum {
     28    /** Name_Alias */
     29    PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT,
     30    PPUCD_CONDITIONAL_CASE_MAPPINGS,
     31    PPUCD_TURKIC_CASE_FOLDING
     32 };
     33 
     34 U_NAMESPACE_BEGIN
     35 
     36 class U_TOOLUTIL_API PropertyNames {
     37 public:
     38    virtual ~PropertyNames();
     39    virtual int32_t getPropertyEnum(const char *name) const = 0;
     40    virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const = 0;
     41 };
     42 
     43 struct U_TOOLUTIL_API UniProps {
     44    UniProps();
     45    ~UniProps();
     46 
     47    int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; }
     48 
     49    UChar32 start, end;
     50    UBool binProps[UCHAR_BINARY_LIMIT];
     51    int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START];
     52    UVersionInfo age;
     53    UChar32 bmg, bpb;
     54    UChar32 scf, slc, stc, suc;
     55    int32_t digitValue;
     56    const char *numericValue;
     57    const char *name;
     58    const char *nameAlias;
     59    UnicodeString cf, lc, tc, uc;
     60    UnicodeSet scx, idType;
     61 };
     62 
     63 class U_TOOLUTIL_API PreparsedUCD {
     64 public:
     65    enum LineType {
     66        /** No line, end of file. */
     67        NO_LINE,
     68        /** Empty line. (Might contain a comment.) */
     69        EMPTY_LINE,
     70 
     71        /** ucd;6.1.0 */
     72        UNICODE_VERSION_LINE,
     73 
     74        /** property;Binary;Alpha;Alphabetic */
     75        PROPERTY_LINE,
     76        /** binary;N;No;F;False */
     77        BINARY_LINE,
     78        /** value;gc;Zs;Space_Separator */
     79        VALUE_LINE,
     80 
     81        /** defaults;0000..10FFFF;age=NA;bc=L;... */
     82        DEFAULTS_LINE,
     83        /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */
     84        BLOCK_LINE,
     85        /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */
     86        CP_LINE,
     87        /** unassigned;E01F0..E0FFF;bc=BN;CWKCF;DI;GCB=CN;NFKC_CF= */
     88        UNASSIGNED_LINE,
     89 
     90        /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */
     91        ALG_NAMES_RANGE_LINE,
     92 
     93        LINE_TYPE_COUNT
     94    };
     95 
     96    /**
     97     * Constructor.
     98     * Prepare this object for a new, empty package.
     99     */
    100    PreparsedUCD(const char *filename, UErrorCode &errorCode);
    101 
    102    /** Destructor. */
    103    ~PreparsedUCD();
    104 
    105    /** Sets (aliases) a PropertyNames implementation. Caller retains ownership. */
    106    void setPropertyNames(const PropertyNames *pn) { pnames=pn; }
    107 
    108    /**
    109     * Reads a line from the preparsed UCD file.
    110     * Splits the line by replacing each ';' with a NUL.
    111     */
    112    LineType readLine(UErrorCode &errorCode);
    113 
    114    /** Returns the number of the line read by readLine(). */
    115    int32_t getLineNumber() const { return lineNumber; }
    116 
    117    /** Returns the line's next field, or nullptr. */
    118    const char *nextField();
    119 
    120    /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */
    121    const UVersionInfo &getUnicodeVersion() const { return ucdVersion; }
    122 
    123    /** Returns true if the current line has property values. */
    124    UBool lineHasPropertyValues() const {
    125        return DEFAULTS_LINE<=lineType && lineType<=UNASSIGNED_LINE;
    126    }
    127 
    128    /**
    129     * Parses properties from the current line.
    130     * Clears newValues and sets UProperty codes for property values mentioned
    131     * on the current line (as opposed to being inherited).
    132     * Returns a pointer to the filled-in UniProps, or nullptr if something went wrong.
    133     * The returned UniProps are usable until the next line of the same type is read.
    134     */
    135    const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode);
    136 
    137    /**
    138     * Returns the code point range for the current algnamesrange line.
    139     * Calls & parses nextField().
    140     * Further nextField() calls will yield the range's type & prefix string.
    141     * Returns U_SUCCESS(errorCode).
    142     */
    143    UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode);
    144 
    145 private:
    146    UBool isLineBufferAvailable(int32_t i) {
    147        return defaultLineIndex!=i && blockLineIndex!=i;
    148    }
    149 
    150    /** Resets the field iterator and returns the line's first field (the line type field). */
    151    const char *firstField();
    152 
    153    UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
    154                        UErrorCode &errorCode);
    155    UChar32 parseCodePoint(const char *s, UErrorCode &errorCode);
    156    UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode);
    157    void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode);
    158    void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode);
    159    void parseIdentifierType(const char *s, UnicodeSet &idType, UErrorCode &errorCode);
    160 
    161    static const int32_t kNumLineBuffers=3;
    162 
    163    const PropertyNames *pnames;  // aliased
    164    FILE *file;
    165    int32_t defaultLineIndex, blockLineIndex, lineIndex;
    166    int32_t lineNumber;
    167    LineType lineType;
    168    char *fieldLimit;
    169    char *lineLimit;
    170 
    171    UVersionInfo ucdVersion;
    172    UniProps defaultProps, blockProps, cpProps;
    173    UnicodeSet blockValues;
    174    // Multiple lines so that default and block properties can maintain pointers
    175    // into their line buffers.
    176    char lines[kNumLineBuffers][4096];
    177 };
    178 
    179 U_NAMESPACE_END
    180 
    181 #endif  // __PPUCD_H__