tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ppucd.cpp (22135B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2011-2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  ppucd.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2011dec11
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/uchar.h"
     19 #include "charstr.h"
     20 #include "cstring.h"
     21 #include "ppucd.h"
     22 #include "uassert.h"
     23 #include "uparse.h"
     24 
     25 #include <stdio.h>
     26 #include <string.h>
     27 
     28 U_NAMESPACE_BEGIN
     29 
     30 PropertyNames::~PropertyNames() {}
     31 
     32 UniProps::UniProps()
     33        : start(U_SENTINEL), end(U_SENTINEL),
     34          bmg(U_SENTINEL), bpb(U_SENTINEL),
     35          scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL),
     36          digitValue(-1), numericValue(nullptr),
     37          name(nullptr), nameAlias(nullptr) {
     38    memset(binProps, 0, sizeof(binProps));
     39    memset(intProps, 0, sizeof(intProps));
     40    memset(age, 0, 4);
     41 }
     42 
     43 UniProps::~UniProps() {}
     44 
     45 const int32_t PreparsedUCD::kNumLineBuffers;
     46 
     47 PreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode)
     48        : pnames(nullptr),
     49          file(nullptr),
     50          defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0),
     51          lineNumber(0),
     52          lineType(NO_LINE),
     53          fieldLimit(nullptr), lineLimit(nullptr) {
     54    if(U_FAILURE(errorCode)) { return; }
     55 
     56    if(filename==nullptr || *filename==0 || (*filename=='-' && filename[1]==0)) {
     57        filename=nullptr;
     58        file=stdin;
     59    } else {
     60        file=fopen(filename, "r");
     61    }
     62    if(file==nullptr) {
     63        perror("error opening preparsed UCD");
     64        fprintf(stderr, "error opening preparsed UCD file %s\n", filename ? filename : "\"no file name given\"");
     65        errorCode=U_FILE_ACCESS_ERROR;
     66        return;
     67    }
     68 
     69    memset(ucdVersion, 0, 4);
     70    lines[0][0]=0;
     71 }
     72 
     73 PreparsedUCD::~PreparsedUCD() {
     74    if(file!=stdin) {
     75        fclose(file);
     76    }
     77 }
     78 
     79 // Same order as the LineType values.
     80 static const char *lineTypeStrings[]={
     81    nullptr,
     82    nullptr,
     83    "ucd",
     84    "property",
     85    "binary",
     86    "value",
     87    "defaults",
     88    "block",
     89    "cp",
     90    "unassigned",
     91    "algnamesrange"
     92 };
     93 
     94 PreparsedUCD::LineType
     95 PreparsedUCD::readLine(UErrorCode &errorCode) {
     96    if(U_FAILURE(errorCode)) { return NO_LINE; }
     97    // Select the next available line buffer.
     98    while(!isLineBufferAvailable(lineIndex)) {
     99        ++lineIndex;
    100        if (lineIndex == kNumLineBuffers) {
    101            lineIndex = 0;
    102        }
    103    }
    104    char *line=lines[lineIndex];
    105    *line=0;
    106    lineLimit=fieldLimit=line;
    107    lineType=NO_LINE;
    108    char *result=fgets(line, sizeof(lines[0]), file);
    109    if(result==nullptr) {
    110        if(ferror(file)) {
    111            perror("error reading preparsed UCD");
    112            fprintf(stderr, "error reading preparsed UCD before line %ld\n", static_cast<long>(lineNumber));
    113            errorCode=U_FILE_ACCESS_ERROR;
    114        }
    115        return NO_LINE;
    116    }
    117    ++lineNumber;
    118    if(*line=='#') {
    119        fieldLimit=strchr(line, 0);
    120        return lineType=EMPTY_LINE;
    121    }
    122    // Remove trailing /r/n.
    123    char c;
    124    char *limit=strchr(line, 0);
    125    while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; }
    126    // Remove trailing white space.
    127    while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; }
    128    *limit=0;
    129    lineLimit=limit;
    130    if(line==limit) {
    131        fieldLimit=limit;
    132        return lineType=EMPTY_LINE;
    133    }
    134    // Split by ';'.
    135    char *semi=line;
    136    while((semi=strchr(semi, ';'))!=nullptr) { *semi++=0; }
    137    fieldLimit=strchr(line, 0);
    138    // Determine the line type.
    139    int32_t type;
    140    for(type=EMPTY_LINE+1;; ++type) {
    141        if(type==LINE_TYPE_COUNT) {
    142            fprintf(stderr,
    143                    "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n",
    144                    line, static_cast<long>(lineNumber));
    145            errorCode=U_PARSE_ERROR;
    146            return NO_LINE;
    147        }
    148        if(0==strcmp(line, lineTypeStrings[type])) {
    149            break;
    150        }
    151    }
    152    lineType = static_cast<LineType>(type);
    153    if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) {
    154        u_versionFromString(ucdVersion, fieldLimit+1);
    155    }
    156    return lineType;
    157 }
    158 
    159 const char *
    160 PreparsedUCD::firstField() {
    161    char *field=lines[lineIndex];
    162    fieldLimit=strchr(field, 0);
    163    return field;
    164 }
    165 
    166 const char *
    167 PreparsedUCD::nextField() {
    168    if(fieldLimit==lineLimit) { return nullptr; }
    169    char *field=fieldLimit+1;
    170    fieldLimit=strchr(field, 0);
    171    return field;
    172 }
    173 
    174 const UniProps *
    175 PreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) {
    176    if(U_FAILURE(errorCode)) { return nullptr; }
    177    newValues.clear();
    178    if(!lineHasPropertyValues()) {
    179        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    180        return nullptr;
    181    }
    182    firstField();
    183    const char *field=nextField();
    184    if(field==nullptr) {
    185        // No range field after the type.
    186        fprintf(stderr,
    187                "error in preparsed UCD: missing default/block/cp range field "
    188                "(no second field) on line %ld\n",
    189                static_cast<long>(lineNumber));
    190        errorCode=U_PARSE_ERROR;
    191        return nullptr;
    192    }
    193    UChar32 start, end;
    194    if(!parseCodePointRange(field, start, end, errorCode)) { return nullptr; }
    195    UniProps *props;
    196    UBool insideBlock=false;  // true if cp or unassigned range inside the block range.
    197    switch(lineType) {
    198    case DEFAULTS_LINE:
    199        // Should occur before any block/cp/unassigned line.
    200        if(blockLineIndex>=0) {
    201            fprintf(stderr,
    202                    "error in preparsed UCD: default line %ld after one or more block lines\n",
    203                    static_cast<long>(lineNumber));
    204            errorCode=U_PARSE_ERROR;
    205            return nullptr;
    206        }
    207        if(defaultLineIndex>=0) {
    208            fprintf(stderr,
    209                    "error in preparsed UCD: second line with default properties on line %ld\n",
    210                    static_cast<long>(lineNumber));
    211            errorCode=U_PARSE_ERROR;
    212            return nullptr;
    213        }
    214        if(start!=0 || end!=0x10ffff) {
    215            fprintf(stderr,
    216                    "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n",
    217                    field, static_cast<long>(lineNumber));
    218            errorCode=U_PARSE_ERROR;
    219            return nullptr;
    220        }
    221        props=&defaultProps;
    222        defaultLineIndex=lineIndex;
    223        break;
    224    case BLOCK_LINE:
    225        blockProps=defaultProps;  // Block inherits default properties.
    226        props=&blockProps;
    227        blockLineIndex=lineIndex;
    228        break;
    229    case CP_LINE:
    230    case UNASSIGNED_LINE:
    231        if(blockProps.start<=start && end<=blockProps.end) {
    232            insideBlock=true;
    233            if(lineType==CP_LINE) {
    234                // Code point range fully inside the last block inherits the block properties.
    235                cpProps=blockProps;
    236            } else {
    237                // Unassigned line inside the block is based on default properties
    238                // which override block properties.
    239                cpProps=defaultProps;
    240                newValues=blockValues;
    241                // Except, it inherits the one blk=Block property.
    242                int32_t blkIndex=UCHAR_BLOCK-UCHAR_INT_START;
    243                cpProps.intProps[blkIndex]=blockProps.intProps[blkIndex];
    244                newValues.remove(static_cast<UChar32>(UCHAR_BLOCK));
    245            }
    246        } else if(start>blockProps.end || end<blockProps.start) {
    247            // Code point range fully outside the last block inherits the default properties.
    248            cpProps=defaultProps;
    249        } else {
    250            // Code point range partially overlapping with the last block is illegal.
    251            fprintf(stderr,
    252                    "error in preparsed UCD: cp range %s on line %ld only "
    253                    "partially overlaps with block range %04lX..%04lX\n",
    254                    field, static_cast<long>(lineNumber), static_cast<long>(blockProps.start), static_cast<long>(blockProps.end));
    255            errorCode=U_PARSE_ERROR;
    256            return nullptr;
    257        }
    258        props=&cpProps;
    259        break;
    260    default:
    261        // Will not occur because of the range check above.
    262        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    263        return nullptr;
    264    }
    265    props->start=start;
    266    props->end=end;
    267    while((field=nextField())!=nullptr) {
    268        if(!parseProperty(*props, field, newValues, errorCode)) { return nullptr; }
    269    }
    270    if(lineType==BLOCK_LINE) {
    271        blockValues=newValues;
    272    } else if(lineType==UNASSIGNED_LINE && insideBlock) {
    273        // Unset newValues for values that are the same as the block values.
    274        for(int32_t prop=0; prop<UCHAR_BINARY_LIMIT; ++prop) {
    275            if(newValues.contains(prop) && cpProps.binProps[prop]==blockProps.binProps[prop]) {
    276                newValues.remove(prop);
    277            }
    278        }
    279        for(int32_t prop=UCHAR_INT_START; prop<UCHAR_INT_LIMIT; ++prop) {
    280            int32_t index=prop-UCHAR_INT_START;
    281            if(newValues.contains(prop) && cpProps.intProps[index]==blockProps.intProps[index]) {
    282                newValues.remove(prop);
    283            }
    284        }
    285    }
    286    return props;
    287 }
    288 
    289 static const struct {
    290    const char *name;
    291    int32_t prop;
    292 } ppucdProperties[]={
    293    { "Name_Alias", PPUCD_NAME_ALIAS },
    294    { "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS },
    295    { "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING }
    296 };
    297 
    298 // Returns true for "ok to continue parsing fields".
    299 UBool
    300 PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
    301                            UErrorCode &errorCode) {
    302    CharString pBuffer;
    303    const char *p=field;
    304    const char *v=strchr(p, '=');
    305    int binaryValue;
    306    if(*p=='-') {
    307        if(v!=nullptr) {
    308            fprintf(stderr,
    309                    "error in preparsed UCD: mix of binary-property-no and "
    310                    "enum-property syntax '%s' on line %ld\n",
    311                    field, static_cast<long>(lineNumber));
    312            errorCode=U_PARSE_ERROR;
    313            return false;
    314        }
    315        binaryValue=0;
    316        ++p;
    317    } else if(v==nullptr) {
    318        binaryValue=1;
    319    } else {
    320        binaryValue=-1;
    321        // Copy out the property name rather than modifying the field (writing a NUL).
    322        pBuffer.append(p, static_cast<int32_t>(v - p), errorCode);
    323        p=pBuffer.data();
    324        ++v;
    325    }
    326    int32_t prop=pnames->getPropertyEnum(p);
    327    if(prop<0) {
    328        for(int32_t i=0;; ++i) {
    329            if(i==UPRV_LENGTHOF(ppucdProperties)) {
    330                // Ignore unknown property names.
    331                return true;
    332            }
    333            if(0==uprv_stricmp(p, ppucdProperties[i].name)) {
    334                prop=ppucdProperties[i].prop;
    335                U_ASSERT(prop>=0);
    336                break;
    337            }
    338        }
    339    }
    340    if(prop<UCHAR_BINARY_LIMIT) {
    341        if(binaryValue>=0) {
    342            props.binProps[prop] = static_cast<UBool>(binaryValue);
    343        } else {
    344            // No binary value for a binary property.
    345            fprintf(stderr,
    346                    "error in preparsed UCD: enum-property syntax '%s' "
    347                    "for binary property on line %ld\n",
    348                    field, static_cast<long>(lineNumber));
    349            errorCode=U_PARSE_ERROR;
    350        }
    351    } else if(binaryValue>=0) {
    352        // Binary value for a non-binary property.
    353        fprintf(stderr,
    354                "error in preparsed UCD: binary-property syntax '%s' "
    355                "for non-binary property on line %ld\n",
    356                field, static_cast<long>(lineNumber));
    357        errorCode=U_PARSE_ERROR;
    358    } else if (prop < UCHAR_INT_START) {
    359        fprintf(stderr,
    360                "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n",
    361                prop, static_cast<long>(lineNumber));
    362        errorCode=U_PARSE_ERROR;
    363    } else if(prop<UCHAR_INT_LIMIT) {
    364        int32_t value=pnames->getPropertyValueEnum(prop, v);
    365        if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) {
    366            // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work.
    367            char *end;
    368            unsigned long ccc=uprv_strtoul(v, &end, 10);
    369            if(v<end && *end==0 && ccc<=254) {
    370                value = static_cast<int32_t>(ccc);
    371            }
    372        }
    373        if(value==UCHAR_INVALID_CODE) {
    374            fprintf(stderr,
    375                    "error in preparsed UCD: '%s' is not a valid value on line %ld\n",
    376                    field, static_cast<long>(lineNumber));
    377            errorCode=U_PARSE_ERROR;
    378        } else {
    379            props.intProps[prop-UCHAR_INT_START]=value;
    380        }
    381    } else if(*v=='<') {
    382        // Do not parse default values like <code point>, just set null values.
    383        switch(prop) {
    384        case UCHAR_BIDI_MIRRORING_GLYPH:
    385            props.bmg=U_SENTINEL;
    386            break;
    387        case UCHAR_BIDI_PAIRED_BRACKET:
    388            props.bpb=U_SENTINEL;
    389            break;
    390        case UCHAR_SIMPLE_CASE_FOLDING:
    391            props.scf=U_SENTINEL;
    392            break;
    393        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
    394            props.slc=U_SENTINEL;
    395            break;
    396        case UCHAR_SIMPLE_TITLECASE_MAPPING:
    397            props.stc=U_SENTINEL;
    398            break;
    399        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
    400            props.suc=U_SENTINEL;
    401            break;
    402        case UCHAR_CASE_FOLDING:
    403            props.cf.remove();
    404            break;
    405        case UCHAR_LOWERCASE_MAPPING:
    406            props.lc.remove();
    407            break;
    408        case UCHAR_TITLECASE_MAPPING:
    409            props.tc.remove();
    410            break;
    411        case UCHAR_UPPERCASE_MAPPING:
    412            props.uc.remove();
    413            break;
    414        case UCHAR_SCRIPT_EXTENSIONS:
    415            props.scx.clear();
    416            break;
    417        default:
    418            fprintf(stderr,
    419                    "error in preparsed UCD: '%s' is not a valid default value on line %ld\n",
    420                    field, static_cast<long>(lineNumber));
    421            errorCode=U_PARSE_ERROR;
    422        }
    423    } else {
    424        char c;
    425        switch(prop) {
    426        case UCHAR_NUMERIC_VALUE:
    427            props.numericValue=v;
    428            c=*v;
    429            if('0'<=c && c<='9' && v[1]==0) {
    430                props.digitValue=c-'0';
    431            } else {
    432                props.digitValue=-1;
    433            }
    434            break;
    435        case UCHAR_NAME:
    436            props.name=v;
    437            break;
    438        case UCHAR_AGE:
    439            u_versionFromString(props.age, v);  // Writes 0.0.0.0 if v is not numeric.
    440            break;
    441        case UCHAR_BIDI_MIRRORING_GLYPH:
    442            props.bmg=parseCodePoint(v, errorCode);
    443            break;
    444        case UCHAR_BIDI_PAIRED_BRACKET:
    445            props.bpb=parseCodePoint(v, errorCode);
    446            break;
    447        case UCHAR_SIMPLE_CASE_FOLDING:
    448            props.scf=parseCodePoint(v, errorCode);
    449            break;
    450        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
    451            props.slc=parseCodePoint(v, errorCode);
    452            break;
    453        case UCHAR_SIMPLE_TITLECASE_MAPPING:
    454            props.stc=parseCodePoint(v, errorCode);
    455            break;
    456        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
    457            props.suc=parseCodePoint(v, errorCode);
    458            break;
    459        case UCHAR_CASE_FOLDING:
    460            parseString(v, props.cf, errorCode);
    461            break;
    462        case UCHAR_LOWERCASE_MAPPING:
    463            parseString(v, props.lc, errorCode);
    464            break;
    465        case UCHAR_TITLECASE_MAPPING:
    466            parseString(v, props.tc, errorCode);
    467            break;
    468        case UCHAR_UPPERCASE_MAPPING:
    469            parseString(v, props.uc, errorCode);
    470            break;
    471        case PPUCD_NAME_ALIAS:
    472            props.nameAlias=v;
    473            break;
    474        case PPUCD_CONDITIONAL_CASE_MAPPINGS:
    475        case PPUCD_TURKIC_CASE_FOLDING:
    476            // No need to parse their values: They are hardcoded in the runtime library.
    477            break;
    478        case UCHAR_SCRIPT_EXTENSIONS:
    479            parseScriptExtensions(v, props.scx, errorCode);
    480            break;
    481        case UCHAR_IDENTIFIER_TYPE:
    482            parseIdentifierType(v, props.idType, errorCode);
    483            break;
    484        default:
    485            // Ignore unhandled properties.
    486            return true;
    487        }
    488    }
    489    if(U_SUCCESS(errorCode)) {
    490        newValues.add(static_cast<UChar32>(prop));
    491        return true;
    492    } else {
    493        return false;
    494    }
    495 }
    496 
    497 UBool
    498 PreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
    499    if(U_FAILURE(errorCode)) { return false; }
    500    if(lineType!=ALG_NAMES_RANGE_LINE) {
    501        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    502        return false;
    503    }
    504    firstField();
    505    const char *field=nextField();
    506    if(field==nullptr) {
    507        // No range field after the type.
    508        fprintf(stderr,
    509                "error in preparsed UCD: missing algnamesrange range field "
    510                "(no second field) on line %ld\n",
    511                static_cast<long>(lineNumber));
    512        errorCode=U_PARSE_ERROR;
    513        return false;
    514    }
    515    return parseCodePointRange(field, start, end, errorCode);
    516 }
    517 
    518 UChar32
    519 PreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) {
    520    char *end;
    521    uint32_t value = static_cast<uint32_t>(uprv_strtoul(s, &end, 16));
    522    if(end<=s || *end!=0 || value>=0x110000) {
    523        fprintf(stderr,
    524                "error in preparsed UCD: '%s' is not a valid code point on line %ld\n",
    525                s, static_cast<long>(lineNumber));
    526        errorCode=U_PARSE_ERROR;
    527        return U_SENTINEL;
    528    }
    529    return static_cast<UChar32>(value);
    530 }
    531 
    532 UBool
    533 PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
    534    uint32_t st, e;
    535    u_parseCodePointRange(s, &st, &e, &errorCode);
    536    if(U_FAILURE(errorCode)) {
    537        fprintf(stderr,
    538                "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n",
    539                s, static_cast<long>(lineNumber));
    540        return false;
    541    }
    542    start = static_cast<UChar32>(st);
    543    end = static_cast<UChar32>(e);
    544    return true;
    545 }
    546 
    547 void
    548 PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) {
    549    char16_t *buffer=toUCharPtr(uni.getBuffer(-1));
    550    int32_t length=u_parseString(s, buffer, uni.getCapacity(), nullptr, &errorCode);
    551    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
    552        errorCode=U_ZERO_ERROR;
    553        uni.releaseBuffer(0);
    554        buffer=toUCharPtr(uni.getBuffer(length));
    555        length=u_parseString(s, buffer, uni.getCapacity(), nullptr, &errorCode);
    556    }
    557    uni.releaseBuffer(length);
    558    if(U_FAILURE(errorCode)) {
    559        fprintf(stderr,
    560                "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n",
    561                s, static_cast<long>(lineNumber));
    562    }
    563 }
    564 
    565 void
    566 PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
    567    if(U_FAILURE(errorCode)) { return; }
    568    scx.clear();
    569    CharString scString;
    570    for(;;) {
    571        const char *scs;
    572        const char *scLimit=strchr(s, ' ');
    573        if(scLimit!=nullptr) {
    574            scs = scString.clear().append(s, static_cast<int32_t>(scLimit - s), errorCode).data();
    575            if(U_FAILURE(errorCode)) { return; }
    576        } else {
    577            scs=s;
    578        }
    579        int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
    580        if(script==UCHAR_INVALID_CODE) {
    581            fprintf(stderr,
    582                    "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
    583                    scs, static_cast<long>(lineNumber));
    584            errorCode=U_PARSE_ERROR;
    585            return;
    586        } else if(scx.contains(script)) {
    587            fprintf(stderr,
    588                    "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
    589                    scs, static_cast<long>(lineNumber));
    590            errorCode=U_PARSE_ERROR;
    591            return;
    592        } else {
    593            scx.add(script);
    594        }
    595        if(scLimit!=nullptr) {
    596            s=scLimit+1;
    597        } else {
    598            break;
    599        }
    600    }
    601    if(scx.isEmpty()) {
    602        fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", static_cast<long>(lineNumber));
    603        errorCode=U_PARSE_ERROR;
    604    }
    605 }
    606 
    607 void
    608 PreparsedUCD::parseIdentifierType(const char *s, UnicodeSet &idType, UErrorCode &errorCode) {
    609    if(U_FAILURE(errorCode)) { return; }
    610    idType.clear();
    611    CharString typeString;
    612    for(;;) {
    613        const char *typeChars;
    614        const char *limit=strchr(s, ' ');
    615        if(limit!=nullptr) {
    616            typeChars = typeString.clear().append(s, static_cast<int32_t>(limit - s), errorCode).data();
    617            if(U_FAILURE(errorCode)) { return; }
    618        } else {
    619            typeChars=s;
    620        }
    621        int32_t type=pnames->getPropertyValueEnum(UCHAR_IDENTIFIER_TYPE, typeChars);
    622        if(type==UCHAR_INVALID_CODE) {
    623            fprintf(stderr,
    624                    "error in preparsed UCD: '%s' is not a valid Identifier_Type on line %ld\n",
    625                    typeChars, static_cast<long>(lineNumber));
    626            errorCode=U_PARSE_ERROR;
    627            return;
    628        } else if(idType.contains(type)) {
    629            fprintf(stderr,
    630                    "error in preparsed UCD: Identifier_Type has duplicate '%s' values on line %ld\n",
    631                    typeChars, static_cast<long>(lineNumber));
    632            errorCode=U_PARSE_ERROR;
    633            return;
    634        } else {
    635            idType.add(type);
    636        }
    637        if(limit!=nullptr) {
    638            s=limit+1;
    639        } else {
    640            break;
    641        }
    642    }
    643    if(idType.isEmpty()) {
    644        fprintf(stderr,
    645                "error in preparsed UCD: empty Identifier_Type= on line %ld\n",
    646                static_cast<long>(lineNumber));
    647        errorCode=U_PARSE_ERROR;
    648    }
    649 }
    650 
    651 U_NAMESPACE_END