tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

choicfmt.cpp (18734B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 1997-2013, International Business Machines Corporation and    *
      6 * others. All Rights Reserved.                                                *
      7 *******************************************************************************
      8 *
      9 * File CHOICFMT.CPP
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   02/19/97    aliu        Converted from java.
     15 *   03/20/97    helena      Finished first cut of implementation and got rid 
     16 *                           of nextDouble/previousDouble and replaced with
     17 *                           boolean array.
     18 *   4/10/97     aliu        Clean up.  Modified to work on AIX.
     19 *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include 
     20 *                           wchar.h.
     21 *   07/09/97    helena      Made ParsePosition into a class.
     22 *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
     23 *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
     24 *   02/22/99    stephen     Removed character literals for EBCDIC safety
     25 ********************************************************************************
     26 */
     27 
     28 #include "unicode/utypes.h"
     29 
     30 #if !UCONFIG_NO_FORMATTING
     31 
     32 #include "unicode/choicfmt.h"
     33 #include "unicode/numfmt.h"
     34 #include "unicode/locid.h"
     35 #include "cpputils.h"
     36 #include "cstring.h"
     37 #include "messageimpl.h"
     38 #include "putilimp.h"
     39 #include "uassert.h"
     40 #include <stdio.h>
     41 #include <float.h>
     42 
     43 // *****************************************************************************
     44 // class ChoiceFormat
     45 // *****************************************************************************
     46 
     47 U_NAMESPACE_BEGIN
     48 
     49 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
     50 
     51 // Special characters used by ChoiceFormat.  There are two characters
     52 // used interchangeably to indicate <=.  Either is parsed, but only
     53 // LESS_EQUAL is generated by toPattern().
     54 #define SINGLE_QUOTE ((char16_t)0x0027)   /*'*/
     55 #define LESS_THAN    ((char16_t)0x003C)   /*<*/
     56 #define LESS_EQUAL   ((char16_t)0x0023)   /*#*/
     57 #define LESS_EQUAL2  ((char16_t)0x2264)
     58 #define VERTICAL_BAR ((char16_t)0x007C)   /*|*/
     59 #define MINUS        ((char16_t)0x002D)   /*-*/
     60 
     61 static const char16_t LEFT_CURLY_BRACE = 0x7B;     /*{*/
     62 static const char16_t RIGHT_CURLY_BRACE = 0x7D;    /*}*/
     63 
     64 #ifdef INFINITY
     65 #undef INFINITY
     66 #endif
     67 #define INFINITY     ((char16_t)0x221E)
     68 
     69 //static const char16_t gPositiveInfinity[] = {INFINITY, 0};
     70 //static const char16_t gNegativeInfinity[] = {MINUS, INFINITY, 0};
     71 #define POSITIVE_INF_STRLEN 1
     72 #define NEGATIVE_INF_STRLEN 2
     73 
     74 // -------------------------------------
     75 // Creates a ChoiceFormat instance based on the pattern.
     76 
     77 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
     78                           UErrorCode& status)
     79 : constructorErrorCode(status),
     80  msgPattern(status)
     81 {
     82    applyPattern(newPattern, status);
     83 }
     84 
     85 // -------------------------------------
     86 // Creates a ChoiceFormat instance with the limit array and 
     87 // format strings for each limit.
     88 
     89 ChoiceFormat::ChoiceFormat(const double* limits, 
     90                           const UnicodeString* formats, 
     91                           int32_t cnt )
     92 : constructorErrorCode(U_ZERO_ERROR),
     93  msgPattern(constructorErrorCode)
     94 {
     95    setChoices(limits, nullptr, formats, cnt, constructorErrorCode);
     96 }
     97 
     98 // -------------------------------------
     99 
    100 ChoiceFormat::ChoiceFormat(const double* limits, 
    101                           const UBool* closures,
    102                           const UnicodeString* formats, 
    103                           int32_t cnt )
    104 : constructorErrorCode(U_ZERO_ERROR),
    105  msgPattern(constructorErrorCode)
    106 {
    107    setChoices(limits, closures, formats, cnt, constructorErrorCode);
    108 }
    109 
    110 // -------------------------------------
    111 // copy constructor
    112 
    113 ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that) 
    114 : NumberFormat(that),
    115  constructorErrorCode(that.constructorErrorCode),
    116  msgPattern(that.msgPattern)
    117 {
    118 }
    119 
    120 // -------------------------------------
    121 // Private constructor that creates a 
    122 // ChoiceFormat instance based on the 
    123 // pattern and populates UParseError
    124 
    125 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
    126                           UParseError& parseError,
    127                           UErrorCode& status)
    128 : constructorErrorCode(status),
    129  msgPattern(status)
    130 {
    131    applyPattern(newPattern,parseError, status);
    132 }
    133 // -------------------------------------
    134 
    135 bool
    136 ChoiceFormat::operator==(const Format& that) const
    137 {
    138    if (this == &that) return true;
    139    if (!NumberFormat::operator==(that)) return false;
    140    const ChoiceFormat& thatAlias = static_cast<const ChoiceFormat&>(that);
    141    return msgPattern == thatAlias.msgPattern;
    142 }
    143 
    144 // -------------------------------------
    145 // copy constructor
    146 
    147 const ChoiceFormat&
    148 ChoiceFormat::operator=(const   ChoiceFormat& that)
    149 {
    150    if (this != &that) {
    151        NumberFormat::operator=(that);
    152        constructorErrorCode = that.constructorErrorCode;
    153        msgPattern = that.msgPattern;
    154    }
    155    return *this;
    156 }
    157 
    158 // -------------------------------------
    159 
    160 ChoiceFormat::~ChoiceFormat()
    161 {
    162 }
    163 
    164 // -------------------------------------
    165 
    166 /**
    167 * Convert a double value to a string without the overhead of NumberFormat.
    168 */
    169 UnicodeString&
    170 ChoiceFormat::dtos(double value,
    171                   UnicodeString& string)
    172 {
    173    /* Buffer to contain the digits and any extra formatting stuff. */
    174    char temp[DBL_DIG + 16];
    175    char *itrPtr = temp;
    176    char *expPtr;
    177 
    178    snprintf(temp, sizeof(temp), "%.*g", DBL_DIG, value);
    179 
    180    /* Find and convert the decimal point.
    181       Using setlocale on some machines will cause snprintf to use a comma for certain locales.
    182    */
    183    while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
    184        itrPtr++;
    185    }
    186    if (*itrPtr != 0 && *itrPtr != 'e') {
    187        /* We reached something that looks like a decimal point.
    188        In case someone used setlocale(), which changes the decimal point. */
    189        *itrPtr = '.';
    190        itrPtr++;
    191    }
    192    /* Search for the exponent */
    193    while (*itrPtr && *itrPtr != 'e') {
    194        itrPtr++;
    195    }
    196    if (*itrPtr == 'e') {
    197        itrPtr++;
    198        /* Verify the exponent sign */
    199        if (*itrPtr == '+' || *itrPtr == '-') {
    200            itrPtr++;
    201        }
    202        /* Remove leading zeros. You will see this on Windows machines. */
    203        expPtr = itrPtr;
    204        while (*itrPtr == '0') {
    205            itrPtr++;
    206        }
    207        if (*itrPtr && expPtr != itrPtr) {
    208            /* Shift the exponent without zeros. */
    209            while (*itrPtr) {
    210                *(expPtr++)  = *(itrPtr++);
    211            }
    212            // NUL terminate
    213            *expPtr = 0;
    214        }
    215    }
    216 
    217    string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
    218    return string;
    219 }
    220 
    221 // -------------------------------------
    222 // calls the overloaded applyPattern method.
    223 
    224 void
    225 ChoiceFormat::applyPattern(const UnicodeString& pattern,
    226                           UErrorCode& status)
    227 {
    228    msgPattern.parseChoiceStyle(pattern, nullptr, status);
    229    constructorErrorCode = status;
    230 }
    231 
    232 // -------------------------------------
    233 // Applies the pattern to this ChoiceFormat instance.
    234 
    235 void
    236 ChoiceFormat::applyPattern(const UnicodeString& pattern,
    237                           UParseError& parseError,
    238                           UErrorCode& status)
    239 {
    240    msgPattern.parseChoiceStyle(pattern, &parseError, status);
    241    constructorErrorCode = status;
    242 }
    243 // -------------------------------------
    244 // Returns the input pattern string.
    245 
    246 UnicodeString&
    247 ChoiceFormat::toPattern(UnicodeString& result) const
    248 {
    249    return result = msgPattern.getPatternString();
    250 }
    251 
    252 // -------------------------------------
    253 // Sets the limit and format arrays. 
    254 void
    255 ChoiceFormat::setChoices(  const double* limits, 
    256                           const UnicodeString* formats, 
    257                           int32_t cnt )
    258 {
    259    UErrorCode errorCode = U_ZERO_ERROR;
    260    setChoices(limits, nullptr, formats, cnt, errorCode);
    261 }
    262 
    263 // -------------------------------------
    264 // Sets the limit and format arrays. 
    265 void
    266 ChoiceFormat::setChoices(  const double* limits, 
    267                           const UBool* closures,
    268                           const UnicodeString* formats, 
    269                           int32_t cnt )
    270 {
    271    UErrorCode errorCode = U_ZERO_ERROR;
    272    setChoices(limits, closures, formats, cnt, errorCode);
    273 }
    274 
    275 void
    276 ChoiceFormat::setChoices(const double* limits,
    277                         const UBool* closures,
    278                         const UnicodeString* formats,
    279                         int32_t count,
    280                         UErrorCode &errorCode) {
    281    if (U_FAILURE(errorCode)) {
    282        return;
    283    }
    284    if (limits == nullptr || formats == nullptr) {
    285        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    286        return;
    287    }
    288    // Reconstruct the original input pattern.
    289    // Modified version of the pre-ICU 4.8 toPattern() implementation.
    290    UnicodeString result;
    291    for (int32_t i = 0; i < count; ++i) {
    292        if (i != 0) {
    293            result += VERTICAL_BAR;
    294        }
    295        UnicodeString buf;
    296        if (uprv_isPositiveInfinity(limits[i])) {
    297            result += INFINITY;
    298        } else if (uprv_isNegativeInfinity(limits[i])) {
    299            result += MINUS;
    300            result += INFINITY;
    301        } else {
    302            result += dtos(limits[i], buf);
    303        }
    304        if (closures != nullptr && closures[i]) {
    305            result += LESS_THAN;
    306        } else {
    307            result += LESS_EQUAL;
    308        }
    309        // Append formats[i], using quotes if there are special
    310        // characters.  Single quotes themselves must be escaped in
    311        // either case.
    312        const UnicodeString& text = formats[i];
    313        int32_t textLength = text.length();
    314        int32_t nestingLevel = 0;
    315        for (int32_t j = 0; j < textLength; ++j) {
    316            char16_t c = text[j];
    317            if (c == SINGLE_QUOTE && nestingLevel == 0) {
    318                // Double each top-level apostrophe.
    319                result.append(c);
    320            } else if (c == VERTICAL_BAR && nestingLevel == 0) {
    321                // Surround each pipe symbol with apostrophes for quoting.
    322                // If the next character is an apostrophe, then that will be doubled,
    323                // and although the parser will see the apostrophe pairs beginning
    324                // and ending one character earlier than our doubling, the result
    325                // is as desired.
    326                //   | -> '|'
    327                //   |' -> '|'''
    328                //   |'' -> '|''''' etc.
    329                result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
    330                continue;  // Skip the append(c) at the end of the loop body.
    331            } else if (c == LEFT_CURLY_BRACE) {
    332                ++nestingLevel;
    333            } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
    334                --nestingLevel;
    335            }
    336            result.append(c);
    337        }
    338    }
    339    // Apply the reconstructed pattern.
    340    applyPattern(result, errorCode);
    341 }
    342 
    343 // -------------------------------------
    344 // Gets the limit array.
    345 
    346 const double*
    347 ChoiceFormat::getLimits(int32_t& cnt) const 
    348 {
    349    cnt = 0;
    350    return nullptr;
    351 }
    352 
    353 // -------------------------------------
    354 // Gets the closures array.
    355 
    356 const UBool*
    357 ChoiceFormat::getClosures(int32_t& cnt) const 
    358 {
    359    cnt = 0;
    360    return nullptr;
    361 }
    362 
    363 // -------------------------------------
    364 // Gets the format array.
    365 
    366 const UnicodeString*
    367 ChoiceFormat::getFormats(int32_t& cnt) const
    368 {
    369    cnt = 0;
    370    return nullptr;
    371 }
    372 
    373 // -------------------------------------
    374 // Formats an int64 number, it's actually formatted as
    375 // a double.  The returned format string may differ
    376 // from the input number because of this.
    377 
    378 UnicodeString&
    379 ChoiceFormat::format(int64_t number, 
    380                     UnicodeString& appendTo, 
    381                     FieldPosition& status) const
    382 {
    383    return format(static_cast<double>(number), appendTo, status);
    384 }
    385 
    386 // -------------------------------------
    387 // Formats an int32_t number, it's actually formatted as
    388 // a double.
    389 
    390 UnicodeString&
    391 ChoiceFormat::format(int32_t number, 
    392                     UnicodeString& appendTo, 
    393                     FieldPosition& status) const
    394 {
    395    return format(static_cast<double>(number), appendTo, status);
    396 }
    397 
    398 // -------------------------------------
    399 // Formats a double number.
    400 
    401 UnicodeString&
    402 ChoiceFormat::format(double number, 
    403                     UnicodeString& appendTo, 
    404                     FieldPosition& /*pos*/) const
    405 {
    406    if (msgPattern.countParts() == 0) {
    407        // No pattern was applied, or it failed.
    408        return appendTo;
    409    }
    410    // Get the appropriate sub-message.
    411    int32_t msgStart = findSubMessage(msgPattern, 0, number);
    412    if (!MessageImpl::jdkAposMode(msgPattern)) {
    413        int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
    414        int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
    415        appendTo.append(msgPattern.getPatternString(),
    416                        patternStart,
    417                        msgPattern.getPatternIndex(msgLimit) - patternStart);
    418        return appendTo;
    419    }
    420    // JDK compatibility mode: Remove SKIP_SYNTAX.
    421    return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
    422 }
    423 
    424 int32_t
    425 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
    426    int32_t count = pattern.countParts();
    427    int32_t msgStart;
    428    // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
    429    // until ARG_LIMIT or end of choice-only pattern.
    430    // Ignore the first number and selector and start the loop on the first message.
    431    partIndex += 2;
    432    for (;;) {
    433        // Skip but remember the current sub-message.
    434        msgStart = partIndex;
    435        partIndex = pattern.getLimitPartIndex(partIndex);
    436        if (++partIndex >= count) {
    437            // Reached the end of the choice-only pattern.
    438            // Return with the last sub-message.
    439            break;
    440        }
    441        const MessagePattern::Part &part = pattern.getPart(partIndex++);
    442        UMessagePatternPartType type = part.getType();
    443        if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
    444            // Reached the end of the ChoiceFormat style.
    445            // Return with the last sub-message.
    446            break;
    447        }
    448        // part is an ARG_INT or ARG_DOUBLE
    449        U_ASSERT(MessagePattern::Part::hasNumericValue(type));
    450        double boundary = pattern.getNumericValue(part);
    451        // Fetch the ARG_SELECTOR character.
    452        int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
    453        char16_t boundaryChar = pattern.getPatternString().charAt(selectorIndex);
    454        if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
    455            // The number is in the interval between the previous boundary and the current one.
    456            // Return with the sub-message between them.
    457            // The !(a>b) and !(a>=b) comparisons are equivalent to
    458            // (a<=b) and (a<b) except they "catch" NaN.
    459            break;
    460        }
    461    }
    462    return msgStart;
    463 }
    464 
    465 // -------------------------------------
    466 // Formats an array of objects. Checks if the data type of the objects
    467 // to get the right value for formatting.  
    468 
    469 UnicodeString&
    470 ChoiceFormat::format(const Formattable* objs,
    471                     int32_t cnt,
    472                     UnicodeString& appendTo,
    473                     FieldPosition& pos,
    474                     UErrorCode& status) const
    475 {
    476    if(cnt < 0) {
    477        status = U_ILLEGAL_ARGUMENT_ERROR;
    478        return appendTo;
    479    }
    480    if (msgPattern.countParts() == 0) {
    481        status = U_INVALID_STATE_ERROR;
    482        return appendTo;
    483    }
    484 
    485    for (int32_t i = 0; i < cnt; i++) {
    486        double objDouble = objs[i].getDouble(status);
    487        if (U_SUCCESS(status)) {
    488            format(objDouble, appendTo, pos);
    489        }
    490    }
    491 
    492    return appendTo;
    493 }
    494 
    495 // -------------------------------------
    496 
    497 void
    498 ChoiceFormat::parse(const UnicodeString& text, 
    499                    Formattable& result,
    500                    ParsePosition& pos) const
    501 {
    502    result.setDouble(parseArgument(msgPattern, 0, text, pos));
    503 }
    504 
    505 double
    506 ChoiceFormat::parseArgument(
    507        const MessagePattern &pattern, int32_t partIndex,
    508        const UnicodeString &source, ParsePosition &pos) {
    509    // find the best number (defined as the one with the longest parse)
    510    int32_t start = pos.getIndex();
    511    int32_t furthest = start;
    512    double bestNumber = uprv_getNaN();
    513    double tempNumber = 0.0;
    514    int32_t count = pattern.countParts();
    515    while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
    516        tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
    517        partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
    518        int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
    519        int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
    520        if (len >= 0) {
    521            int32_t newIndex = start + len;
    522            if (newIndex > furthest) {
    523                furthest = newIndex;
    524                bestNumber = tempNumber;
    525                if (furthest == source.length()) {
    526                    break;
    527                }
    528            }
    529        }
    530        partIndex = msgLimit + 1;
    531    }
    532    if (furthest == start) {
    533        pos.setErrorIndex(start);
    534    } else {
    535        pos.setIndex(furthest);
    536    }
    537    return bestNumber;
    538 }
    539 
    540 int32_t
    541 ChoiceFormat::matchStringUntilLimitPart(
    542        const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
    543        const UnicodeString &source, int32_t sourceOffset) {
    544    int32_t matchingSourceLength = 0;
    545    const UnicodeString &msgString = pattern.getPatternString();
    546    int32_t prevIndex = pattern.getPart(partIndex).getLimit();
    547    for (;;) {
    548        const MessagePattern::Part &part = pattern.getPart(++partIndex);
    549        if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
    550            int32_t index = part.getIndex();
    551            int32_t length = index - prevIndex;
    552            if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
    553                return -1;  // mismatch
    554            }
    555            matchingSourceLength += length;
    556            if (partIndex == limitPartIndex) {
    557                return matchingSourceLength;
    558            }
    559            prevIndex = part.getLimit();  // SKIP_SYNTAX
    560        }
    561    }
    562 }
    563 
    564 // -------------------------------------
    565 
    566 ChoiceFormat*
    567 ChoiceFormat::clone() const
    568 {
    569    ChoiceFormat *aCopy = new ChoiceFormat(*this);
    570    return aCopy;
    571 }
    572 
    573 U_NAMESPACE_END
    574 
    575 #endif /* #if !UCONFIG_NO_FORMATTING */
    576 
    577 //eof