tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

simpleformatter.cpp (12036B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 * Copyright (C) 2014-2016, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 ******************************************************************************
      8 * simpleformatter.cpp
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 #include "unicode/simpleformatter.h"
     13 #include "unicode/unistr.h"
     14 #include "uassert.h"
     15 
     16 U_NAMESPACE_BEGIN
     17 
     18 namespace {
     19 
     20 /**
     21 * Argument numbers must be smaller than this limit.
     22 * Text segment lengths are offset by this much.
     23 * This is currently the only unused char value in compiled patterns,
     24 * except it is the maximum value of the first unit (max arg +1).
     25 */
     26 const int32_t ARG_NUM_LIMIT = 0x100;
     27 /**
     28 * Initial and maximum char/char16_t value set for a text segment.
     29 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
     30 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
     31 */
     32 const char16_t SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
     33 /**
     34 * Maximum length of a text segment. Longer segments are split into shorter ones.
     35 */
     36 const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
     37 
     38 enum {
     39    APOS = 0x27,
     40    DIGIT_ZERO = 0x30,
     41    DIGIT_ONE = 0x31,
     42    DIGIT_NINE = 0x39,
     43    OPEN_BRACE = 0x7b,
     44    CLOSE_BRACE = 0x7d
     45 };
     46 
     47 inline UBool isInvalidArray(const void *array, int32_t length) {
     48   return (length < 0 || (array == nullptr && length != 0));
     49 }
     50 
     51 }  // namespace
     52 
     53 SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) {
     54    if (this == &other) {
     55        return *this;
     56    }
     57    compiledPattern = other.compiledPattern;
     58    return *this;
     59 }
     60 
     61 SimpleFormatter::~SimpleFormatter() {}
     62 
     63 UBool SimpleFormatter::applyPatternMinMaxArguments(
     64        const UnicodeString &pattern,
     65        int32_t min, int32_t max,
     66        UErrorCode &errorCode) {
     67    if (U_FAILURE(errorCode)) {
     68        return false;
     69    }
     70    // Parse consistent with MessagePattern, but
     71    // - support only simple numbered arguments
     72    // - build a simple binary structure into the result string
     73    const char16_t *patternBuffer = pattern.getBuffer();
     74    int32_t patternLength = pattern.length();
     75    // Reserve the first char for the number of arguments.
     76    compiledPattern.setTo(static_cast<char16_t>(0));
     77    int32_t textLength = 0;
     78    int32_t maxArg = -1;
     79    UBool inQuote = false;
     80    for (int32_t i = 0; i < patternLength;) {
     81        char16_t c = patternBuffer[i++];
     82        if (c == APOS) {
     83            if (i < patternLength && (c = patternBuffer[i]) == APOS) {
     84                // double apostrophe, skip the second one
     85                ++i;
     86            } else if (inQuote) {
     87                // skip the quote-ending apostrophe
     88                inQuote = false;
     89                continue;
     90            } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
     91                // Skip the quote-starting apostrophe, find the end of the quoted literal text.
     92                ++i;
     93                inQuote = true;
     94            } else {
     95                // The apostrophe is part of literal text.
     96                c = APOS;
     97            }
     98        } else if (!inQuote && c == OPEN_BRACE) {
     99            if (textLength > 0) {
    100                compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
    101                                          static_cast<char16_t>(ARG_NUM_LIMIT + textLength));
    102                textLength = 0;
    103            }
    104            int32_t argNumber;
    105            if ((i + 1) < patternLength &&
    106                    0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
    107                    patternBuffer[i + 1] == CLOSE_BRACE) {
    108                i += 2;
    109            } else {
    110                // Multi-digit argument number (no leading zero) or syntax error.
    111                // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
    112                // around the number, but this class does not.
    113                argNumber = -1;
    114                if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
    115                    argNumber = c - DIGIT_ZERO;
    116                    while (i < patternLength &&
    117                            DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
    118                        argNumber = argNumber * 10 + (c - DIGIT_ZERO);
    119                        if (argNumber >= ARG_NUM_LIMIT) {
    120                            break;
    121                        }
    122                    }
    123                }
    124                if (argNumber < 0 || c != CLOSE_BRACE) {
    125                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    126                    return false;
    127                }
    128            }
    129            if (argNumber > maxArg) {
    130                maxArg = argNumber;
    131            }
    132            compiledPattern.append(static_cast<char16_t>(argNumber));
    133            continue;
    134        }  // else: c is part of literal text
    135        // Append c and track the literal-text segment length.
    136        if (textLength == 0) {
    137            // Reserve a char for the length of a new text segment, preset the maximum length.
    138            compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
    139        }
    140        compiledPattern.append(c);
    141        if (++textLength == MAX_SEGMENT_LENGTH) {
    142            textLength = 0;
    143        }
    144    }
    145    if (textLength > 0) {
    146        compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
    147                                  static_cast<char16_t>(ARG_NUM_LIMIT + textLength));
    148    }
    149    int32_t argCount = maxArg + 1;
    150    if (argCount < min || max < argCount) {
    151        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    152        return false;
    153    }
    154    compiledPattern.setCharAt(0, static_cast<char16_t>(argCount));
    155    return true;
    156 }
    157 
    158 UnicodeString& SimpleFormatter::format(
    159        const UnicodeString &value0,
    160        UnicodeString &appendTo, UErrorCode &errorCode) const {
    161    const UnicodeString *values[] = { &value0 };
    162    return formatAndAppend(values, 1, appendTo, nullptr, 0, errorCode);
    163 }
    164 
    165 UnicodeString& SimpleFormatter::format(
    166        const UnicodeString &value0,
    167        const UnicodeString &value1,
    168        UnicodeString &appendTo, UErrorCode &errorCode) const {
    169    const UnicodeString *values[] = { &value0, &value1 };
    170    return formatAndAppend(values, 2, appendTo, nullptr, 0, errorCode);
    171 }
    172 
    173 UnicodeString& SimpleFormatter::format(
    174        const UnicodeString &value0,
    175        const UnicodeString &value1,
    176        const UnicodeString &value2,
    177        UnicodeString &appendTo, UErrorCode &errorCode) const {
    178    const UnicodeString *values[] = { &value0, &value1, &value2 };
    179    return formatAndAppend(values, 3, appendTo, nullptr, 0, errorCode);
    180 }
    181 
    182 UnicodeString& SimpleFormatter::formatAndAppend(
    183        const UnicodeString *const *values, int32_t valuesLength,
    184        UnicodeString &appendTo,
    185        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    186    if (U_FAILURE(errorCode)) {
    187        return appendTo;
    188    }
    189    if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) ||
    190            valuesLength < getArgumentLimit()) {
    191        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    192        return appendTo;
    193    }
    194    return format(compiledPattern.getBuffer(), compiledPattern.length(), values,
    195                  appendTo, nullptr, true,
    196                  offsets, offsetsLength, errorCode);
    197 }
    198 
    199 UnicodeString &SimpleFormatter::formatAndReplace(
    200        const UnicodeString *const *values, int32_t valuesLength,
    201        UnicodeString &result,
    202        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    203    if (U_FAILURE(errorCode)) {
    204        return result;
    205    }
    206    if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
    207        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    208        return result;
    209    }
    210    const char16_t *cp = compiledPattern.getBuffer();
    211    int32_t cpLength = compiledPattern.length();
    212    if (valuesLength < getArgumentLimit(cp, cpLength)) {
    213        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    214        return result;
    215    }
    216 
    217    // If the pattern starts with an argument whose value is the same object
    218    // as the result, then we keep the result contents and append to it.
    219    // Otherwise we replace its contents.
    220    int32_t firstArg = -1;
    221    // If any non-initial argument value is the same object as the result,
    222    // then we first copy its contents and use that instead while formatting.
    223    UnicodeString resultCopy;
    224    if (getArgumentLimit(cp, cpLength) > 0) {
    225        for (int32_t i = 1; i < cpLength;) {
    226            int32_t n = cp[i++];
    227            if (n < ARG_NUM_LIMIT) {
    228                if (values[n] == &result) {
    229                    if (i == 2) {
    230                        firstArg = n;
    231                    } else if (resultCopy.isEmpty() && !result.isEmpty()) {
    232                        resultCopy = result;
    233                    }
    234                }
    235            } else {
    236                i += n - ARG_NUM_LIMIT;
    237            }
    238        }
    239    }
    240    if (firstArg < 0) {
    241        result.remove();
    242    }
    243    return format(cp, cpLength, values,
    244                  result, &resultCopy, false,
    245                  offsets, offsetsLength, errorCode);
    246 }
    247 
    248 UnicodeString SimpleFormatter::getTextWithNoArguments(
    249        const char16_t *compiledPattern,
    250        int32_t compiledPatternLength,
    251        int32_t* offsets,
    252        int32_t offsetsLength) {
    253    for (int32_t i = 0; i < offsetsLength; i++) {
    254        offsets[i] = -1;
    255    }
    256    int32_t capacity = compiledPatternLength - 1 -
    257            getArgumentLimit(compiledPattern, compiledPatternLength);
    258    UnicodeString sb(capacity, 0, 0);  // Java: StringBuilder
    259    for (int32_t i = 1; i < compiledPatternLength;) {
    260        int32_t n = compiledPattern[i++];
    261        if (n > ARG_NUM_LIMIT) {
    262            n -= ARG_NUM_LIMIT;
    263            sb.append(compiledPattern + i, n);
    264            i += n;
    265        } else if (n < offsetsLength) {
    266            // TODO(ICU-20406): This does not distinguish between "{0}{1}" and "{1}{0}".
    267            // Consider removing this function and replacing it with an iterator interface.
    268            offsets[n] = sb.length();
    269        }
    270    }
    271    return sb;
    272 }
    273 
    274 UnicodeString &SimpleFormatter::format(
    275        const char16_t *compiledPattern, int32_t compiledPatternLength,
    276        const UnicodeString *const *values,
    277        UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
    278        int32_t *offsets, int32_t offsetsLength,
    279        UErrorCode &errorCode) {
    280    if (U_FAILURE(errorCode)) {
    281        return result;
    282    }
    283    for (int32_t i = 0; i < offsetsLength; i++) {
    284        offsets[i] = -1;
    285    }
    286    for (int32_t i = 1; i < compiledPatternLength;) {
    287        int32_t n = compiledPattern[i++];
    288        if (n < ARG_NUM_LIMIT) {
    289            const UnicodeString *value = values[n];
    290            if (value == nullptr) {
    291                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    292                return result;
    293            }
    294            if (value == &result) {
    295                if (forbidResultAsValue) {
    296                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    297                    return result;
    298                }
    299                if (i == 2) {
    300                    // We are appending to result which is also the first value object.
    301                    if (n < offsetsLength) {
    302                        offsets[n] = 0;
    303                    }
    304                } else {
    305                    if (n < offsetsLength) {
    306                        offsets[n] = result.length();
    307                    }
    308                    result.append(*resultCopy);
    309                }
    310            } else {
    311                if (n < offsetsLength) {
    312                    offsets[n] = result.length();
    313                }
    314                result.append(*value);
    315            }
    316        } else {
    317            int32_t length = n - ARG_NUM_LIMIT;
    318            result.append(compiledPattern + i, length);
    319            i += length;
    320        }
    321    }
    322    return result;
    323 }
    324 
    325 U_NAMESPACE_END