simpleformatter.cpp (12036B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * Copyright (C) 2014-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ****************************************************************************** 8 * simpleformatter.cpp 9 */ 10 11 #include "unicode/utypes.h" 12 #include "unicode/simpleformatter.h" 13 #include "unicode/unistr.h" 14 #include "uassert.h" 15 16 U_NAMESPACE_BEGIN 17 18 namespace { 19 20 /** 21 * Argument numbers must be smaller than this limit. 22 * Text segment lengths are offset by this much. 23 * This is currently the only unused char value in compiled patterns, 24 * except it is the maximum value of the first unit (max arg +1). 25 */ 26 const int32_t ARG_NUM_LIMIT = 0x100; 27 /** 28 * Initial and maximum char/char16_t value set for a text segment. 29 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here. 30 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing. 31 */ 32 const char16_t SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff; 33 /** 34 * Maximum length of a text segment. Longer segments are split into shorter ones. 35 */ 36 const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT; 37 38 enum { 39 APOS = 0x27, 40 DIGIT_ZERO = 0x30, 41 DIGIT_ONE = 0x31, 42 DIGIT_NINE = 0x39, 43 OPEN_BRACE = 0x7b, 44 CLOSE_BRACE = 0x7d 45 }; 46 47 inline UBool isInvalidArray(const void *array, int32_t length) { 48 return (length < 0 || (array == nullptr && length != 0)); 49 } 50 51 } // namespace 52 53 SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) { 54 if (this == &other) { 55 return *this; 56 } 57 compiledPattern = other.compiledPattern; 58 return *this; 59 } 60 61 SimpleFormatter::~SimpleFormatter() {} 62 63 UBool SimpleFormatter::applyPatternMinMaxArguments( 64 const UnicodeString &pattern, 65 int32_t min, int32_t max, 66 UErrorCode &errorCode) { 67 if (U_FAILURE(errorCode)) { 68 return false; 69 } 70 // Parse consistent with MessagePattern, but 71 // - support only simple numbered arguments 72 // - build a simple binary structure into the result string 73 const char16_t *patternBuffer = pattern.getBuffer(); 74 int32_t patternLength = pattern.length(); 75 // Reserve the first char for the number of arguments. 76 compiledPattern.setTo(static_cast<char16_t>(0)); 77 int32_t textLength = 0; 78 int32_t maxArg = -1; 79 UBool inQuote = false; 80 for (int32_t i = 0; i < patternLength;) { 81 char16_t c = patternBuffer[i++]; 82 if (c == APOS) { 83 if (i < patternLength && (c = patternBuffer[i]) == APOS) { 84 // double apostrophe, skip the second one 85 ++i; 86 } else if (inQuote) { 87 // skip the quote-ending apostrophe 88 inQuote = false; 89 continue; 90 } else if (c == OPEN_BRACE || c == CLOSE_BRACE) { 91 // Skip the quote-starting apostrophe, find the end of the quoted literal text. 92 ++i; 93 inQuote = true; 94 } else { 95 // The apostrophe is part of literal text. 96 c = APOS; 97 } 98 } else if (!inQuote && c == OPEN_BRACE) { 99 if (textLength > 0) { 100 compiledPattern.setCharAt(compiledPattern.length() - textLength - 1, 101 static_cast<char16_t>(ARG_NUM_LIMIT + textLength)); 102 textLength = 0; 103 } 104 int32_t argNumber; 105 if ((i + 1) < patternLength && 106 0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 && 107 patternBuffer[i + 1] == CLOSE_BRACE) { 108 i += 2; 109 } else { 110 // Multi-digit argument number (no leading zero) or syntax error. 111 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index) 112 // around the number, but this class does not. 113 argNumber = -1; 114 if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) { 115 argNumber = c - DIGIT_ZERO; 116 while (i < patternLength && 117 DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) { 118 argNumber = argNumber * 10 + (c - DIGIT_ZERO); 119 if (argNumber >= ARG_NUM_LIMIT) { 120 break; 121 } 122 } 123 } 124 if (argNumber < 0 || c != CLOSE_BRACE) { 125 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 126 return false; 127 } 128 } 129 if (argNumber > maxArg) { 130 maxArg = argNumber; 131 } 132 compiledPattern.append(static_cast<char16_t>(argNumber)); 133 continue; 134 } // else: c is part of literal text 135 // Append c and track the literal-text segment length. 136 if (textLength == 0) { 137 // Reserve a char for the length of a new text segment, preset the maximum length. 138 compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR); 139 } 140 compiledPattern.append(c); 141 if (++textLength == MAX_SEGMENT_LENGTH) { 142 textLength = 0; 143 } 144 } 145 if (textLength > 0) { 146 compiledPattern.setCharAt(compiledPattern.length() - textLength - 1, 147 static_cast<char16_t>(ARG_NUM_LIMIT + textLength)); 148 } 149 int32_t argCount = maxArg + 1; 150 if (argCount < min || max < argCount) { 151 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 152 return false; 153 } 154 compiledPattern.setCharAt(0, static_cast<char16_t>(argCount)); 155 return true; 156 } 157 158 UnicodeString& SimpleFormatter::format( 159 const UnicodeString &value0, 160 UnicodeString &appendTo, UErrorCode &errorCode) const { 161 const UnicodeString *values[] = { &value0 }; 162 return formatAndAppend(values, 1, appendTo, nullptr, 0, errorCode); 163 } 164 165 UnicodeString& SimpleFormatter::format( 166 const UnicodeString &value0, 167 const UnicodeString &value1, 168 UnicodeString &appendTo, UErrorCode &errorCode) const { 169 const UnicodeString *values[] = { &value0, &value1 }; 170 return formatAndAppend(values, 2, appendTo, nullptr, 0, errorCode); 171 } 172 173 UnicodeString& SimpleFormatter::format( 174 const UnicodeString &value0, 175 const UnicodeString &value1, 176 const UnicodeString &value2, 177 UnicodeString &appendTo, UErrorCode &errorCode) const { 178 const UnicodeString *values[] = { &value0, &value1, &value2 }; 179 return formatAndAppend(values, 3, appendTo, nullptr, 0, errorCode); 180 } 181 182 UnicodeString& SimpleFormatter::formatAndAppend( 183 const UnicodeString *const *values, int32_t valuesLength, 184 UnicodeString &appendTo, 185 int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const { 186 if (U_FAILURE(errorCode)) { 187 return appendTo; 188 } 189 if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) || 190 valuesLength < getArgumentLimit()) { 191 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 192 return appendTo; 193 } 194 return format(compiledPattern.getBuffer(), compiledPattern.length(), values, 195 appendTo, nullptr, true, 196 offsets, offsetsLength, errorCode); 197 } 198 199 UnicodeString &SimpleFormatter::formatAndReplace( 200 const UnicodeString *const *values, int32_t valuesLength, 201 UnicodeString &result, 202 int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const { 203 if (U_FAILURE(errorCode)) { 204 return result; 205 } 206 if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) { 207 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 208 return result; 209 } 210 const char16_t *cp = compiledPattern.getBuffer(); 211 int32_t cpLength = compiledPattern.length(); 212 if (valuesLength < getArgumentLimit(cp, cpLength)) { 213 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 214 return result; 215 } 216 217 // If the pattern starts with an argument whose value is the same object 218 // as the result, then we keep the result contents and append to it. 219 // Otherwise we replace its contents. 220 int32_t firstArg = -1; 221 // If any non-initial argument value is the same object as the result, 222 // then we first copy its contents and use that instead while formatting. 223 UnicodeString resultCopy; 224 if (getArgumentLimit(cp, cpLength) > 0) { 225 for (int32_t i = 1; i < cpLength;) { 226 int32_t n = cp[i++]; 227 if (n < ARG_NUM_LIMIT) { 228 if (values[n] == &result) { 229 if (i == 2) { 230 firstArg = n; 231 } else if (resultCopy.isEmpty() && !result.isEmpty()) { 232 resultCopy = result; 233 } 234 } 235 } else { 236 i += n - ARG_NUM_LIMIT; 237 } 238 } 239 } 240 if (firstArg < 0) { 241 result.remove(); 242 } 243 return format(cp, cpLength, values, 244 result, &resultCopy, false, 245 offsets, offsetsLength, errorCode); 246 } 247 248 UnicodeString SimpleFormatter::getTextWithNoArguments( 249 const char16_t *compiledPattern, 250 int32_t compiledPatternLength, 251 int32_t* offsets, 252 int32_t offsetsLength) { 253 for (int32_t i = 0; i < offsetsLength; i++) { 254 offsets[i] = -1; 255 } 256 int32_t capacity = compiledPatternLength - 1 - 257 getArgumentLimit(compiledPattern, compiledPatternLength); 258 UnicodeString sb(capacity, 0, 0); // Java: StringBuilder 259 for (int32_t i = 1; i < compiledPatternLength;) { 260 int32_t n = compiledPattern[i++]; 261 if (n > ARG_NUM_LIMIT) { 262 n -= ARG_NUM_LIMIT; 263 sb.append(compiledPattern + i, n); 264 i += n; 265 } else if (n < offsetsLength) { 266 // TODO(ICU-20406): This does not distinguish between "{0}{1}" and "{1}{0}". 267 // Consider removing this function and replacing it with an iterator interface. 268 offsets[n] = sb.length(); 269 } 270 } 271 return sb; 272 } 273 274 UnicodeString &SimpleFormatter::format( 275 const char16_t *compiledPattern, int32_t compiledPatternLength, 276 const UnicodeString *const *values, 277 UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, 278 int32_t *offsets, int32_t offsetsLength, 279 UErrorCode &errorCode) { 280 if (U_FAILURE(errorCode)) { 281 return result; 282 } 283 for (int32_t i = 0; i < offsetsLength; i++) { 284 offsets[i] = -1; 285 } 286 for (int32_t i = 1; i < compiledPatternLength;) { 287 int32_t n = compiledPattern[i++]; 288 if (n < ARG_NUM_LIMIT) { 289 const UnicodeString *value = values[n]; 290 if (value == nullptr) { 291 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 292 return result; 293 } 294 if (value == &result) { 295 if (forbidResultAsValue) { 296 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 297 return result; 298 } 299 if (i == 2) { 300 // We are appending to result which is also the first value object. 301 if (n < offsetsLength) { 302 offsets[n] = 0; 303 } 304 } else { 305 if (n < offsetsLength) { 306 offsets[n] = result.length(); 307 } 308 result.append(*resultCopy); 309 } 310 } else { 311 if (n < offsetsLength) { 312 offsets[n] = result.length(); 313 } 314 result.append(*value); 315 } 316 } else { 317 int32_t length = n - ARG_NUM_LIMIT; 318 result.append(compiledPattern + i, length); 319 i += length; 320 } 321 } 322 return result; 323 } 324 325 U_NAMESPACE_END