string_util.cc (14472B)
1 // Copyright 2013 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_util.h" 6 7 #include <errno.h> 8 #include <math.h> 9 #include <stdarg.h> 10 #include <stdint.h> 11 #include <stdio.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <time.h> 15 #include <wchar.h> 16 17 #include <limits> 18 #include <type_traits> 19 #include <vector> 20 21 #include "base/check_op.h" 22 #include "base/no_destructor.h" 23 #include "base/ranges/algorithm.h" 24 #include "base/strings/string_util_impl_helpers.h" 25 #include "base/strings/string_util_internal.h" 26 #include "base/strings/utf_string_conversion_utils.h" 27 #include "base/strings/utf_string_conversions.h" 28 #include "base/third_party/icu/icu_utf.h" 29 #include "build/build_config.h" 30 #include "third_party/abseil-cpp/absl/types/optional.h" 31 32 namespace base { 33 34 bool IsWprintfFormatPortable(const wchar_t* format) { 35 for (const wchar_t* position = format; *position != '\0'; ++position) { 36 if (*position == '%') { 37 bool in_specification = true; 38 bool modifier_l = false; 39 while (in_specification) { 40 // Eat up characters until reaching a known specifier. 41 if (*++position == '\0') { 42 // The format string ended in the middle of a specification. Call 43 // it portable because no unportable specifications were found. The 44 // string is equally broken on all platforms. 45 return true; 46 } 47 48 if (*position == 'l') { 49 // 'l' is the only thing that can save the 's' and 'c' specifiers. 50 modifier_l = true; 51 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 52 *position == 'S' || *position == 'C' || *position == 'F' || 53 *position == 'D' || *position == 'O' || *position == 'U') { 54 // Not portable. 55 return false; 56 } 57 58 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 59 // Portable, keep scanning the rest of the format string. 60 in_specification = false; 61 } 62 } 63 } 64 } 65 66 return true; 67 } 68 69 std::string ToLowerASCII(StringPiece str) { 70 return internal::ToLowerASCIIImpl(str); 71 } 72 73 std::u16string ToLowerASCII(StringPiece16 str) { 74 return internal::ToLowerASCIIImpl(str); 75 } 76 77 std::string ToUpperASCII(StringPiece str) { 78 return internal::ToUpperASCIIImpl(str); 79 } 80 81 std::u16string ToUpperASCII(StringPiece16 str) { 82 return internal::ToUpperASCIIImpl(str); 83 } 84 85 const std::string& EmptyString() { 86 static const base::NoDestructor<std::string> s; 87 return *s; 88 } 89 90 const std::u16string& EmptyString16() { 91 static const base::NoDestructor<std::u16string> s16; 92 return *s16; 93 } 94 95 bool ReplaceChars(StringPiece16 input, 96 StringPiece16 replace_chars, 97 StringPiece16 replace_with, 98 std::u16string* output) { 99 return internal::ReplaceCharsT(input, replace_chars, replace_with, output); 100 } 101 102 bool ReplaceChars(StringPiece input, 103 StringPiece replace_chars, 104 StringPiece replace_with, 105 std::string* output) { 106 return internal::ReplaceCharsT(input, replace_chars, replace_with, output); 107 } 108 109 bool RemoveChars(StringPiece16 input, 110 StringPiece16 remove_chars, 111 std::u16string* output) { 112 return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output); 113 } 114 115 bool RemoveChars(StringPiece input, 116 StringPiece remove_chars, 117 std::string* output) { 118 return internal::ReplaceCharsT(input, remove_chars, StringPiece(), output); 119 } 120 121 bool TrimString(StringPiece16 input, 122 StringPiece16 trim_chars, 123 std::u16string* output) { 124 return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) != 125 TRIM_NONE; 126 } 127 128 bool TrimString(StringPiece input, 129 StringPiece trim_chars, 130 std::string* output) { 131 return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) != 132 TRIM_NONE; 133 } 134 135 StringPiece16 TrimString(StringPiece16 input, 136 StringPiece16 trim_chars, 137 TrimPositions positions) { 138 return internal::TrimStringPieceT(input, trim_chars, positions); 139 } 140 141 StringPiece TrimString(StringPiece input, 142 StringPiece trim_chars, 143 TrimPositions positions) { 144 return internal::TrimStringPieceT(input, trim_chars, positions); 145 } 146 147 void TruncateUTF8ToByteSize(const std::string& input, 148 const size_t byte_size, 149 std::string* output) { 150 DCHECK(output); 151 if (byte_size > input.length()) { 152 *output = input; 153 return; 154 } 155 DCHECK_LE(byte_size, 156 static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); 157 // Note: This cast is necessary because CBU8_NEXT uses int32_ts. 158 int32_t truncation_length = static_cast<int32_t>(byte_size); 159 int32_t char_index = truncation_length - 1; 160 const char* data = input.data(); 161 162 // Using CBU8, we will move backwards from the truncation point 163 // to the beginning of the string looking for a valid UTF8 164 // character. Once a full UTF8 character is found, we will 165 // truncate the string to the end of that character. 166 while (char_index >= 0) { 167 int32_t prev = char_index; 168 base_icu::UChar32 code_point = 0; 169 CBU8_NEXT(reinterpret_cast<const uint8_t*>(data), char_index, 170 truncation_length, code_point); 171 if (!IsValidCharacter(code_point)) { 172 char_index = prev - 1; 173 } else { 174 break; 175 } 176 } 177 178 if (char_index >= 0 ) 179 *output = input.substr(0, static_cast<size_t>(char_index)); 180 else 181 output->clear(); 182 } 183 184 TrimPositions TrimWhitespace(StringPiece16 input, 185 TrimPositions positions, 186 std::u16string* output) { 187 return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16), 188 positions, output); 189 } 190 191 StringPiece16 TrimWhitespace(StringPiece16 input, 192 TrimPositions positions) { 193 return internal::TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), 194 positions); 195 } 196 197 TrimPositions TrimWhitespaceASCII(StringPiece input, 198 TrimPositions positions, 199 std::string* output) { 200 return internal::TrimStringT(input, StringPiece(kWhitespaceASCII), positions, 201 output); 202 } 203 204 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) { 205 return internal::TrimStringPieceT(input, StringPiece(kWhitespaceASCII), 206 positions); 207 } 208 209 std::u16string CollapseWhitespace(StringPiece16 text, 210 bool trim_sequences_with_line_breaks) { 211 return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 212 } 213 214 std::string CollapseWhitespaceASCII(StringPiece text, 215 bool trim_sequences_with_line_breaks) { 216 return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 217 } 218 219 bool ContainsOnlyChars(StringPiece input, StringPiece characters) { 220 return input.find_first_not_of(characters) == StringPiece::npos; 221 } 222 223 bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters) { 224 return input.find_first_not_of(characters) == StringPiece16::npos; 225 } 226 227 228 bool IsStringASCII(StringPiece str) { 229 return internal::DoIsStringASCII(str.data(), str.length()); 230 } 231 232 bool IsStringASCII(StringPiece16 str) { 233 return internal::DoIsStringASCII(str.data(), str.length()); 234 } 235 236 #if defined(WCHAR_T_IS_UTF32) 237 bool IsStringASCII(WStringPiece str) { 238 return internal::DoIsStringASCII(str.data(), str.length()); 239 } 240 #endif 241 242 bool IsStringUTF8(StringPiece str) { 243 return internal::DoIsStringUTF8<IsValidCharacter>(str); 244 } 245 246 bool IsStringUTF8AllowingNoncharacters(StringPiece str) { 247 return internal::DoIsStringUTF8<IsValidCodepoint>(str); 248 } 249 250 bool EqualsASCII(StringPiece16 str, StringPiece ascii) { 251 return ranges::equal(ascii, str); 252 } 253 254 bool StartsWith(StringPiece str, 255 StringPiece search_for, 256 CompareCase case_sensitivity) { 257 return internal::StartsWithT(str, search_for, case_sensitivity); 258 } 259 260 bool StartsWith(StringPiece16 str, 261 StringPiece16 search_for, 262 CompareCase case_sensitivity) { 263 return internal::StartsWithT(str, search_for, case_sensitivity); 264 } 265 266 bool EndsWith(StringPiece str, 267 StringPiece search_for, 268 CompareCase case_sensitivity) { 269 return internal::EndsWithT(str, search_for, case_sensitivity); 270 } 271 272 bool EndsWith(StringPiece16 str, 273 StringPiece16 search_for, 274 CompareCase case_sensitivity) { 275 return internal::EndsWithT(str, search_for, case_sensitivity); 276 } 277 278 char HexDigitToInt(char c) { 279 DCHECK(IsHexDigit(c)); 280 if (c >= '0' && c <= '9') 281 return static_cast<char>(c - '0'); 282 return (c >= 'A' && c <= 'F') ? static_cast<char>(c - 'A' + 10) 283 : static_cast<char>(c - 'a' + 10); 284 } 285 286 static const char* const kByteStringsUnlocalized[] = { 287 " B", 288 " kB", 289 " MB", 290 " GB", 291 " TB", 292 " PB" 293 }; 294 295 std::u16string FormatBytesUnlocalized(int64_t bytes) { 296 double unit_amount = static_cast<double>(bytes); 297 size_t dimension = 0; 298 const int kKilo = 1024; 299 while (unit_amount >= kKilo && 300 dimension < std::size(kByteStringsUnlocalized) - 1) { 301 unit_amount /= kKilo; 302 dimension++; 303 } 304 305 char buf[64]; 306 if (bytes != 0 && dimension > 0 && unit_amount < 100) { 307 base::snprintf(buf, std::size(buf), "%.1lf%s", unit_amount, 308 kByteStringsUnlocalized[dimension]); 309 } else { 310 base::snprintf(buf, std::size(buf), "%.0lf%s", unit_amount, 311 kByteStringsUnlocalized[dimension]); 312 } 313 314 return ASCIIToUTF16(buf); 315 } 316 317 void ReplaceFirstSubstringAfterOffset(std::u16string* str, 318 size_t start_offset, 319 StringPiece16 find_this, 320 StringPiece16 replace_with) { 321 internal::DoReplaceMatchesAfterOffset( 322 str, start_offset, internal::MakeSubstringMatcher(find_this), 323 replace_with, internal::ReplaceType::REPLACE_FIRST); 324 } 325 326 void ReplaceFirstSubstringAfterOffset(std::string* str, 327 size_t start_offset, 328 StringPiece find_this, 329 StringPiece replace_with) { 330 internal::DoReplaceMatchesAfterOffset( 331 str, start_offset, internal::MakeSubstringMatcher(find_this), 332 replace_with, internal::ReplaceType::REPLACE_FIRST); 333 } 334 335 void ReplaceSubstringsAfterOffset(std::u16string* str, 336 size_t start_offset, 337 StringPiece16 find_this, 338 StringPiece16 replace_with) { 339 internal::DoReplaceMatchesAfterOffset( 340 str, start_offset, internal::MakeSubstringMatcher(find_this), 341 replace_with, internal::ReplaceType::REPLACE_ALL); 342 } 343 344 void ReplaceSubstringsAfterOffset(std::string* str, 345 size_t start_offset, 346 StringPiece find_this, 347 StringPiece replace_with) { 348 internal::DoReplaceMatchesAfterOffset( 349 str, start_offset, internal::MakeSubstringMatcher(find_this), 350 replace_with, internal::ReplaceType::REPLACE_ALL); 351 } 352 353 char* WriteInto(std::string* str, size_t length_with_null) { 354 return internal::WriteIntoT(str, length_with_null); 355 } 356 357 char16_t* WriteInto(std::u16string* str, size_t length_with_null) { 358 return internal::WriteIntoT(str, length_with_null); 359 } 360 361 std::string JoinString(span<const std::string> parts, StringPiece separator) { 362 return internal::JoinStringT(parts, separator); 363 } 364 365 std::u16string JoinString(span<const std::u16string> parts, 366 StringPiece16 separator) { 367 return internal::JoinStringT(parts, separator); 368 } 369 370 std::string JoinString(span<const StringPiece> parts, StringPiece separator) { 371 return internal::JoinStringT(parts, separator); 372 } 373 374 std::u16string JoinString(span<const StringPiece16> parts, 375 StringPiece16 separator) { 376 return internal::JoinStringT(parts, separator); 377 } 378 379 std::string JoinString(std::initializer_list<StringPiece> parts, 380 StringPiece separator) { 381 return internal::JoinStringT(parts, separator); 382 } 383 384 std::u16string JoinString(std::initializer_list<StringPiece16> parts, 385 StringPiece16 separator) { 386 return internal::JoinStringT(parts, separator); 387 } 388 389 std::u16string ReplaceStringPlaceholders( 390 StringPiece16 format_string, 391 const std::vector<std::u16string>& subst, 392 std::vector<size_t>* offsets) { 393 absl::optional<std::u16string> replacement = 394 internal::DoReplaceStringPlaceholders( 395 format_string, subst, 396 /*placeholder_prefix*/ u'$', 397 /*should_escape_multiple_placeholder_prefixes*/ true, 398 /*is_strict_mode*/ false, offsets); 399 400 DCHECK(replacement); 401 return replacement.value(); 402 } 403 404 std::string ReplaceStringPlaceholders(StringPiece format_string, 405 const std::vector<std::string>& subst, 406 std::vector<size_t>* offsets) { 407 absl::optional<std::string> replacement = 408 internal::DoReplaceStringPlaceholders( 409 format_string, subst, 410 /*placeholder_prefix*/ '$', 411 /*should_escape_multiple_placeholder_prefixes*/ true, 412 /*is_strict_mode*/ false, offsets); 413 414 DCHECK(replacement); 415 return replacement.value(); 416 } 417 418 std::u16string ReplaceStringPlaceholders(const std::u16string& format_string, 419 const std::u16string& a, 420 size_t* offset) { 421 std::vector<size_t> offsets; 422 std::u16string result = 423 ReplaceStringPlaceholders(format_string, {a}, &offsets); 424 425 DCHECK_EQ(1U, offsets.size()); 426 if (offset) 427 *offset = offsets[0]; 428 return result; 429 } 430 431 size_t strlcpy(char* dst, const char* src, size_t dst_size) { 432 return internal::lcpyT(dst, src, dst_size); 433 } 434 435 size_t u16cstrlcpy(char16_t* dst, const char16_t* src, size_t dst_size) { 436 return internal::lcpyT(dst, src, dst_size); 437 } 438 439 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 440 return internal::lcpyT(dst, src, dst_size); 441 } 442 443 } // namespace base