string_util.h (10328B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE file. 6 // 7 // This file defines utility functions for working with strings. 8 9 #ifndef BASE_STRING_UTIL_H_ 10 #define BASE_STRING_UTIL_H_ 11 12 #include <stdarg.h> // va_list 13 #include <ctype.h> 14 15 #include <string> 16 #include <vector> 17 18 #include "base/basictypes.h" 19 #include "base/string16.h" 20 #include "base/string_piece.h" // For implicit conversions. 21 22 // Safe standard library wrappers for all platforms. 23 24 namespace base { 25 26 // C standard-library functions like "strncasecmp" and "snprintf" that aren't 27 // cross-platform are provided as "base::strncasecmp", and their prototypes 28 // are listed below. These functions are then implemented as inline calls 29 // to the platform-specific equivalents in the platform-specific headers. 30 31 // Compare the two strings s1 and s2 without regard to case using 32 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if 33 // s2 > s1 according to a lexicographic comparison. 34 int strcasecmp(const char* s1, const char* s2); 35 36 // Compare up to count characters of s1 and s2 without regard to case using 37 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if 38 // s2 > s1 according to a lexicographic comparison. 39 int strncasecmp(const char* s1, const char* s2, size_t count); 40 41 // Wrapper for vsnprintf that always null-terminates and always returns the 42 // number of characters that would be in an untruncated formatted 43 // string, even when truncation occurs. 44 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments); 45 46 // vswprintf always null-terminates, but when truncation occurs, it will either 47 // return -1 or the number of characters that would be in an untruncated 48 // formatted string. The actual return value depends on the underlying 49 // C library's vswprintf implementation. 50 int vswprintf(wchar_t* buffer, size_t size, const wchar_t* format, 51 va_list arguments); 52 53 // Some of these implementations need to be inlined. 54 55 inline int snprintf(char* buffer, size_t size, const char* format, ...) { 56 va_list arguments; 57 va_start(arguments, format); 58 int result = vsnprintf(buffer, size, format, arguments); 59 va_end(arguments); 60 return result; 61 } 62 63 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { 64 va_list arguments; 65 va_start(arguments, format); 66 int result = vswprintf(buffer, size, format, arguments); 67 va_end(arguments); 68 return result; 69 } 70 71 // BSD-style safe and consistent string copy functions. 72 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. 73 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as 74 // long as |dst_size| is not 0. Returns the length of |src| in characters. 75 // If the return value is >= dst_size, then the output was truncated. 76 // NOTE: All sizes are in number of characters, NOT in bytes. 77 size_t strlcpy(char* dst, const char* src, size_t dst_size); 78 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); 79 80 // Scan a wprintf format string to determine whether it's portable across a 81 // variety of systems. This function only checks that the conversion 82 // specifiers used by the format string are supported and have the same meaning 83 // on a variety of systems. It doesn't check for other errors that might occur 84 // within a format string. 85 // 86 // Nonportable conversion specifiers for wprintf are: 87 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char 88 // data on all systems except Windows, which treat them as wchar_t data. 89 // Use %ls and %lc for wchar_t data instead. 90 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, 91 // which treat them as char data. Use %ls and %lc for wchar_t data 92 // instead. 93 // - 'F', which is not identified by Windows wprintf documentation. 94 // - 'D', 'O', and 'U', which are deprecated and not available on all systems. 95 // Use %ld, %lo, and %lu instead. 96 // 97 // Note that there is no portable conversion specifier for char data when 98 // working with wprintf. 99 // 100 // This function is intended to be called from base::vswprintf. 101 bool IsWprintfFormatPortable(const wchar_t* format); 102 103 } // namespace base 104 105 #if defined(XP_WIN) 106 # include "base/string_util_win.h" 107 #else 108 # include "base/string_util_posix.h" 109 #endif 110 111 // Trims any whitespace from either end of the input string. Returns where 112 // whitespace was found. 113 // The non-wide version has two functions: 114 // * TrimWhitespaceASCII() 115 // This function is for ASCII strings and only looks for ASCII whitespace; 116 // * TrimWhitespaceUTF8() 117 // This function is for UTF-8 strings and looks for Unicode whitespace. 118 // Please choose the best one according to your usage. 119 // NOTE: Safe to use the same variable for both input and output. 120 enum TrimPositions { 121 TRIM_NONE = 0, 122 TRIM_LEADING = 1 << 0, 123 TRIM_TRAILING = 1 << 1, 124 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING 125 }; 126 TrimPositions TrimWhitespace(const std::wstring& input, TrimPositions positions, 127 std::wstring* output); 128 TrimPositions TrimWhitespaceASCII(const std::string& input, 129 TrimPositions positions, std::string* output); 130 131 // Deprecated. This function is only for backward compatibility and calls 132 // TrimWhitespaceASCII(). 133 TrimPositions TrimWhitespace(const std::string& input, TrimPositions positions, 134 std::string* output); 135 136 // Searches for CR or LF characters. Removes all contiguous whitespace 137 // strings that contain them. This is useful when trying to deal with text 138 // copied from terminals. 139 // Returns |text, with the following three transformations: 140 // (1) Leading and trailing whitespace is trimmed. 141 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace 142 // sequences containing a CR or LF are trimmed. 143 // (3) All other whitespace sequences are converted to single spaces. 144 std::wstring CollapseWhitespace(const std::wstring& text, 145 bool trim_sequences_with_line_breaks); 146 147 // These convert between ASCII (7-bit) and Wide/UTF16 strings. 148 std::string WideToASCII(const std::wstring& wide); 149 std::wstring ASCIIToWide(const std::string& ascii); 150 std::string UTF16ToASCII(const string16& utf16); 151 string16 ASCIIToUTF16(const std::string& ascii); 152 153 // These convert between UTF-8, -16, and -32 strings. They are potentially slow, 154 // so avoid unnecessary conversions. The low-level versions return a boolean 155 // indicating whether the conversion was 100% valid. In this case, it will still 156 // do the best it can and put the result in the output buffer. The versions that 157 // return strings ignore this error and just return the best conversion 158 // possible. 159 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output); 160 std::string WideToUTF8(const std::wstring& wide); 161 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output); 162 std::wstring UTF8ToWide(const ::StringPiece& utf8); 163 164 bool IsStringASCII(const std::wstring& str); 165 bool IsStringASCII(const std::string& str); 166 bool IsStringASCII(const string16& str); 167 168 // Specialized string-conversion functions. 169 std::string IntToString(int value); 170 std::wstring IntToWString(int value); 171 std::string UintToString(unsigned int value); 172 std::wstring UintToWString(unsigned int value); 173 std::string Int64ToString(int64_t value); 174 std::wstring Int64ToWString(int64_t value); 175 std::string Uint64ToString(uint64_t value); 176 std::wstring Uint64ToWString(uint64_t value); 177 // The DoubleToString methods convert the double to a string format that 178 // ignores the locale. If you want to use locale specific formatting, use ICU. 179 std::string DoubleToString(double value); 180 std::wstring DoubleToWString(double value); 181 182 // Perform a best-effort conversion of the input string to a numeric type, 183 // setting |*output| to the result of the conversion. Returns true for 184 // "perfect" conversions; returns false in the following cases: 185 // - Overflow/underflow. |*output| will be set to the maximum value supported 186 // by the data type. 187 // - Trailing characters in the string after parsing the number. |*output| 188 // will be set to the value of the number that was parsed. 189 // - No characters parseable as a number at the beginning of the string. 190 // |*output| will be set to 0. 191 // - Empty string. |*output| will be set to 0. 192 bool StringToInt(const std::string& input, int* output); 193 bool StringToInt(const string16& input, int* output); 194 bool StringToInt64(const std::string& input, int64_t* output); 195 bool StringToInt64(const string16& input, int64_t* output); 196 197 // Convenience forms of the above, when the caller is uninterested in the 198 // boolean return value. These return only the |*output| value from the 199 // above conversions: a best-effort conversion when possible, otherwise, 0. 200 int StringToInt(const std::string& value); 201 int StringToInt(const string16& value); 202 int64_t StringToInt64(const std::string& value); 203 int64_t StringToInt64(const string16& value); 204 205 // Return a C++ string given printf-like input. 206 std::string StringPrintf(const char* format, ...); 207 std::wstring StringPrintf(const wchar_t* format, ...); 208 209 // Store result into a supplied string and return it 210 const std::string& SStringPrintf(std::string* dst, const char* format, ...); 211 const std::wstring& SStringPrintf(std::wstring* dst, const wchar_t* format, 212 ...); 213 214 // Append result to a supplied string 215 void StringAppendF(std::string* dst, const char* format, ...); 216 void StringAppendF(std::wstring* dst, const wchar_t* format, ...); 217 218 //----------------------------------------------------------------------------- 219 220 // Splits |str| into a vector of strings delimited by |s|. Append the results 221 // into |r| as they appear. If several instances of |s| are contiguous, or if 222 // |str| begins with or ends with |s|, then an empty string is inserted. 223 // 224 // Every substring is trimmed of any leading or trailing white space. 225 void SplitString(const std::wstring& str, wchar_t s, 226 std::vector<std::wstring>* r); 227 void SplitString(const std::string& str, char s, std::vector<std::string>* r); 228 229 #endif // BASE_STRING_UTIL_H_