tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

string_util.h (10328B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      4 // Use of this source code is governed by a BSD-style license that can be
      5 // found in the LICENSE file.
      6 //
      7 // This file defines utility functions for working with strings.
      8 
      9 #ifndef BASE_STRING_UTIL_H_
     10 #define BASE_STRING_UTIL_H_
     11 
     12 #include <stdarg.h>  // va_list
     13 #include <ctype.h>
     14 
     15 #include <string>
     16 #include <vector>
     17 
     18 #include "base/basictypes.h"
     19 #include "base/string16.h"
     20 #include "base/string_piece.h"  // For implicit conversions.
     21 
     22 // Safe standard library wrappers for all platforms.
     23 
     24 namespace base {
     25 
     26 // C standard-library functions like "strncasecmp" and "snprintf" that aren't
     27 // cross-platform are provided as "base::strncasecmp", and their prototypes
     28 // are listed below.  These functions are then implemented as inline calls
     29 // to the platform-specific equivalents in the platform-specific headers.
     30 
     31 // Compares the strings |s1| and |s2| without regard to case, using the
     32 // current locale.  Returns 0 if they are equal, 1 if s1 > s2, and -1 if
     33 // s2 > s1 according to a lexicographic comparison.
     34 int strcasecmp(const char* s1, const char* s2);
     35 
     36 // Compares at most |count| characters of |s1| and |s2| without regard to
     37 // case, using the current locale.  Returns 0 if they are equal, 1 if
     38 // s1 > s2, and -1 if s2 > s1 according to a lexicographic comparison.
     39 int strncasecmp(const char* s1, const char* s2, size_t count);
     40 
     41 // Wrapper for vsnprintf that always null-terminates |buffer| and always
     42 // returns the number of characters that would be in an untruncated formatted
     43 // string, even when truncation occurs.
        // NOTE(review): |size| is presumably the capacity of |buffer| in characters,
        // terminator included -- confirm against the platform implementation.
     44 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments);
     45 
     46 // vswprintf always null-terminates, but when truncation occurs, it will either
     47 // return -1 or the number of characters that would be in an untruncated
     48 // formatted string.  The actual return value depends on the underlying
     49 // C library's vswprintf implementation.
        // Callers therefore cannot rely on one specific return value to detect
        // truncation; both outcomes listed above have to be handled.
     50 int vswprintf(wchar_t* buffer, size_t size, const wchar_t* format,
     51              va_list arguments);
     52 
     53 // Some of these implementations need to be inlined.
     54 
     55 inline int snprintf(char* buffer, size_t size, const char* format, ...) {
        // Variadic front end for vsnprintf: packages the arguments following
        // |format| into a va_list, forwards everything to vsnprintf, and hands
        // back vsnprintf's return value unchanged.
     56  va_list args;
     57  va_start(args, format);
     58  const int written = vsnprintf(buffer, size, format, args);
        // Every va_start must be paired with va_end before the function returns.
     59  va_end(args);
     60  return written;
     61 }
     62 
     63 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) {
        // Variadic front end for vswprintf: packages the arguments following
        // |format| into a va_list, forwards everything to vswprintf, and hands
        // back vswprintf's return value unchanged.
     64  va_list args;
     65  va_start(args, format);
     66  const int written = vswprintf(buffer, size, format, args);
        // Every va_start must be paired with va_end before the function returns.
     67  va_end(args);
     68  return written;
     69 }
     70 
     71 // BSD-style safe and consistent string copy functions.
     72 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
     73 // Copies at most |dst_size|-1 characters, and always NUL-terminates |dst|, as
     74 // long as |dst_size| is not 0.  Returns the length of |src| in characters.
     75 // If the return value is >= dst_size, then the output was truncated.
     76 // NOTE: All sizes are in number of characters, NOT in bytes.
        // strlcpy operates on char strings; wcslcpy is the wchar_t counterpart.
     77 size_t strlcpy(char* dst, const char* src, size_t dst_size);
     78 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
     79 
     80 // Scans a wprintf format string to determine whether it's portable across a
     81 // variety of systems.  This function only checks that the conversion
     82 // specifiers used by the format string are supported and have the same meaning
     83 // on a variety of systems.  It doesn't check for other errors that might occur
     84 // within a format string.
     85 //
     86 // Nonportable conversion specifiers for wprintf are:
     87 //  - 's' and 'c' without an 'l' length modifier.  %s and %c operate on char
     88 //     data on all systems except Windows, which treats them as wchar_t data.
     89 //     Use %ls and %lc for wchar_t data instead.
     90 //  - 'S' and 'C', which operate on wchar_t data on all systems except Windows,
     91 //     which treats them as char data.  Use %ls and %lc for wchar_t data
     92 //     instead.
     93 //  - 'F', which is not identified by Windows wprintf documentation.
     94 //  - 'D', 'O', and 'U', which are deprecated and not available on all systems.
     95 //     Use %ld, %lo, and %lu instead.
     96 //
     97 // Note that there is no portable conversion specifier for char data when
     98 // working with wprintf.
     99 //
    100 // This function is intended to be called from base::vswprintf.
        // NOTE(review): the result is presumably true when every conversion
        // specifier in |format| is portable -- confirm against the implementation.
    101 bool IsWprintfFormatPortable(const wchar_t* format);
    102 
    103 }  // namespace base
    104 
    105 #if defined(XP_WIN)
    106 #  include "base/string_util_win.h"
    107 #else
    108 #  include "base/string_util_posix.h"
    109 #endif
    110 
    111 // Trims any whitespace from either end of the input string.  Returns where
    112 // whitespace was found.
    113 // The non-wide version has two functions:
    114 // * TrimWhitespaceASCII()
    115 //   This function is for ASCII strings and only looks for ASCII whitespace;
    116 // * TrimWhitespaceUTF8()
    117 //   This function is for UTF-8 strings and looks for Unicode whitespace.
    118 // Please choose the best one according to your usage.
    119 // NOTE: Safe to use the same variable for both input and output.
        // NOTE(review): TrimWhitespaceUTF8() is mentioned above, but no declaration
        // for it appears in this header.
        //
        // TrimPositions is a set of bit flags naming the ends of a string.  It is
        // used both as the |positions| argument and as the return type of the trim
        // functions below.
    120 enum TrimPositions {
    121  TRIM_NONE = 0,                          // Neither end.
    122  TRIM_LEADING = 1 << 0,                  // Bit 0: the start of the string.
    123  TRIM_TRAILING = 1 << 1,                 // Bit 1: the end of the string.
    124  TRIM_ALL = TRIM_LEADING | TRIM_TRAILING  // Both bits set.
    125 };
        // Wide-string overload.  NOTE(review): |positions| presumably selects which
        // ends are trimmed and the trimmed text is written to |*output| -- confirm.
    126 TrimPositions TrimWhitespace(const std::wstring& input, TrimPositions positions,
    127                             std::wstring* output);
        // Narrow-string variant that, per the comment above, only looks for ASCII
        // whitespace.
    128 TrimPositions TrimWhitespaceASCII(const std::string& input,
    129                                  TrimPositions positions, std::string* output);
    130 
    131 // Deprecated.  This narrow-string overload exists only for backward
    132 // compatibility and calls TrimWhitespaceASCII().
    133 TrimPositions TrimWhitespace(const std::string& input, TrimPositions positions,
    134                             std::string* output);
    135 
    136 // Searches for CR or LF characters.  Removes all contiguous whitespace
    137 // strings that contain them.  This is useful when trying to deal with text
    138 // copied from terminals.
    139 // Returns |text|, with the following three transformations:
    140 // (1) Leading and trailing whitespace is trimmed.
    141 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
    142 //     sequences containing a CR or LF are trimmed.
    143 // (3) All other whitespace sequences are converted to single spaces.
    144 std::wstring CollapseWhitespace(const std::wstring& text,
    145                                bool trim_sequences_with_line_breaks);
    146 
    147 // These convert between ASCII (7-bit) and Wide/UTF16 strings.
        // NOTE(review): the behavior for input outside the 7-bit ASCII range is not
        // specified in this header -- check the implementation before relying on it.
    148 std::string WideToASCII(const std::wstring& wide);
    149 std::wstring ASCIIToWide(const std::string& ascii);
    150 std::string UTF16ToASCII(const string16& utf16);
    151 string16 ASCIIToUTF16(const std::string& ascii);
    152 
    153 // These convert between UTF-8, -16, and -32 strings. They are potentially slow,
    154 // so avoid unnecessary conversions. The low-level versions return a boolean
    155 // indicating whether the conversion was 100% valid. When it was not, they still
    156 // do the best they can and put the result in the output buffer. The versions
    157 // that return strings ignore this error and just return the best conversion
    158 // possible.
        // Low-level forms take a pointer plus |src_len| and write into |*output|.
    159 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);
    160 std::string WideToUTF8(const std::wstring& wide);
    161 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output);
    162 std::wstring UTF8ToWide(const ::StringPiece& utf8);
    163 
        // Overloads for wide, narrow and UTF-16 strings.
        // NOTE(review): undocumented in this header; presumably each returns true
        // only when every character of |str| is 7-bit ASCII -- confirm against the
        // implementation.
    164 bool IsStringASCII(const std::wstring& str);
    165 bool IsStringASCII(const std::string& str);
    166 bool IsStringASCII(const string16& str);
    167 
    168 // Specialized string-conversion functions.
        // Each numeric type has a narrow (std::string) and a wide (std::wstring)
        // form.  NOTE(review): the integer forms presumably produce base-10 text --
        // confirm against the implementation.
    169 std::string IntToString(int value);
    170 std::wstring IntToWString(int value);
    171 std::string UintToString(unsigned int value);
    172 std::wstring UintToWString(unsigned int value);
    173 std::string Int64ToString(int64_t value);
    174 std::wstring Int64ToWString(int64_t value);
    175 std::string Uint64ToString(uint64_t value);
    176 std::wstring Uint64ToWString(uint64_t value);
    177 // The DoubleToString methods convert the double to a string format that
    178 // ignores the locale.  If you want to use locale specific formatting, use ICU.
    179 std::string DoubleToString(double value);
    180 std::wstring DoubleToWString(double value);
    181 
    182 // Perform a best-effort conversion of the input string to a numeric type,
    183 // setting |*output| to the result of the conversion.  Returns true for
    184 // "perfect" conversions; returns false in the following cases:
    185 //  - Overflow/underflow.  |*output| will be set to the maximum value supported
    186 //    by the data type.
    187 //  - Trailing characters in the string after parsing the number.  |*output|
    188 //    will be set to the value of the number that was parsed.
    189 //  - No characters parseable as a number at the beginning of the string.
    190 //    |*output| will be set to 0.
    191 //  - Empty string.  |*output| will be set to 0.
        // Overloads accept narrow (std::string) and UTF-16 (string16) input.
    192 bool StringToInt(const std::string& input, int* output);
    193 bool StringToInt(const string16& input, int* output);
    194 bool StringToInt64(const std::string& input, int64_t* output);
    195 bool StringToInt64(const string16& input, int64_t* output);
    196 
    197 // Convenience forms of the two-argument conversions, for callers that are
    198 // uninterested in the boolean return value.  These return only the |*output|
    199 // value from those conversions: a best-effort conversion when possible,
        // otherwise, 0.
    200 int StringToInt(const std::string& value);
    201 int StringToInt(const string16& value);
    202 int64_t StringToInt64(const std::string& value);
    203 int64_t StringToInt64(const string16& value);
    204 
    205 // Return a C++ string given printf-like input.
        // The narrow overload takes a char format and returns std::string; the wide
        // overload takes a wchar_t format and returns std::wstring.
    206 std::string StringPrintf(const char* format, ...);
    207 std::wstring StringPrintf(const wchar_t* format, ...);
    208 
    209 // Stores the printf-style result into the supplied string and returns it.
        // NOTE(review): presumably replaces any previous contents of |*dst| rather
        // than appending to them -- confirm against the implementation.
    210 const std::string& SStringPrintf(std::string* dst, const char* format, ...);
    211 const std::wstring& SStringPrintf(std::wstring* dst, const wchar_t* format,
    212                                  ...);
    213 
    214 // Appends the printf-style result to the supplied string |*dst|.
    215 void StringAppendF(std::string* dst, const char* format, ...);
    216 void StringAppendF(std::wstring* dst, const wchar_t* format, ...);
    217 
    218 //-----------------------------------------------------------------------------
    219 
    220 // Splits |str| into a vector of strings delimited by |s|. Appends the results
    221 // to |r| in the order they appear. If several instances of |s| are contiguous,
    222 // or if |str| begins with or ends with |s|, then an empty string is inserted.
    223 //
    224 // Every substring is trimmed of any leading or trailing white space.
    225 void SplitString(const std::wstring& str, wchar_t s,
    226                 std::vector<std::wstring>* r);
    227 void SplitString(const std::string& str, char s, std::vector<std::string>* r);
    228 
    229 #endif  // BASE_STRING_UTIL_H_