ascii.h (9984B)
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: ascii.h
// -----------------------------------------------------------------------------
//
// This package contains functions operating on characters and strings
// restricted to standard ASCII. These include character classification
// functions analogous to those found in the ANSI C Standard Library <ctype.h>
// header file.
//
// C++ implementations provide <ctype.h> functionality based on their
// C environment locale. In general, reliance on such a locale is not ideal, as
// the locale standard is problematic (and may not return invariant information
// for the same character set, for example). These `ascii_*()` functions are
// hard-wired for standard ASCII, much faster, and guaranteed to behave
// consistently. They will never be overloaded, nor will their function
// signature change.
//
// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
// `ascii_isxdigit()`
//   Analogous to the <ctype.h> functions with similar names, these
//   functions take an unsigned char and return a bool, based on whether the
//   character matches the condition specified.
//
//   If the input character has a numerical value greater than 127, these
//   functions return `false`.
//
// `ascii_tolower()`, `ascii_toupper()`
//   Analogous to the <ctype.h> functions with similar names, these functions
//   take an unsigned char and return a char.
//
//   If the input character is not an ASCII {lower,upper}-case letter (including
//   numerical values greater than 127) then the functions return the same value
//   as the input character.

#ifndef ABSL_STRINGS_ASCII_H_
#define ABSL_STRINGS_ASCII_H_

#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/nullability.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/string_view.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace ascii_internal {

// Declaration for an array of bitfields holding character information.
// Indexed by `unsigned char` value (256 entries). The table-driven
// classifiers in this header, such as `ascii_isalpha()` and `ascii_isspace()`,
// test a single bit of the selected entry (e.g. 0x01 and 0x08 respectively).
ABSL_DLL extern const unsigned char kPropertyBits[256];

// Declaration for the array of characters to upper-case characters.
// Indexed by `unsigned char` value; `ascii_toupper()` returns the selected
// entry unchanged.
ABSL_DLL extern const char kToUpper[256];

// Declaration for the array of characters to lower-case characters.
// Indexed by `unsigned char` value; `ascii_tolower()` returns the selected
// entry unchanged.
77 ABSL_DLL extern const char kToLower[256]; 78 79 void AsciiStrToLower(absl::Nonnull<char*> dst, absl::Nullable<const char*> src, 80 size_t n); 81 82 void AsciiStrToUpper(absl::Nonnull<char*> dst, absl::Nullable<const char*> src, 83 size_t n); 84 85 } // namespace ascii_internal 86 87 // ascii_isalpha() 88 // 89 // Determines whether the given character is an alphabetic character. 90 inline bool ascii_isalpha(unsigned char c) { 91 return (ascii_internal::kPropertyBits[c] & 0x01) != 0; 92 } 93 94 // ascii_isalnum() 95 // 96 // Determines whether the given character is an alphanumeric character. 97 inline bool ascii_isalnum(unsigned char c) { 98 return (ascii_internal::kPropertyBits[c] & 0x04) != 0; 99 } 100 101 // ascii_isspace() 102 // 103 // Determines whether the given character is a whitespace character (space, 104 // tab, vertical tab, formfeed, linefeed, or carriage return). 105 inline bool ascii_isspace(unsigned char c) { 106 return (ascii_internal::kPropertyBits[c] & 0x08) != 0; 107 } 108 109 // ascii_ispunct() 110 // 111 // Determines whether the given character is a punctuation character. 112 inline bool ascii_ispunct(unsigned char c) { 113 return (ascii_internal::kPropertyBits[c] & 0x10) != 0; 114 } 115 116 // ascii_isblank() 117 // 118 // Determines whether the given character is a blank character (tab or space). 119 inline bool ascii_isblank(unsigned char c) { 120 return (ascii_internal::kPropertyBits[c] & 0x20) != 0; 121 } 122 123 // ascii_iscntrl() 124 // 125 // Determines whether the given character is a control character. 126 inline bool ascii_iscntrl(unsigned char c) { 127 return (ascii_internal::kPropertyBits[c] & 0x40) != 0; 128 } 129 130 // ascii_isxdigit() 131 // 132 // Determines whether the given character can be represented as a hexadecimal 133 // digit character (i.e. {0-9} or {A-F}). 
134 inline bool ascii_isxdigit(unsigned char c) { 135 return (ascii_internal::kPropertyBits[c] & 0x80) != 0; 136 } 137 138 // ascii_isdigit() 139 // 140 // Determines whether the given character can be represented as a decimal 141 // digit character (i.e. {0-9}). 142 inline constexpr bool ascii_isdigit(unsigned char c) { 143 return c >= '0' && c <= '9'; 144 } 145 146 // ascii_isprint() 147 // 148 // Determines whether the given character is printable, including spaces. 149 inline constexpr bool ascii_isprint(unsigned char c) { 150 return c >= 32 && c < 127; 151 } 152 153 // ascii_isgraph() 154 // 155 // Determines whether the given character has a graphical representation. 156 inline constexpr bool ascii_isgraph(unsigned char c) { 157 return c > 32 && c < 127; 158 } 159 160 // ascii_isupper() 161 // 162 // Determines whether the given character is uppercase. 163 inline constexpr bool ascii_isupper(unsigned char c) { 164 return c >= 'A' && c <= 'Z'; 165 } 166 167 // ascii_islower() 168 // 169 // Determines whether the given character is lowercase. 170 inline constexpr bool ascii_islower(unsigned char c) { 171 return c >= 'a' && c <= 'z'; 172 } 173 174 // ascii_isascii() 175 // 176 // Determines whether the given character is ASCII. 177 inline constexpr bool ascii_isascii(unsigned char c) { return c < 128; } 178 179 // ascii_tolower() 180 // 181 // Returns an ASCII character, converting to lowercase if uppercase is 182 // passed. Note that character values > 127 are simply returned. 183 inline char ascii_tolower(unsigned char c) { 184 return ascii_internal::kToLower[c]; 185 } 186 187 // Converts the characters in `s` to lowercase, changing the contents of `s`. 188 void AsciiStrToLower(absl::Nonnull<std::string*> s); 189 190 // Creates a lowercase string from a given absl::string_view. 
191 [[nodiscard]] inline std::string AsciiStrToLower(absl::string_view s) { 192 std::string result; 193 strings_internal::STLStringResizeUninitialized(&result, s.size()); 194 ascii_internal::AsciiStrToLower(&result[0], s.data(), s.size()); 195 return result; 196 } 197 198 // Creates a lowercase string from a given std::string&&. 199 // 200 // (Template is used to lower priority of this overload.) 201 template <int&... DoNotSpecify> 202 [[nodiscard]] inline std::string AsciiStrToLower(std::string&& s) { 203 std::string result = std::move(s); 204 absl::AsciiStrToLower(&result); 205 return result; 206 } 207 208 // ascii_toupper() 209 // 210 // Returns the ASCII character, converting to upper-case if lower-case is 211 // passed. Note that characters values > 127 are simply returned. 212 inline char ascii_toupper(unsigned char c) { 213 return ascii_internal::kToUpper[c]; 214 } 215 216 // Converts the characters in `s` to uppercase, changing the contents of `s`. 217 void AsciiStrToUpper(absl::Nonnull<std::string*> s); 218 219 // Creates an uppercase string from a given absl::string_view. 220 [[nodiscard]] inline std::string AsciiStrToUpper(absl::string_view s) { 221 std::string result; 222 strings_internal::STLStringResizeUninitialized(&result, s.size()); 223 ascii_internal::AsciiStrToUpper(&result[0], s.data(), s.size()); 224 return result; 225 } 226 227 // Creates an uppercase string from a given std::string&&. 228 // 229 // (Template is used to lower priority of this overload.) 230 template <int&... DoNotSpecify> 231 [[nodiscard]] inline std::string AsciiStrToUpper(std::string&& s) { 232 std::string result = std::move(s); 233 absl::AsciiStrToUpper(&result); 234 return result; 235 } 236 237 // Returns absl::string_view with whitespace stripped from the beginning of the 238 // given string_view. 
239 [[nodiscard]] inline absl::string_view StripLeadingAsciiWhitespace( 240 absl::string_view str ABSL_ATTRIBUTE_LIFETIME_BOUND) { 241 auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace); 242 return str.substr(static_cast<size_t>(it - str.begin())); 243 } 244 245 // Strips in place whitespace from the beginning of the given string. 246 inline void StripLeadingAsciiWhitespace(absl::Nonnull<std::string*> str) { 247 auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace); 248 str->erase(str->begin(), it); 249 } 250 251 // Returns absl::string_view with whitespace stripped from the end of the given 252 // string_view. 253 [[nodiscard]] inline absl::string_view StripTrailingAsciiWhitespace( 254 absl::string_view str ABSL_ATTRIBUTE_LIFETIME_BOUND) { 255 auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace); 256 return str.substr(0, static_cast<size_t>(str.rend() - it)); 257 } 258 259 // Strips in place whitespace from the end of the given string 260 inline void StripTrailingAsciiWhitespace(absl::Nonnull<std::string*> str) { 261 auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace); 262 str->erase(static_cast<size_t>(str->rend() - it)); 263 } 264 265 // Returns absl::string_view with whitespace stripped from both ends of the 266 // given string_view. 267 [[nodiscard]] inline absl::string_view StripAsciiWhitespace( 268 absl::string_view str ABSL_ATTRIBUTE_LIFETIME_BOUND) { 269 return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str)); 270 } 271 272 // Strips in place whitespace from both ends of the given string 273 inline void StripAsciiWhitespace(absl::Nonnull<std::string*> str) { 274 StripTrailingAsciiWhitespace(str); 275 StripLeadingAsciiWhitespace(str); 276 } 277 278 // Removes leading, trailing, and consecutive internal whitespace. 
// Declared here and defined outside this header. It takes a non-null pointer
// and returns nothing, so the result is presumably written back into `*str`
// -- confirm against the definition.
void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str);

ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_STRINGS_ASCII_H_