strtools_public.cpp (17525B)
1 //========= Copyright Valve Corporation ============// 2 #include "strtools_public.h" 3 #include <string.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <sstream> 7 // Mozilla: see mozilla.patch for more details 8 // #include <codecvt> 9 // #include <iostream> 10 #include <functional> 11 #include <locale> 12 // #include <codecvt> 13 14 #if defined( _WIN32 ) 15 #include <windows.h> 16 #endif 17 18 //----------------------------------------------------------------------------- 19 // Purpose: 20 //----------------------------------------------------------------------------- 21 bool StringHasPrefix( const std::string & sString, const std::string & sPrefix ) 22 { 23 return 0 == strnicmp( sString.c_str(), sPrefix.c_str(), sPrefix.length() ); 24 } 25 26 bool StringHasPrefixCaseSensitive( const std::string & sString, const std::string & sPrefix ) 27 { 28 return 0 == strncmp( sString.c_str(), sPrefix.c_str(), sPrefix.length() ); 29 } 30 31 32 bool StringHasSuffix( const std::string &sString, const std::string &sSuffix ) 33 { 34 size_t cStrLen = sString.length(); 35 size_t cSuffixLen = sSuffix.length(); 36 37 if ( cSuffixLen > cStrLen ) 38 return false; 39 40 std::string sStringSuffix = sString.substr( cStrLen - cSuffixLen, cSuffixLen ); 41 42 return 0 == stricmp( sStringSuffix.c_str(), sSuffix.c_str() ); 43 } 44 45 bool StringHasSuffixCaseSensitive( const std::string &sString, const std::string &sSuffix ) 46 { 47 size_t cStrLen = sString.length(); 48 size_t cSuffixLen = sSuffix.length(); 49 50 if ( cSuffixLen > cStrLen ) 51 return false; 52 53 std::string sStringSuffix = sString.substr( cStrLen - cSuffixLen, cSuffixLen ); 54 55 return 0 == strncmp( sStringSuffix.c_str(), sSuffix.c_str(),cSuffixLen ); 56 } 57 58 //----------------------------------------------------------------------------- 59 // Purpose: 60 //----------------------------------------------------------------------------- 61 // Mozilla: see mozilla.patch for more details 62 //typedef 
//   std::codecvt_utf8< wchar_t > convert_type;

// Mozilla: see mozilla.patch for more details
#if defined( _WIN32 )
//-----------------------------------------------------------------------------
// Purpose: Converts a NUL-terminated UTF-16 string to UTF-8.
//          Returns an empty string if 'in' is null or the conversion fails.
//-----------------------------------------------------------------------------
std::string UTF16to8(const wchar_t * in)
{
    if ( !in )
    {
        return std::string();
    }

    // First pass: query the required size in bytes. Because the input length is
    // passed as -1, the reported size includes the terminating NUL.
    const int retLength = ::WideCharToMultiByte(CP_UTF8, 0, in, -1, nullptr, 0, nullptr, nullptr);
    if (retLength <= 0)
    {
        return std::string();
    }

    // Second pass: convert straight into a std::string-owned buffer so nothing
    // can leak, and check the result so a failed conversion never exposes
    // uninitialized / unterminated data.
    std::string retString( static_cast<size_t>( retLength ), '\0' );
    const int written = ::WideCharToMultiByte(CP_UTF8, 0, in, -1, &retString[0], retLength, nullptr, nullptr);
    if (written <= 0)
    {
        return std::string();
    }

    // Drop the terminating NUL that the API wrote into the buffer.
    retString.resize( static_cast<size_t>( written ) - 1 );
    return retString;

    // Previous std::wstring_convert-based implementation (commented out; see mozilla.patch):
    //
    // static std::wstring_convert< convert_type, wchar_t > s_converter;  // construction of this can be expensive (or even serialized) depending on locale

    // try
    // {
    //     return s_converter.to_bytes( in );
    // }
    // catch ( ... )
    // {
    //     return std::string();
    // }
}

// Overload for std::wstring; conversion stops at the first embedded NUL.
std::string UTF16to8( const std::wstring & in ) { return UTF16to8( in.c_str() ); }

// Mozilla: see mozilla.patch for more details
//-----------------------------------------------------------------------------
// Purpose: Converts a NUL-terminated UTF-8 string to UTF-16.
//          Returns an empty string if 'in' is null or the conversion fails.
//-----------------------------------------------------------------------------
std::wstring UTF8to16(const char * in)
{
    if ( !in )
    {
        return std::wstring();
    }

    // First pass: query the required size in wide characters (includes the
    // terminating NUL because the input length is passed as -1).
    const int retLength = ::MultiByteToWideChar(CP_UTF8, 0, in, -1, nullptr, 0);
    if (retLength <= 0)
    {
        return std::wstring();
    }

    // Second pass: convert into a std::wstring-owned buffer and verify the result.
    std::wstring retString( static_cast<size_t>( retLength ), L'\0' );
    const int written = ::MultiByteToWideChar(CP_UTF8, 0, in, -1, &retString[0], retLength);
    if (written <= 0)
    {
        return std::wstring();
    }

    // Drop the terminating NUL that the API wrote into the buffer.
    retString.resize( static_cast<size_t>( written ) - 1 );
    return retString;

    // Previous std::wstring_convert-based implementation (commented out; see mozilla.patch):
    //
    //static std::wstring_convert< convert_type, wchar_t > s_converter;  // construction of this can be expensive (or even serialized) depending on locale

    //try
    //{
    //    return s_converter.from_bytes( in );
    //}
    //catch ( ... )
    //{
    //    return std::wstring();
    //}
}

// Overload for std::string; conversion stops at the first embedded NUL.
std::wstring UTF8to16( const std::string & in ) { return UTF8to16( in.c_str() ); }
#endif


#if defined( _WIN32 )
//-----------------------------------------------------------------------------
// Purpose: Convert LPSTR in the default CodePage to UTF8
//          A null input yields an empty string.
//-----------------------------------------------------------------------------
std::string DefaultACPtoUTF8( const char *pszStr )
{
    if ( !pszStr )
    {
        return std::string();
    }

    // Already UTF-8: nothing to convert.
    if ( GetACP() == CP_UTF8 )
    {
        return pszStr;
    }

    // The vector is value-initialized (all zeros), so if the conversion fails
    // the buffer still reads as an empty string.
    std::vector<wchar_t> vecBuf( strlen( pszStr ) + 1 ); // should be guaranteed to be enough
    MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, pszStr, -1, vecBuf.data(), static_cast<int>( vecBuf.size() ) );
    return UTF16to8( vecBuf.data() );
}
#endif

// --------------------------------------------------------------------
// Purpose: Copies pchSource into pchBuffer, truncating if necessary, and
//          always NUL-terminates the result.
//
//          pchBuffer         - destination; left untouched if null
//          unBufferSizeBytes - total size of pchBuffer including room for the
//                              terminator; a size of 0 is a no-op
//          pchSource         - NUL-terminated source; null is treated as ""
// --------------------------------------------------------------------
void strcpy_safe( char *pchBuffer, size_t unBufferSizeBytes, const char *pchSource )
{
    // With a zero-sized buffer, (unBufferSizeBytes - 1) would wrap around to
    // SIZE_MAX and turn the copy below into an unbounded write.
    if ( !pchBuffer || unBufferSizeBytes == 0 )
        return;

    if ( !pchSource )
    {
        pchBuffer[0] = '\0';
        return;
    }

    strncpy( pchBuffer, pchSource, unBufferSizeBytes - 1 );
    pchBuffer[unBufferSizeBytes - 1] = '\0';
}

// --------------------------------------------------------------------
// Purpose: converts a string to upper case
// --------------------------------------------------------------------
std::string StringToUpper( const std::string & sString )
{
    std::string sOut;
    sOut.reserve( sString.size() + 1 );
    for ( const char ch : sString )
    {
        // toupper() requires a value representable as unsigned char (or EOF);
        // passing a negative char is undefined behavior.
        sOut.push_back( static_cast<char>( toupper( static_cast<unsigned char>( ch ) ) ) );
    }

    return sOut;
}


// --------------------------------------------------------------------
// Purpose: converts a string to lower case
// --------------------------------------------------------------------
std::string StringToLower( const std::string & sString )
{
    std::string sOut;
    sOut.reserve( sString.size() + 1 );
    for ( const char ch : sString )
    {
        // See StringToUpper: the cast avoids undefined behavior for bytes >= 0x80.
        sOut.push_back( static_cast<char>( tolower( static_cast<unsigned char>( ch ) ) ) );
    }

    return sOut;
}


// --------------------------------------------------------------------
// Purpose: Copies sValue (with its terminating NUL) into a caller-supplied
//          buffer.
//
//          Always returns the number of bytes required, including the
//          terminator. If pchBuffer is null or unBufferLen is 0 nothing is
//          written. If the buffer is too small it receives an empty string.
// --------------------------------------------------------------------
uint32_t ReturnStdString( const std::string & sValue, char *pchBuffer, uint32_t unBufferLen )
{
    const uint32_t unLen = static_cast<uint32_t>( sValue.length() ) + 1;
    if ( !pchBuffer || !unBufferLen )
        return unLen;

    if ( unBufferLen < unLen )
    {
        pchBuffer[0] = '\0';
    }
    else
    {
        memcpy( pchBuffer, sValue.c_str(), unLen );
    }

    return unLen;
}


/** Returns a std::string from a uint64_t */
// Mozilla: see mozilla.patch for more details
// std::string Uint64ToString( uint64_t ulValue )
// {
//     char buf[ 22 ];
// #if defined( _WIN32 )
//     sprintf_s( buf, "%llu", ulValue );
// #else
//     snprintf( buf, sizeof( buf ), "%llu", (long long unsigned int ) ulValue );
// #endif
//     return buf;
// }


/** returns a uint64_t from a string; base 0 lets strtoull pick hex ("0x"), octal ("0") or decimal from the prefix */
uint64_t StringToUint64( const std::string & sValue )
{
    return strtoull( sValue.c_str(), nullptr, 0 );
}

//-----------------------------------------------------------------------------
// Purpose: Helper for converting a numeric value to a hex digit, value should be 0-15.
//          Only the low four bits are used, so out-of-range values wrap instead
//          of indexing past the table. Digits are upper case.
//-----------------------------------------------------------------------------
char cIntToHexDigit( int nValue )
{
    //Assert( nValue >= 0 && nValue <= 15 );
    return "0123456789ABCDEF"[ nValue & 15 ];
}

//-----------------------------------------------------------------------------
// Purpose: Helper for converting a hex char value to numeric, return -1 if the char
//          is not a valid hex digit.
//-----------------------------------------------------------------------------
int iHexCharToInt( char cValue )
{
    // Decimal digits.
    if ( cValue >= '0' && cValue <= '9' )
        return cValue - '0';

    // Hex letters, either case.
    if ( cValue >= 'a' && cValue <= 'f' )
        return cValue - 'a' + 10;

    if ( cValue >= 'A' && cValue <= 'F' )
        return cValue - 'A' + 10;

    // Anything else is not a hex digit.
    return -1;
}


//-----------------------------------------------------------------------------
// Purpose: These define the set of characters to filter for components (which
//          need all the escaping we can muster) vs. paths (which don't want
//          '/' and ':' escaped, so we don't break less compliant URL handling
//          code).
//-----------------------------------------------------------------------------

// Component rule: only a-z, A-Z, 0-9, '-', '_' and '.' pass through unescaped.
static bool CharNeedsEscape_Component( const char c )
{
    const bool bIsLower = ( c >= 'a' && c <= 'z' );
    const bool bIsUpper = ( c >= 'A' && c <= 'Z' );
    const bool bIsDigit = ( c >= '0' && c <= '9' );
    const bool bIsSafePunct = ( c == '-' || c == '_' || c == '.' );

    return !( bIsLower || bIsUpper || bIsDigit || bIsSafePunct );
}

// Full-path rule: same as the component rule, but '/' and ':' also pass through.
static bool CharNeedsEscape_FullPath( const char c )
{
    const bool bIsLower = ( c >= 'a' && c <= 'z' );
    const bool bIsUpper = ( c >= 'A' && c <= 'Z' );
    const bool bIsDigit = ( c >= '0' && c <= '9' );
    const bool bIsSafePunct = ( c == '-' || c == '_' || c == '.' || c == '/' || c == ':' );

    return !( bIsLower || bIsUpper || bIsDigit || bIsSafePunct );
}


//-----------------------------------------------------------------------------
// Purpose: Internal implementation of encode, works in the strict RFC manner, or
//          with spaces turned to + like HTML form encoding.
//
//          pchDest          - output buffer; receives a NUL-terminated encoded
//                             string, or an empty string if it is too small
//          nDestLen         - size of pchDest in bytes; 3 * nSourceLen + 1 is
//                             always sufficient
//          pchSource        - bytes to encode (length is explicit, so embedded
//                             NULs are encoded as %00)
//          nSourceLen       - number of bytes to read from pchSource
//          bUsePlusForSpace - emit '+' instead of %20 for a space that the
//                             predicate marks as needing escape
//          fnNeedsEscape    - returns true for characters that must be escaped
//
//          Nothing is written when pchDest is null or nDestLen <= 0.
//-----------------------------------------------------------------------------
void V_URLEncodeInternal( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen,
    bool bUsePlusForSpace, std::function< bool(const char)> fnNeedsEscape )
{
    //AssertMsg( nDestLen > 3*nSourceLen, "Target buffer for V_URLEncode should be 3x source length, plus one for terminating null\n" );

    // Without any room we cannot even store an empty string; touching
    // pchDest[0] here would be an out-of-bounds write.
    if ( !pchDest || nDestLen <= 0 )
        return;

    // A null source is treated as empty input.
    if ( !pchSource )
        nSourceLen = 0;

    // Upper-case hex digits, the same alphabet cIntToHexDigit produces.
    static const char k_rgchHexDigits[] = "0123456789ABCDEF";

    int iDestPos = 0;
    for ( int i = 0; i < nSourceLen; ++i )
    {
        // worst case we need 3 additional chars
        if ( ( iDestPos + 3 ) > nDestLen )
        {
            pchDest[0] = '\0';
            // AssertMsg( false, "Target buffer too short\n" );
            return;
        }

        const char chSource = pchSource[i];

        // We allow only a-z, A-Z, 0-9, period, underscore, and hyphen to pass through unescaped.
        // These are the characters allowed by both the original RFC 1738 and the latest RFC 3986.
        // Current specs also allow '~', but that is forbidden under original RFC 1738.
        // (The exact set is decided by fnNeedsEscape.)
        if ( !fnNeedsEscape( chSource ) )
        {
            pchDest[iDestPos++] = chSource;
        }
        else if ( bUsePlusForSpace && chSource == ' ' )
        {
            pchDest[iDestPos++] = '+';
        }
        else
        {
            // Go through uint8_t so bytes >= 0x80 encode as %80..%FF regardless
            // of whether plain char is signed.
            const uint8_t unValue = static_cast<uint8_t>( chSource );
            pchDest[iDestPos++] = '%';
            pchDest[iDestPos++] = k_rgchHexDigits[ unValue >> 4 ];
            pchDest[iDestPos++] = k_rgchHexDigits[ unValue & 0x0F ];
        }
    }

    if ( ( iDestPos + 1 ) > nDestLen )
    {
        pchDest[0] = '\0';
        //AssertMsg( false, "Target buffer too short to terminate\n" );
        return;
    }

    // Null terminate
    pchDest[iDestPos] = '\0';
}


//-----------------------------------------------------------------------------
// Purpose: Internal implementation of decode, works in the strict RFC manner, or
//          with spaces turned to + like HTML form encoding.
343 // 344 // Returns the amount of space used in the output buffer. 345 //----------------------------------------------------------------------------- 346 size_t V_URLDecodeInternal( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen, bool bUsePlusForSpace ) 347 { 348 if ( nDecodeDestLen < nEncodedSourceLen ) 349 { 350 //AssertMsg( false, "V_URLDecode needs a dest buffer at least as large as the source" ); 351 return 0; 352 } 353 354 int iDestPos = 0; 355 for( int i=0; i < nEncodedSourceLen; ++i ) 356 { 357 if ( bUsePlusForSpace && pchEncodedSource[i] == '+' ) 358 { 359 pchDecodeDest[ iDestPos++ ] = ' '; 360 } 361 else if ( pchEncodedSource[i] == '%' ) 362 { 363 // Percent signifies an encoded value, look ahead for the hex code, convert to numeric, and use that 364 365 // First make sure we have 2 more chars 366 if ( i < nEncodedSourceLen - 2 ) 367 { 368 char cHexDigit1 = pchEncodedSource[i+1]; 369 char cHexDigit2 = pchEncodedSource[i+2]; 370 371 // Turn the chars into a hex value, if they are not valid, then we'll 372 // just place the % and the following two chars direct into the string, 373 // even though this really shouldn't happen, who knows what bad clients 374 // may do with encoding. 
375 bool bValid = false; 376 int iValue = iHexCharToInt( cHexDigit1 ); 377 if ( iValue != -1 ) 378 { 379 iValue *= 16; 380 int iValue2 = iHexCharToInt( cHexDigit2 ); 381 if ( iValue2 != -1 ) 382 { 383 iValue += iValue2; 384 pchDecodeDest[ iDestPos++ ] = (char)iValue; 385 bValid = true; 386 } 387 } 388 389 if ( !bValid ) 390 { 391 pchDecodeDest[ iDestPos++ ] = '%'; 392 pchDecodeDest[ iDestPos++ ] = cHexDigit1; 393 pchDecodeDest[ iDestPos++ ] = cHexDigit2; 394 } 395 } 396 397 // Skip ahead 398 i += 2; 399 } 400 else 401 { 402 pchDecodeDest[ iDestPos++ ] = pchEncodedSource[i]; 403 } 404 } 405 406 // We may not have extra room to NULL terminate, since this can be used on raw data, but if we do 407 // go ahead and do it as this can avoid bugs. 408 if ( iDestPos < nDecodeDestLen ) 409 { 410 pchDecodeDest[iDestPos] = 0; 411 } 412 413 return (size_t)iDestPos; 414 } 415 416 //----------------------------------------------------------------------------- 417 // Purpose: Encodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2. 418 // This version of the call isn't a strict RFC implementation, but uses + for space as is 419 // the standard in HTML form encoding, despite it not being part of the RFC. 420 // 421 // Dest buffer should be at least as large as source buffer to guarantee room for decode. 
422 //----------------------------------------------------------------------------- 423 void V_URLEncode( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen ) 424 { 425 return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, true, CharNeedsEscape_Component ); 426 } 427 428 429 void V_URLEncodeNoPlusForSpace( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen ) 430 { 431 return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, false, CharNeedsEscape_Component ); 432 } 433 434 void V_URLEncodeFullPath( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen ) 435 { 436 return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, false, CharNeedsEscape_FullPath ); 437 } 438 439 //----------------------------------------------------------------------------- 440 // Purpose: Decodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2. 441 // This version of the call isn't a strict RFC implementation, but uses + for space as is 442 // the standard in HTML form encoding, despite it not being part of the RFC. 443 // 444 // Dest buffer should be at least as large as source buffer to guarantee room for decode. 445 // Dest buffer being the same as the source buffer (decode in-place) is explicitly allowed. 
446 //----------------------------------------------------------------------------- 447 size_t V_URLDecode( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen ) 448 { 449 return V_URLDecodeInternal( pchDecodeDest, nDecodeDestLen, pchEncodedSource, nEncodedSourceLen, true ); 450 } 451 452 size_t V_URLDecodeNoPlusForSpace( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen ) 453 { 454 return V_URLDecodeInternal( pchDecodeDest, nDecodeDestLen, pchEncodedSource, nEncodedSourceLen, false ); 455 } 456 457 //----------------------------------------------------------------------------- 458 void V_StripExtension( std::string &in ) 459 { 460 // Find the last dot. If it's followed by a dot or a slash, then it's part of a 461 // directory specifier like ../../somedir/./blah. 462 std::string::size_type test = in.rfind( '.' ); 463 if ( test != std::string::npos ) 464 { 465 // This handles things like ".\blah" or "c:\my@email.com\abc\def\geh" 466 // Which would otherwise wind up with "" and "c:\my@email", respectively. 
467 if ( in.rfind( '\\' ) < test && in.rfind( '/' ) < test ) 468 { 469 in.resize( test ); 470 } 471 } 472 } 473 474 475 //----------------------------------------------------------------------------- 476 // Purpose: Tokenizes a string into a vector of strings 477 //----------------------------------------------------------------------------- 478 std::vector<std::string> TokenizeString( const std::string & sString, char cToken ) 479 { 480 std::vector<std::string> vecStrings; 481 std::istringstream stream( sString ); 482 std::string s; 483 while ( std::getline( stream, s, cToken ) ) 484 { 485 vecStrings.push_back( s ); 486 } 487 return vecStrings; 488 } 489 490 // Mozilla: see mozilla.patch for more details 491 //----------------------------------------------------------------------------- 492 // Purpose: Repairs a should-be-UTF-8 string to a for-sure-is-UTF-8 string, plus return boolean if we subbed in '?' somewhere 493 //----------------------------------------------------------------------------- 494 // bool RepairUTF8( const char *pbegin, const char *pend, std::string & sOutputUtf8 ) 495 // { 496 // typedef std::codecvt_utf8<char32_t> facet_type; 497 // facet_type myfacet; 498 499 // std::mbstate_t mystate = std::mbstate_t(); 500 501 // sOutputUtf8.clear(); 502 // sOutputUtf8.reserve( pend - pbegin ); 503 // bool bSqueakyClean = true; 504 505 // const char *pmid = pbegin; 506 // while ( pmid != pend ) 507 // { 508 // bool bHasError = false; 509 // bool bHasValidData = false; 510 511 // char32_t out = 0xdeadbeef, *pout; 512 // pbegin = pmid; 513 // switch ( myfacet.in( mystate, pbegin, pend, pmid, &out, &out + 1, pout ) ) 514 // { 515 // case facet_type::ok: 516 // bHasValidData = true; 517 // break; 518 519 // case facet_type::noconv: 520 // // unexpected! 
always converting type 521 // bSqueakyClean = false; 522 // break; 523 524 // case facet_type::partial: 525 // bHasError = pbegin == pmid; 526 // if ( bHasError ) 527 // { 528 // bSqueakyClean = false; 529 // } 530 // else 531 // { 532 // bHasValidData = true; 533 // } 534 // break; 535 536 // case facet_type::error: 537 // bHasError = true; 538 // bSqueakyClean = false; 539 // break; 540 // } 541 542 // if ( bHasValidData ) 543 // { 544 // // could convert back, but no need 545 // for ( const char *p = pbegin; p != pmid; ++p ) 546 // { 547 // sOutputUtf8 += *p; 548 // } 549 // } 550 551 // if ( bHasError ) 552 // { 553 // sOutputUtf8 += '?'; 554 // } 555 556 // if ( pmid == pbegin ) 557 // { 558 // pmid++; 559 // } 560 // } 561 562 // return bSqueakyClean; 563 // } 564 565 // //----------------------------------------------------------------------------- 566 // // Purpose: Repairs a should-be-UTF-8 string to a for-sure-is-UTF-8 string, plus return boolean if we subbed in '?' somewhere 567 // //----------------------------------------------------------------------------- 568 // bool RepairUTF8( const std::string & sInputUtf8, std::string & sOutputUtf8 ) 569 // { 570 // return RepairUTF8( sInputUtf8.data(), sInputUtf8.data() + sInputUtf8.size(), sOutputUtf8 ); 571 // }