[ tor-browser ].git.dasho

strtools_public.cpp (17525B)
      1 //========= Copyright Valve Corporation ============//
      2 #include "strtools_public.h"
      3 #include <string.h>
      4 #include <stdio.h>
      5 #include <stdlib.h>
      6 #include <sstream>
      7 // Mozilla: see mozilla.patch for more details
      8 // #include <codecvt>
      9 // #include <iostream>
     10 #include <functional>
     11 #include <locale>
     12 // #include <codecvt>
     13 
     14 #if defined( _WIN32 )
     15 #include <windows.h>
     16 #endif
     17 
     18 //-----------------------------------------------------------------------------
     19 // Purpose:
     20 //-----------------------------------------------------------------------------
     21 bool StringHasPrefix( const std::string & sString, const std::string & sPrefix )
     22 {
     23 return 0 == strnicmp( sString.c_str(), sPrefix.c_str(), sPrefix.length() );
     24 }
     25 
     26 bool StringHasPrefixCaseSensitive( const std::string & sString, const std::string & sPrefix )
     27 {
     28 return 0 == strncmp( sString.c_str(), sPrefix.c_str(), sPrefix.length() );
     29 }
     30 
     31 
     32 bool StringHasSuffix( const std::string &sString, const std::string &sSuffix )
     33 {
     34 size_t cStrLen = sString.length();
     35 size_t cSuffixLen = sSuffix.length();
     36 
     37 if ( cSuffixLen > cStrLen )
     38 	return false;
     39 
     40 std::string sStringSuffix = sString.substr( cStrLen - cSuffixLen, cSuffixLen );
     41 
     42 return 0 == stricmp( sStringSuffix.c_str(), sSuffix.c_str() );
     43 }
     44 
     45 bool StringHasSuffixCaseSensitive( const std::string &sString, const std::string &sSuffix )
     46 {
     47 size_t cStrLen = sString.length();
     48 size_t cSuffixLen = sSuffix.length();
     49 
     50 if ( cSuffixLen > cStrLen )
     51 	return false;
     52 
     53 std::string sStringSuffix = sString.substr( cStrLen - cSuffixLen, cSuffixLen );
     54 
     55 return 0 == strncmp( sStringSuffix.c_str(), sSuffix.c_str(),cSuffixLen );
     56 }
     57 
     58 //-----------------------------------------------------------------------------
     59 // Purpose:
     60 //-----------------------------------------------------------------------------
     61 // Mozilla: see mozilla.patch for more details
     62 //typedef std::codecvt_utf8< wchar_t > convert_type;
     63 
     64 // Mozilla: see mozilla.patch for more details
     65 #if defined( _WIN32 )
     66 std::string UTF16to8(const wchar_t * in)
     67 {
     68 int retLength = ::WideCharToMultiByte(CP_UTF8, 0, in, -1, nullptr, 0, nullptr, nullptr);
     69 if (retLength == 0)
     70 {
     71 	return std::string();
     72 }
     73 
     74 char* retString = new char[retLength];
     75 ::WideCharToMultiByte(CP_UTF8, 0, in, -1, retString, retLength, nullptr, nullptr);
     76 
     77 std::string retStringValue(retString);
     78 
     79 delete[] retString;
     80 
     81 return retStringValue;
     82 
     83 // static std::wstring_convert< convert_type, wchar_t > s_converter;  // construction of this can be expensive (or even serialized) depending on locale
     84 
     85 // try
     86 // {
     87 // 	return s_converter.to_bytes( in );
     88 // }
     89 // catch ( ... )
     90 // {
     91 // 	return std::string();
     92 // }
     93 }
     94 
     95 std::string UTF16to8( const std::wstring & in ) { return UTF16to8( in.c_str() ); }
     96 
     97 // Mozilla: see mozilla.patch for more details
     98 std::wstring UTF8to16(const char * in)
     99 {
    100 int retLength = ::MultiByteToWideChar(CP_UTF8, 0, in, -1, nullptr, 0);
    101 if (retLength == 0)
    102 {
    103 	return std::wstring();
    104 }
    105 
    106 wchar_t* retString = new wchar_t[retLength];
    107 ::MultiByteToWideChar(CP_UTF8, 0, in, -1, retString, retLength);
    108 
    109 std::wstring retStringValue(retString);
    110 
    111 delete[] retString;
    112 
    113 return retStringValue;
    114 
    115 //static std::wstring_convert< convert_type, wchar_t > s_converter;  // construction of this can be expensive (or even serialized) depending on locale
    116 
    117 //try
    118 //{
    119 //	return s_converter.from_bytes( in );
    120 //}
    121 //catch ( ... )
    122 //{
    123 //	return std::wstring();
    124 //}
    125 }
    126 
    127 std::wstring UTF8to16( const std::string & in ) { return UTF8to16( in.c_str() ); }
    128 #endif
    129 
    130 
    131 #if defined( _WIN32 )
    132 //-----------------------------------------------------------------------------
    133 // Purpose: Convert LPSTR in the default CodePage to UTF8
    134 //-----------------------------------------------------------------------------
    135 std::string DefaultACPtoUTF8( const char *pszStr )
    136 {
    137 if ( GetACP() == CP_UTF8 )
    138 {
    139 	return pszStr;
    140 }
    141 else
    142 {
    143 	std::vector<wchar_t> vecBuf( strlen( pszStr ) + 1 ); // should be guaranteed to be enough
    144 	MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, pszStr, -1, vecBuf.data(), (int) vecBuf.size() );
    145 	return UTF16to8( vecBuf.data() );
    146 }
    147 }
    148 #endif
    149 
    150 // --------------------------------------------------------------------
    151 // Purpose:
    152 // --------------------------------------------------------------------
    153 void strcpy_safe( char *pchBuffer, size_t unBufferSizeBytes, const char *pchSource )
    154 {
    155 strncpy( pchBuffer, pchSource, unBufferSizeBytes - 1 );
    156 pchBuffer[unBufferSizeBytes - 1] = '\0';
    157 }
    158 
    159 // --------------------------------------------------------------------
    160 // Purpose: converts a string to upper case
    161 // --------------------------------------------------------------------
    162 std::string StringToUpper( const std::string & sString )
    163 {
    164 std::string sOut;
    165 sOut.reserve( sString.size() + 1 );
    166 for( std::string::const_iterator i = sString.begin(); i != sString.end(); i++ )
    167 {
    168 	sOut.push_back( (char)toupper( *i ) );
    169 }
    170 
    171 return sOut;
    172 }
    173 
    174 
    175 // --------------------------------------------------------------------
    176 // Purpose: converts a string to lower case
    177 // --------------------------------------------------------------------
    178 std::string StringToLower( const std::string & sString )
    179 {
    180 std::string sOut;
    181 sOut.reserve( sString.size() + 1 );
    182 for( std::string::const_iterator i = sString.begin(); i != sString.end(); i++ )
    183 {
    184 	sOut.push_back( (char)tolower( *i ) );
    185 }
    186 
    187 return sOut;
    188 }
    189 
    190 
    191 uint32_t ReturnStdString( const std::string & sValue, char *pchBuffer, uint32_t unBufferLen )
    192 {
    193 uint32_t unLen = (uint32_t)sValue.length() + 1;
    194 if( !pchBuffer || !unBufferLen )
    195 	return unLen;
    196 
    197 if( unBufferLen < unLen )
    198 {
    199 	pchBuffer[0] = '\0';
    200 }
    201 else
    202 {
    203 	memcpy( pchBuffer, sValue.c_str(), unLen );
    204 }
    205 
    206 return unLen;
    207 }
    208 
    209 
    210 /** Returns a std::string from a uint64_t */
    211 // Mozilla: see mozilla.patch for more details
    212 // std::string Uint64ToString( uint64_t ulValue )
    213 // {
    214 // 	char buf[ 22 ];
    215 // #if defined( _WIN32 )
    216 // 	sprintf_s( buf, "%llu", ulValue );
    217 // #else
    218 //     snprintf( buf, sizeof( buf ), "%llu", (long long unsigned int ) ulValue );
    219 // #endif
    220 // 	return buf;
    221 // }
    222 
    223 
    224 /** returns a uint64_t from a string */
    225 uint64_t StringToUint64( const std::string & sValue )
    226 {
    227 return strtoull( sValue.c_str(), NULL, 0 );
    228 }
    229 
    230 //-----------------------------------------------------------------------------
    231 // Purpose: Helper for converting a numeric value to a hex digit, value should be 0-15.
    232 //-----------------------------------------------------------------------------
    233 char cIntToHexDigit( int nValue )
    234 {
    235 //Assert( nValue >= 0 && nValue <= 15 );
    236 return "0123456789ABCDEF"[ nValue & 15 ];
    237 }
    238 
    239 //-----------------------------------------------------------------------------
    240 // Purpose: Helper for converting a hex char value to numeric, return -1 if the char
    241 //          is not a valid hex digit.
    242 //-----------------------------------------------------------------------------
    243 int iHexCharToInt( char cValue )
    244 {
    245 int32_t iValue = cValue;
    246 if ( (uint32_t)( iValue - '0' ) < 10 )
    247 	return iValue - '0';
    248 
    249 iValue |= 0x20;
    250 if ( (uint32_t)( iValue - 'a' ) < 6 )
    251 	return iValue - 'a' + 10;
    252 
    253 return -1;
    254 }
    255 
    256 
    257 //-----------------------------------------------------------------------------
    258 // Purpose: These define the set of characters to filter for components (which
    259 //			need all the escaping we can muster) vs. paths (which don't want
    260 //			/ and : escaped so we don't break less compliant URL handling code.
    261 //-----------------------------------------------------------------------------
    262 static bool CharNeedsEscape_Component( const char c )
    263 {
    264 return (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && !(c >= '0' && c <= '9')
    265 	&& c != '-' && c != '_' && c != '.');
    266 }
    267 static bool CharNeedsEscape_FullPath( const char c )
    268 {
    269 return (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && !(c >= '0' && c <= '9')
    270 	&& c != '-' && c != '_' && c != '.' && c != '/' && c != ':' );
    271 }
    272 
    273 
    274 //-----------------------------------------------------------------------------
    275 // Purpose: Internal implementation of encode, works in the strict RFC manner, or
    276 //          with spaces turned to + like HTML form encoding.
    277 //-----------------------------------------------------------------------------
    278 void V_URLEncodeInternal( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen, 
    279 bool bUsePlusForSpace, std::function< bool(const char)> fnNeedsEscape )
    280 {
    281 //AssertMsg( nDestLen > 3*nSourceLen, "Target buffer for V_URLEncode should be 3x source length, plus one for terminating null\n" );
    282 
    283 int iDestPos = 0;
    284 for ( int i=0; i < nSourceLen; ++i )
    285 {
    286 	// worst case we need 3 additional chars
    287 	if( (iDestPos+3) > nDestLen  )
    288 	{
    289 		pchDest[0] = '\0';
    290 //			AssertMsg( false, "Target buffer too short\n" );
    291 		return;
    292 	}
    293 
    294 	// We allow only a-z, A-Z, 0-9, period, underscore, and hyphen to pass through unescaped.
    295 	// These are the characters allowed by both the original RFC 1738 and the latest RFC 3986.
    296 	// Current specs also allow '~', but that is forbidden under original RFC 1738.
    297 	if ( fnNeedsEscape( pchSource[i] ) )
    298 	{
    299 		if ( bUsePlusForSpace && pchSource[i] == ' ' )
    300 		{
    301 			pchDest[iDestPos++] = '+';
    302 		}
    303 		else
    304 		{
    305 			pchDest[iDestPos++] = '%';
    306 			uint8_t iValue = pchSource[i];
    307 			if ( iValue == 0 )
    308 			{
    309 				pchDest[iDestPos++] = '0';
    310 				pchDest[iDestPos++] = '0';
    311 			}
    312 			else
    313 			{
    314 				char cHexDigit1 = cIntToHexDigit( iValue % 16 );
    315 				iValue /= 16;
    316 				char cHexDigit2 = cIntToHexDigit( iValue );
    317 				pchDest[iDestPos++] = cHexDigit2;
    318 				pchDest[iDestPos++] = cHexDigit1;
    319 			}
    320 		}
    321 	}
    322 	else
    323 	{
    324 		pchDest[iDestPos++] = pchSource[i];
    325 	}
    326 }
    327 
    328 if( (iDestPos+1) > nDestLen )
    329 {
    330 	pchDest[0] = '\0';
    331 	//AssertMsg( false, "Target buffer too short to terminate\n" );
    332 	return;
    333 }
    334 
    335 // Null terminate
    336 pchDest[iDestPos++] = 0;
    337 }
    338 
    339 
    340 //-----------------------------------------------------------------------------
    341 // Purpose: Internal implementation of decode, works in the strict RFC manner, or
    342 //          with spaces turned to + like HTML form encoding.
    343 //
    344 //			Returns the amount of space used in the output buffer.
    345 //-----------------------------------------------------------------------------
    346 size_t V_URLDecodeInternal( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen, bool bUsePlusForSpace )
    347 {
    348 if ( nDecodeDestLen < nEncodedSourceLen )
    349 {
    350 	//AssertMsg( false, "V_URLDecode needs a dest buffer at least as large as the source" );
    351 	return 0;
    352 }
    353 
    354 int iDestPos = 0;
    355 for( int i=0; i < nEncodedSourceLen; ++i )
    356 {
    357 	if ( bUsePlusForSpace && pchEncodedSource[i] == '+' )
    358 	{
    359 		pchDecodeDest[ iDestPos++ ] = ' ';
    360 	}
    361 	else if ( pchEncodedSource[i] == '%' )
    362 	{
    363 		// Percent signifies an encoded value, look ahead for the hex code, convert to numeric, and use that
    364 
    365 		// First make sure we have 2 more chars
    366 		if ( i < nEncodedSourceLen - 2 )
    367 		{
    368 			char cHexDigit1 = pchEncodedSource[i+1];
    369 			char cHexDigit2 = pchEncodedSource[i+2];
    370 
    371 			// Turn the chars into a hex value, if they are not valid, then we'll
    372 			// just place the % and the following two chars direct into the string,
    373 			// even though this really shouldn't happen, who knows what bad clients
    374 			// may do with encoding.
    375 			bool bValid = false;
    376 			int iValue = iHexCharToInt( cHexDigit1 );
    377 			if ( iValue != -1 )
    378 			{
    379 				iValue *= 16;
    380 				int iValue2 = iHexCharToInt( cHexDigit2 );
    381 				if ( iValue2 != -1 )
    382 				{
    383 					iValue += iValue2;
    384 					pchDecodeDest[ iDestPos++ ] = (char)iValue;
    385 					bValid = true;
    386 				}
    387 			}
    388 
    389 			if ( !bValid )
    390 			{
    391 				pchDecodeDest[ iDestPos++ ] = '%';
    392 				pchDecodeDest[ iDestPos++ ] = cHexDigit1;
    393 				pchDecodeDest[ iDestPos++ ] = cHexDigit2;
    394 			}
    395 		}
    396 
    397 		// Skip ahead
    398 		i += 2;
    399 	}
    400 	else
    401 	{
    402 		pchDecodeDest[ iDestPos++ ] = pchEncodedSource[i];
    403 	}
    404 }
    405 
    406 // We may not have extra room to NULL terminate, since this can be used on raw data, but if we do
    407 // go ahead and do it as this can avoid bugs.
    408 if ( iDestPos < nDecodeDestLen )
    409 {
    410 	pchDecodeDest[iDestPos] = 0;
    411 }
    412 
    413 return (size_t)iDestPos;
    414 }
    415 
    416 //-----------------------------------------------------------------------------
    417 // Purpose: Encodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2.  
    418 //          This version of the call isn't a strict RFC implementation, but uses + for space as is
    419 //          the standard in HTML form encoding, despite it not being part of the RFC.
    420 //
    421 //          Dest buffer should be at least as large as source buffer to guarantee room for decode.
    422 //-----------------------------------------------------------------------------
    423 void V_URLEncode( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen )
    424 {
    425 return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, true, CharNeedsEscape_Component );
    426 }
    427 
    428 
    429 void V_URLEncodeNoPlusForSpace( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen )
    430 {
    431 return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, false, CharNeedsEscape_Component );
    432 }
    433 
    434 void V_URLEncodeFullPath( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen )
    435 {
    436 return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, false, CharNeedsEscape_FullPath );
    437 }
    438 
    439 //-----------------------------------------------------------------------------
    440 // Purpose: Decodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2.  
    441 //          This version of the call isn't a strict RFC implementation, but uses + for space as is
    442 //          the standard in HTML form encoding, despite it not being part of the RFC.
    443 //
    444 //          Dest buffer should be at least as large as source buffer to guarantee room for decode.
    445 //			Dest buffer being the same as the source buffer (decode in-place) is explicitly allowed.
    446 //-----------------------------------------------------------------------------
    447 size_t V_URLDecode( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen )
    448 {
    449 return V_URLDecodeInternal( pchDecodeDest, nDecodeDestLen, pchEncodedSource, nEncodedSourceLen, true );
    450 }
    451 
    452 size_t V_URLDecodeNoPlusForSpace( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen )
    453 {
    454 return V_URLDecodeInternal( pchDecodeDest, nDecodeDestLen, pchEncodedSource, nEncodedSourceLen, false );
    455 }
    456 
    457 //-----------------------------------------------------------------------------
    458 void V_StripExtension( std::string &in )
    459 {
    460 // Find the last dot. If it's followed by a dot or a slash, then it's part of a 
    461 // directory specifier like ../../somedir/./blah.
    462 std::string::size_type test = in.rfind( '.' );
    463 if ( test != std::string::npos )
    464 {
    465 	// This handles things like ".\blah" or "c:\my@email.com\abc\def\geh"
    466 	// Which would otherwise wind up with "" and "c:\my@email", respectively.
    467 	if ( in.rfind( '\\' ) < test && in.rfind( '/' ) < test )
    468 	{
    469 		in.resize( test );
    470 	}
    471 }
    472 }
    473 
    474 
    475 //-----------------------------------------------------------------------------
    476 // Purpose: Tokenizes a string into a vector of strings
    477 //-----------------------------------------------------------------------------
    478 std::vector<std::string> TokenizeString( const std::string & sString, char cToken )
    479 {
    480 std::vector<std::string> vecStrings;
    481 std::istringstream stream( sString );
    482 std::string s;
    483 while ( std::getline( stream, s, cToken ) )
    484 {
    485 	vecStrings.push_back( s );
    486 }
    487 return vecStrings;
    488 }
    489 
    490 // Mozilla: see mozilla.patch for more details
    491 //-----------------------------------------------------------------------------
    492 // Purpose: Repairs a should-be-UTF-8 string to a for-sure-is-UTF-8 string, plus return boolean if we subbed in '?' somewhere
    493 //-----------------------------------------------------------------------------
    494 // bool RepairUTF8( const char *pbegin, const char *pend, std::string & sOutputUtf8 )
    495 // {
    496 // 	typedef std::codecvt_utf8<char32_t> facet_type;
    497 // 	facet_type myfacet;
    498 
    499 // 	std::mbstate_t mystate = std::mbstate_t();
    500 
    501 // 	sOutputUtf8.clear();
    502 // 	sOutputUtf8.reserve( pend - pbegin );
    503 // 	bool bSqueakyClean = true;
    504 
    505 // 	const char *pmid = pbegin;
    506 // 	while ( pmid != pend )
    507 // 	{
    508 // 		bool bHasError = false;
    509 // 		bool bHasValidData = false;
    510 
    511 // 		char32_t out = 0xdeadbeef, *pout;
    512 // 		pbegin = pmid;
    513 // 		switch ( myfacet.in( mystate, pbegin, pend, pmid, &out, &out + 1, pout ) )
    514 // 		{
    515 // 		case facet_type::ok:
    516 // 			bHasValidData = true;
    517 // 			break;
    518 
    519 // 		case facet_type::noconv:
    520 // 			// unexpected! always converting type
    521 // 			bSqueakyClean = false;
    522 // 			break;
    523 
    524 // 		case facet_type::partial:
    525 // 			bHasError = pbegin == pmid;
    526 // 			if ( bHasError )
    527 // 			{
    528 // 				bSqueakyClean = false;
    529 // 			}
    530 // 			else
    531 // 			{
    532 // 				bHasValidData = true;
    533 // 			}
    534 // 			break;
    535 
    536 // 		case facet_type::error:
    537 // 			bHasError = true;
    538 // 			bSqueakyClean = false;
    539 // 			break;
    540 // 		}
    541 
    542 // 		if ( bHasValidData )
    543 // 		{
    544 // 			// could convert back, but no need
    545 // 			for ( const char *p = pbegin; p != pmid; ++p )
    546 // 			{
    547 // 				sOutputUtf8 += *p;
    548 // 			}
    549 // 		}
    550 
    551 // 		if ( bHasError )
    552 // 		{
    553 // 			sOutputUtf8 += '?';
    554 // 		}
    555 
    556 // 		if ( pmid == pbegin )
    557 // 		{
    558 // 			pmid++;
    559 // 		}
    560 // 	}
    561 
    562 // 	return bSqueakyClean;
    563 // }
    564 
    565 // //-----------------------------------------------------------------------------
    566 // // Purpose: Repairs a should-be-UTF-8 string to a for-sure-is-UTF-8 string, plus return boolean if we subbed in '?' somewhere
    567 // //-----------------------------------------------------------------------------
    568 // bool RepairUTF8( const std::string & sInputUtf8, std::string & sOutputUtf8 )
    569 // {
    570 // 	return RepairUTF8( sInputUtf8.data(), sInputUtf8.data() + sInputUtf8.size(), sOutputUtf8 );
    571 // }
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE