tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

StringConvert.cpp (7776B)


      1 // Common/StringConvert.cpp
      2 
      3 #include "StdAfx.h"
      4 
      5 #include "StringConvert.h"
      6 
      7 #ifndef _WIN32
      8 #include <stdlib.h>
      9 #endif
     10 
     11 static const char k_DefultChar = '_';
     12 
     13 #ifdef _WIN32
     14 
     15 /*
     16 MultiByteToWideChar(CodePage, DWORD dwFlags,
     17    LPCSTR lpMultiByteStr, int cbMultiByte,
     18    LPWSTR lpWideCharStr, int cchWideChar)
     19 
     20  if (cbMultiByte == 0)
     21    return: 0. ERR: ERROR_INVALID_PARAMETER
     22 
     23  if (cchWideChar == 0)
     24    return: the required buffer size in characters.
     25 
     26  if (supplied buffer size was not large enough)
     27    return: 0. ERR: ERROR_INSUFFICIENT_BUFFER
     28    The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex)
     29 
     30  If there are illegal characters:
     31    if MB_ERR_INVALID_CHARS is set in dwFlags:
     32      - the function stops conversion on illegal character.
     33      - Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION.
     34    
     35    if MB_ERR_INVALID_CHARS is NOT set in dwFlags:
     36      before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0.
     37      in Vista+:    illegal character is not dropped (MSDN). Undocumented: illegal
     38                    character is converted to U+FFFD, which is REPLACEMENT CHARACTER.
     39 */
     40 
     41 
     42 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
     43 {
     44  dest.Empty();
     45  if (src.IsEmpty())
     46    return;
     47  {
     48    /*
     49    wchar_t *d = dest.GetBuf(src.Len());
     50    const char *s = (const char *)src;
     51    unsigned i;
     52    
     53    for (i = 0;;)
     54    {
     55      Byte c = (Byte)s[i];
     56      if (c >= 0x80 || c == 0)
     57        break;
     58      d[i++] = (wchar_t)c;
     59    }
     60 
     61    if (i != src.Len())
     62    {
     63      unsigned len = MultiByteToWideChar(codePage, 0, s + i,
     64          src.Len() - i, d + i,
     65          src.Len() + 1 - i);
     66      if (len == 0)
     67        throw 282228;
     68      i += len;
     69    }
     70 
     71    d[i] = 0;
     72    dest.ReleaseBuf_SetLen(i);
     73    */
     74    unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0);
     75    if (len == 0)
     76    {
     77      if (GetLastError() != 0)
     78        throw 282228;
     79    }
     80    else
     81    {
     82      len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len);
     83      if (len == 0)
     84        throw 282228;
     85      dest.ReleaseBuf_SetEnd(len);
     86    }
     87  }
     88 }
     89 
     90 /*
     91  int WideCharToMultiByte(
     92      UINT CodePage, DWORD dwFlags,
     93      LPCWSTR lpWideCharStr, int cchWideChar,
     94      LPSTR lpMultiByteStr, int cbMultiByte,
     95      LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar);
     96 
     97 if (lpDefaultChar == NULL),
     98  - it uses system default value.
     99 
    100 if (CodePage == CP_UTF7 || CodePage == CP_UTF8)
    101  if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL)
    102    return: 0. ERR: ERROR_INVALID_PARAMETER.
    103 
    104 The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL)
    105 
    106 */
    107 
    108 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
    109 {
    110  dest.Empty();
    111  defaultCharWasUsed = false;
    112  if (src.IsEmpty())
    113    return;
    114  {
    115    /*
    116    unsigned numRequiredBytes = src.Len() * 2;
    117    char *d = dest.GetBuf(numRequiredBytes);
    118    const wchar_t *s = (const wchar_t *)src;
    119    unsigned i;
    120    
    121    for (i = 0;;)
    122    {
    123      wchar_t c = s[i];
    124      if (c >= 0x80 || c == 0)
    125        break;
    126      d[i++] = (char)c;
    127    }
    128    
    129    if (i != src.Len())
    130    {
    131      BOOL defUsed = FALSE;
    132      defaultChar = defaultChar;
    133 
    134      bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
    135      unsigned len = WideCharToMultiByte(codePage, 0, s + i, src.Len() - i,
    136          d + i, numRequiredBytes + 1 - i,
    137          (isUtf ? NULL : &defaultChar),
    138          (isUtf ? NULL : &defUsed));
    139      defaultCharWasUsed = (defUsed != FALSE);
    140      if (len == 0)
    141        throw 282229;
    142      i += len;
    143    }
    144 
    145    d[i] = 0;
    146    dest.ReleaseBuf_SetLen(i);
    147    */
    148 
    149    /*
    150    if (codePage != CP_UTF7)
    151    {
    152      const wchar_t *s = (const wchar_t *)src;
    153      unsigned i;
    154      for (i = 0;; i++)
    155      {
    156        wchar_t c = s[i];
    157        if (c >= 0x80 || c == 0)
    158          break;
    159      }
    160      
    161      if (s[i] == 0)
    162      {
    163        char *d = dest.GetBuf(src.Len());
    164        for (i = 0;;)
    165        {
    166          wchar_t c = s[i];
    167          if (c == 0)
    168            break;
    169          d[i++] = (char)c;
    170        }
    171        d[i] = 0;
    172        dest.ReleaseBuf_SetLen(i);
    173        return;
    174      }
    175    }
    176    */
    177 
    178    unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL);
    179    if (len == 0)
    180    {
    181      if (GetLastError() != 0)
    182        throw 282228;
    183    }
    184    else
    185    {
    186      BOOL defUsed = FALSE;
    187      bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
    188      // defaultChar = defaultChar;
    189      len = WideCharToMultiByte(codePage, 0, src, src.Len(),
    190          dest.GetBuf(len), len,
    191          (isUtf ? NULL : &defaultChar),
    192          (isUtf ? NULL : &defUsed)
    193          );
    194      if (!isUtf)
    195        defaultCharWasUsed = (defUsed != FALSE);
    196      if (len == 0)
    197        throw 282228;
    198      dest.ReleaseBuf_SetEnd(len);
    199    }
    200  }
    201 }
    202 
    203 /*
    204 #ifndef UNDER_CE
    205 AString SystemStringToOemString(const CSysString &src)
    206 {
    207  AString dest;
    208  const unsigned len = src.Len() * 2;
    209  CharToOem(src, dest.GetBuf(len));
    210  dest.ReleaseBuf_CalcLen(len);
    211  return dest;
    212 }
    213 #endif
    214 */
    215 
    216 #else
    217 
    218 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */)
    219 {
    220  dest.Empty();
    221  if (src.IsEmpty())
    222    return;
    223 
    224  size_t limit = ((size_t)src.Len() + 1) * 2;
    225  wchar_t *d = dest.GetBuf((unsigned)limit);
    226  size_t len = mbstowcs(d, src, limit);
    227  if (len != (size_t)-1)
    228  {
    229    dest.ReleaseBuf_SetEnd((unsigned)len);
    230    return;
    231  }
    232  
    233  {
    234    unsigned i;
    235    const char *s = (const char *)src;
    236    for (i = 0;;)
    237    {
    238      Byte c = (Byte)s[i];
    239      if (c == 0)
    240        break;
    241      d[i++] = (wchar_t)c;
    242    }
    243    d[i] = 0;
    244    dest.ReleaseBuf_SetLen(i);
    245  }
    246 }
    247 
    248 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed)
    249 {
    250  dest.Empty();
    251  defaultCharWasUsed = false;
    252  if (src.IsEmpty())
    253    return;
    254 
    255  size_t limit = ((size_t)src.Len() + 1) * 6;
    256  char *d = dest.GetBuf((unsigned)limit);
    257  size_t len = wcstombs(d, src, limit);
    258  if (len != (size_t)-1)
    259  {
    260    dest.ReleaseBuf_SetEnd((unsigned)len);
    261    return;
    262  }
    263 
    264  {
    265    const wchar_t *s = (const wchar_t *)src;
    266    unsigned i;
    267    for (i = 0;;)
    268    {
    269      wchar_t c = s[i];
    270      if (c == 0)
    271        break;
    272      if (c >= 0x100)
    273      {
    274        c = defaultChar;
    275        defaultCharWasUsed = true;
    276      }
    277      d[i++] = (char)c;
    278    }
    279    d[i] = 0;
    280    dest.ReleaseBuf_SetLen(i);
    281  }
    282 }
    283 
    284 #endif
    285 
    286 
    287 UString MultiByteToUnicodeString(const AString &src, UINT codePage)
    288 {
    289  UString dest;
    290  MultiByteToUnicodeString2(dest, src, codePage);
    291  return dest;
    292 }
    293 
    294 UString MultiByteToUnicodeString(const char *src, UINT codePage)
    295 {
    296  return MultiByteToUnicodeString(AString(src), codePage);
    297 }
    298 
    299 
    300 void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage)
    301 {
    302  bool defaultCharWasUsed;
    303  UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
    304 }
    305 
    306 AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
    307 {
    308  AString dest;
    309  UnicodeStringToMultiByte2(dest, src, codePage, defaultChar, defaultCharWasUsed);
    310  return dest;
    311 }
    312 
    313 AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
    314 {
    315  AString dest;
    316  bool defaultCharWasUsed;
    317  UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
    318  return dest;
    319 }