tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

testCharacterEncoding.cpp (6961B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
#include "mozilla/TextUtils.h"

#include <algorithm>
#include <clocale>
#include <cstring>
#include <cwchar>
#include <string>
#include <string_view>

#include "js/CharacterEncoding.h"
#include "jsapi-tests/tests.h"
     14 
     15 static bool EqualsIgnoreCase(const char* xs, const char* ys) {
     16  while (*xs && *ys) {
     17    char x = *xs++;
     18    char y = *ys++;
     19 
     20    // Convert both to lower-case.
     21    if (mozilla::IsAsciiAlpha(x) && mozilla::IsAsciiAlpha(y)) {
     22      x |= 0x20;
     23      y |= 0x20;
     24    }
     25 
     26    // Fail if the characters aren't the same.
     27    if (x != y) {
     28      return false;
     29    }
     30  }
     31 
     32  // Both strings must be read to the end.
     33  return !*xs && !*ys;
     34 }
     35 
     36 class ToUTF8Locale {
     37  const char* previousLocale_ = nullptr;
     38  bool supported_ = false;
     39 
     40 public:
     41  ToUTF8Locale() {
     42    // Store the old locale so we can reset it in the destructor.
     43    previousLocale_ = std::setlocale(LC_ALL, nullptr);
     44 
     45    // Query the system default locale.
     46    const char* defaultLocale = std::setlocale(LC_ALL, "");
     47    if (!defaultLocale) {
     48      // std::setlocale returns nullptr on failure.
     49      return;
     50    }
     51 
     52    // Switch the default locale to be UTF-8 aware.
     53    const char* newLocale = std::setlocale(LC_ALL, "en_US.UTF-8");
     54    if (!newLocale) {
     55      // std::setlocale returns nullptr on failure.
     56      return;
     57    }
     58 
     59    const char* defaultCodepage = std::strchr(defaultLocale, '.');
     60    const char* newCodepage = std::strchr(newLocale, '.');
     61 
     62    // Return if either the default or new locale don't contain a code-page.
     63    if (!defaultCodepage || !newCodepage) {
     64      return;
     65    }
     66 
     67    // Skip past the '.'.
     68    defaultCodepage++;
     69    newCodepage++;
     70 
     71    // UTF-8 is supported when the default locale and new locale support it:
     72    //
     73    // The default locale needs to support UTF-8, because this test is compiled
     74    // using the default locale.
     75    //
     76    // The new locale needs to support UTF-8 to ensure UTF-8 encoding works at
     77    // runtime.
     78    supported_ = EqualsIgnoreCase(defaultCodepage, "UTF-8") &&
     79                 EqualsIgnoreCase(newCodepage, "UTF-8");
     80  }
     81 
     82  bool supported() const { return supported_; }
     83 
     84  ~ToUTF8Locale() {
     85    // Restore the previous locale.
     86    if (previousLocale_) {
     87      std::setlocale(LC_ALL, previousLocale_);
     88    }
     89  }
     90 };
     91 
     92 BEGIN_TEST(testCharacterEncoding_narrow_to_utf8) {
     93  // Assume the narrow charset is ASCII-compatible. ASCII to UTF-8 conversion is
     94  // a no-op.
     95  for (std::string_view string : {
     96           "",
     97           "a",
     98           "abc",
     99           "abc\0def",
    100       }) {
    101    auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data());
    102    CHECK(utf8 != nullptr);
    103    CHECK_EQUAL(std::strlen(utf8.get()), string.length());
    104    CHECK(utf8.get() == string);
    105  }
    106  return true;
    107 }
    108 END_TEST(testCharacterEncoding_narrow_to_utf8)
    109 
    110 BEGIN_TEST(testCharacterEncoding_wide_to_utf8) {
    111  // Assume the wide charset is ASCII-compatible. ASCII to UTF-8 conversion is
    112  // a no-op.
    113  for (std::wstring_view string : {
    114           L"",
    115           L"a",
    116           L"abc",
    117           L"abc\0def",
    118       }) {
    119    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    120    CHECK(utf8 != nullptr);
    121    CHECK_EQUAL(std::strlen(utf8.get()), string.length());
    122    CHECK(std::equal(
    123        string.begin(), string.end(), utf8.get(),
    124        [](wchar_t x, char y) { return char32_t(x) == char32_t(y); }));
    125  }
    126  return true;
    127 }
    128 END_TEST(testCharacterEncoding_wide_to_utf8)
    129 
    130 BEGIN_TEST(testCharacterEncoding_wide_to_utf8_non_ascii) {
    131  // Change the locale to be UTF-8 aware for the emoji string.
    132  ToUTF8Locale utf8locale;
    133 
    134  // Skip this test if UTF-8 isn't supported on this system.
    135  if (!utf8locale.supported()) {
    136    return true;
    137  }
    138 
    139  {
    140    std::wstring_view string = L"ä";
    141    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    142    CHECK(utf8 != nullptr);
    143 
    144    CHECK_EQUAL(std::strlen(utf8.get()), 2U);
    145    CHECK_EQUAL(utf8[0], char(0xC3));
    146    CHECK_EQUAL(utf8[1], char(0xA4));
    147  }
    148  {
    149    std::wstring_view string = L"💩";
    150    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    151    CHECK(utf8 != nullptr);
    152 
    153    CHECK_EQUAL(std::strlen(utf8.get()), 4U);
    154    CHECK_EQUAL(utf8[0], char(0xF0));
    155    CHECK_EQUAL(utf8[1], char(0x9F));
    156    CHECK_EQUAL(utf8[2], char(0x92));
    157    CHECK_EQUAL(utf8[3], char(0xA9));
    158  }
    159  return true;
    160 }
    161 END_TEST(testCharacterEncoding_wide_to_utf8_non_ascii)
    162 
    163 BEGIN_TEST(testCharacterEncoding_utf8_to_narrow) {
    164  // Assume the narrow charset is ASCII-compatible. ASCII to UTF-8 conversion is
    165  // a no-op.
    166  for (std::string_view string : {
    167           "",
    168           "a",
    169           "abc",
    170           "abc\0def",
    171       }) {
    172    auto narrow = JS::EncodeUtf8ToNarrow(cx, string.data());
    173    CHECK(narrow != nullptr);
    174    CHECK_EQUAL(std::strlen(narrow.get()), string.length());
    175    CHECK(narrow.get() == string);
    176  }
    177  return true;
    178 }
    179 END_TEST(testCharacterEncoding_utf8_to_narrow)
    180 
    181 BEGIN_TEST(testCharacterEncoding_utf8_to_wide) {
    182  // Assume the wide charset is ASCII-compatible. ASCII to UTF-8 conversion is
    183  // a no-op.
    184  for (std::string_view string : {
    185           "",
    186           "a",
    187           "abc",
    188           "abc\0def",
    189       }) {
    190    auto wide = JS::EncodeUtf8ToWide(cx, string.data());
    191    CHECK(wide != nullptr);
    192    CHECK_EQUAL(std::wcslen(wide.get()), string.length());
    193    CHECK(std::equal(
    194        string.begin(), string.end(), wide.get(),
    195        [](char x, wchar_t y) { return char32_t(x) == char32_t(y); }));
    196  }
    197  return true;
    198 }
    199 END_TEST(testCharacterEncoding_utf8_to_wide)
    200 
    201 BEGIN_TEST(testCharacterEncoding_narrow_roundtrip) {
    202  // Change the locale to be UTF-8 aware for the emoji string.
    203  ToUTF8Locale utf8locale;
    204 
    205  // Skip this test if UTF-8 isn't supported on this system.
    206  if (!utf8locale.supported()) {
    207    return true;
    208  }
    209 
    210  for (std::string_view string : {
    211           "",
    212           "a",
    213           "abc",
    214           "ä",
    215           "💩",
    216       }) {
    217    auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data());
    218    CHECK(utf8 != nullptr);
    219 
    220    auto narrow = JS::EncodeUtf8ToNarrow(cx, utf8.get());
    221    CHECK(narrow != nullptr);
    222 
    223    CHECK(narrow.get() == string);
    224  }
    225  return true;
    226 }
    227 END_TEST(testCharacterEncoding_narrow_roundtrip)
    228 
    229 BEGIN_TEST(testCharacterEncoding_wide_roundtrip) {
    230  // Change the locale to be UTF-8 aware for the emoji string.
    231  ToUTF8Locale utf8locale;
    232 
    233  // Skip this test if UTF-8 isn't supported on this system.
    234  if (!utf8locale.supported()) {
    235    return true;
    236  }
    237 
    238  for (std::wstring_view string : {
    239           L"",
    240           L"a",
    241           L"abc",
    242           L"ä",
    243           L"💩",
    244       }) {
    245    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    246    CHECK(utf8 != nullptr);
    247 
    248    auto wide = JS::EncodeUtf8ToWide(cx, utf8.get());
    249    CHECK(wide != nullptr);
    250 
    251    CHECK(wide.get() == string);
    252  }
    253  return true;
    254 }
    255 END_TEST(testCharacterEncoding_wide_roundtrip)