testCharacterEncoding.cpp (6961B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "mozilla/TextUtils.h" 6 7 #include <clocale> 8 #include <cstring> 9 #include <cwchar> 10 #include <string_view> 11 12 #include "js/CharacterEncoding.h" 13 #include "jsapi-tests/tests.h" 14 15 static bool EqualsIgnoreCase(const char* xs, const char* ys) { 16 while (*xs && *ys) { 17 char x = *xs++; 18 char y = *ys++; 19 20 // Convert both to lower-case. 21 if (mozilla::IsAsciiAlpha(x) && mozilla::IsAsciiAlpha(y)) { 22 x |= 0x20; 23 y |= 0x20; 24 } 25 26 // Fail if the characters aren't the same. 27 if (x != y) { 28 return false; 29 } 30 } 31 32 // Both strings must be read to the end. 33 return !*xs && !*ys; 34 } 35 36 class ToUTF8Locale { 37 const char* previousLocale_ = nullptr; 38 bool supported_ = false; 39 40 public: 41 ToUTF8Locale() { 42 // Store the old locale so we can reset it in the destructor. 43 previousLocale_ = std::setlocale(LC_ALL, nullptr); 44 45 // Query the system default locale. 46 const char* defaultLocale = std::setlocale(LC_ALL, ""); 47 if (!defaultLocale) { 48 // std::setlocale returns nullptr on failure. 49 return; 50 } 51 52 // Switch the default locale to be UTF-8 aware. 53 const char* newLocale = std::setlocale(LC_ALL, "en_US.UTF-8"); 54 if (!newLocale) { 55 // std::setlocale returns nullptr on failure. 56 return; 57 } 58 59 const char* defaultCodepage = std::strchr(defaultLocale, '.'); 60 const char* newCodepage = std::strchr(newLocale, '.'); 61 62 // Return if either the default or new locale don't contain a code-page. 63 if (!defaultCodepage || !newCodepage) { 64 return; 65 } 66 67 // Skip past the '.'. 68 defaultCodepage++; 69 newCodepage++; 70 71 // UTF-8 is supported when the default locale and new locale support it: 72 // 73 // The default locale needs to support UTF-8, because this test is compiled 74 // using the default locale. 75 // 76 // The new locale needs to support UTF-8 to ensure UTF-8 encoding works at 77 // runtime. 78 supported_ = EqualsIgnoreCase(defaultCodepage, "UTF-8") && 79 EqualsIgnoreCase(newCodepage, "UTF-8"); 80 } 81 82 bool supported() const { return supported_; } 83 84 ~ToUTF8Locale() { 85 // Restore the previous locale. 86 if (previousLocale_) { 87 std::setlocale(LC_ALL, previousLocale_); 88 } 89 } 90 }; 91 92 BEGIN_TEST(testCharacterEncoding_narrow_to_utf8) { 93 // Assume the narrow charset is ASCII-compatible. ASCII to UTF-8 conversion is 94 // a no-op. 95 for (std::string_view string : { 96 "", 97 "a", 98 "abc", 99 "abc\0def", 100 }) { 101 auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data()); 102 CHECK(utf8 != nullptr); 103 CHECK_EQUAL(std::strlen(utf8.get()), string.length()); 104 CHECK(utf8.get() == string); 105 } 106 return true; 107 } 108 END_TEST(testCharacterEncoding_narrow_to_utf8) 109 110 BEGIN_TEST(testCharacterEncoding_wide_to_utf8) { 111 // Assume the wide charset is ASCII-compatible. ASCII to UTF-8 conversion is 112 // a no-op. 113 for (std::wstring_view string : { 114 L"", 115 L"a", 116 L"abc", 117 L"abc\0def", 118 }) { 119 auto utf8 = JS::EncodeWideToUtf8(cx, string.data()); 120 CHECK(utf8 != nullptr); 121 CHECK_EQUAL(std::strlen(utf8.get()), string.length()); 122 CHECK(std::equal( 123 string.begin(), string.end(), utf8.get(), 124 [](wchar_t x, char y) { return char32_t(x) == char32_t(y); })); 125 } 126 return true; 127 } 128 END_TEST(testCharacterEncoding_wide_to_utf8) 129 130 BEGIN_TEST(testCharacterEncoding_wide_to_utf8_non_ascii) { 131 // Change the locale to be UTF-8 aware for the emoji string. 132 ToUTF8Locale utf8locale; 133 134 // Skip this test if UTF-8 isn't supported on this system. 135 if (!utf8locale.supported()) { 136 return true; 137 } 138 139 { 140 std::wstring_view string = L"ä"; 141 auto utf8 = JS::EncodeWideToUtf8(cx, string.data()); 142 CHECK(utf8 != nullptr); 143 144 CHECK_EQUAL(std::strlen(utf8.get()), 2U); 145 CHECK_EQUAL(utf8[0], char(0xC3)); 146 CHECK_EQUAL(utf8[1], char(0xA4)); 147 } 148 { 149 std::wstring_view string = L"💩"; 150 auto utf8 = JS::EncodeWideToUtf8(cx, string.data()); 151 CHECK(utf8 != nullptr); 152 153 CHECK_EQUAL(std::strlen(utf8.get()), 4U); 154 CHECK_EQUAL(utf8[0], char(0xF0)); 155 CHECK_EQUAL(utf8[1], char(0x9F)); 156 CHECK_EQUAL(utf8[2], char(0x92)); 157 CHECK_EQUAL(utf8[3], char(0xA9)); 158 } 159 return true; 160 } 161 END_TEST(testCharacterEncoding_wide_to_utf8_non_ascii) 162 163 BEGIN_TEST(testCharacterEncoding_utf8_to_narrow) { 164 // Assume the narrow charset is ASCII-compatible. ASCII to UTF-8 conversion is 165 // a no-op. 166 for (std::string_view string : { 167 "", 168 "a", 169 "abc", 170 "abc\0def", 171 }) { 172 auto narrow = JS::EncodeUtf8ToNarrow(cx, string.data()); 173 CHECK(narrow != nullptr); 174 CHECK_EQUAL(std::strlen(narrow.get()), string.length()); 175 CHECK(narrow.get() == string); 176 } 177 return true; 178 } 179 END_TEST(testCharacterEncoding_utf8_to_narrow) 180 181 BEGIN_TEST(testCharacterEncoding_utf8_to_wide) { 182 // Assume the wide charset is ASCII-compatible. ASCII to UTF-8 conversion is 183 // a no-op. 184 for (std::string_view string : { 185 "", 186 "a", 187 "abc", 188 "abc\0def", 189 }) { 190 auto wide = JS::EncodeUtf8ToWide(cx, string.data()); 191 CHECK(wide != nullptr); 192 CHECK_EQUAL(std::wcslen(wide.get()), string.length()); 193 CHECK(std::equal( 194 string.begin(), string.end(), wide.get(), 195 [](char x, wchar_t y) { return char32_t(x) == char32_t(y); })); 196 } 197 return true; 198 } 199 END_TEST(testCharacterEncoding_utf8_to_wide) 200 201 BEGIN_TEST(testCharacterEncoding_narrow_roundtrip) { 202 // Change the locale to be UTF-8 aware for the emoji string. 203 ToUTF8Locale utf8locale; 204 205 // Skip this test if UTF-8 isn't supported on this system. 206 if (!utf8locale.supported()) { 207 return true; 208 } 209 210 for (std::string_view string : { 211 "", 212 "a", 213 "abc", 214 "ä", 215 "💩", 216 }) { 217 auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data()); 218 CHECK(utf8 != nullptr); 219 220 auto narrow = JS::EncodeUtf8ToNarrow(cx, utf8.get()); 221 CHECK(narrow != nullptr); 222 223 CHECK(narrow.get() == string); 224 } 225 return true; 226 } 227 END_TEST(testCharacterEncoding_narrow_roundtrip) 228 229 BEGIN_TEST(testCharacterEncoding_wide_roundtrip) { 230 // Change the locale to be UTF-8 aware for the emoji string. 231 ToUTF8Locale utf8locale; 232 233 // Skip this test if UTF-8 isn't supported on this system. 234 if (!utf8locale.supported()) { 235 return true; 236 } 237 238 for (std::wstring_view string : { 239 L"", 240 L"a", 241 L"abc", 242 L"ä", 243 L"💩", 244 }) { 245 auto utf8 = JS::EncodeWideToUtf8(cx, string.data()); 246 CHECK(utf8 != nullptr); 247 248 auto wide = JS::EncodeUtf8ToWide(cx, utf8.get()); 249 CHECK(wide != nullptr); 250 251 CHECK(wide.get() == string); 252 } 253 return true; 254 } 255 END_TEST(testCharacterEncoding_wide_roundtrip)