TestString.cpp (8482B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 #include "gtest/gtest.h" 5 6 #include "mozilla/intl/String.h" 7 #include "mozilla/Span.h" 8 #include "mozilla/TextUtils.h" 9 #include "mozilla/Try.h" 10 11 #include <algorithm> 12 13 #include "TestBuffer.h" 14 15 namespace mozilla::intl { 16 17 static Result<std::u16string_view, ICUError> ToLocaleLowerCase( 18 const char* aLocale, const char16_t* aString, 19 TestBuffer<char16_t>& aBuffer) { 20 aBuffer.clear(); 21 22 MOZ_TRY(String::ToLocaleLowerCase(aLocale, MakeStringSpan(aString), aBuffer)); 23 24 return aBuffer.get_string_view(); 25 } 26 27 static Result<std::u16string_view, ICUError> ToLocaleUpperCase( 28 const char* aLocale, const char16_t* aString, 29 TestBuffer<char16_t>& aBuffer) { 30 aBuffer.clear(); 31 32 MOZ_TRY(String::ToLocaleUpperCase(aLocale, MakeStringSpan(aString), aBuffer)); 33 34 return aBuffer.get_string_view(); 35 } 36 37 TEST(IntlString, ToLocaleLowerCase) 38 { 39 TestBuffer<char16_t> buf; 40 41 ASSERT_EQ(ToLocaleLowerCase("en", u"test", buf).unwrap(), u"test"); 42 ASSERT_EQ(ToLocaleLowerCase("en", u"TEST", buf).unwrap(), u"test"); 43 44 // Turkish dotless i. 45 ASSERT_EQ(ToLocaleLowerCase("tr", u"I", buf).unwrap(), u"ı"); 46 ASSERT_EQ(ToLocaleLowerCase("tr", u"İ", buf).unwrap(), u"i"); 47 ASSERT_EQ(ToLocaleLowerCase("tr", u"I\u0307", buf).unwrap(), u"i"); 48 } 49 50 TEST(IntlString, ToLocaleUpperCase) 51 { 52 TestBuffer<char16_t> buf; 53 54 ASSERT_EQ(ToLocaleUpperCase("en", u"test", buf).unwrap(), u"TEST"); 55 ASSERT_EQ(ToLocaleUpperCase("en", u"TEST", buf).unwrap(), u"TEST"); 56 57 // Turkish dotless i. 58 ASSERT_EQ(ToLocaleUpperCase("tr", u"i", buf).unwrap(), u"İ"); 59 ASSERT_EQ(ToLocaleUpperCase("tr", u"ı", buf).unwrap(), u"I"); 60 61 // Output can be longer than the input string. 62 ASSERT_EQ(ToLocaleUpperCase("en", u"Größenmaßstäbe", buf).unwrap(), 63 u"GRÖSSENMASSSTÄBE"); 64 } 65 66 TEST(IntlString, NormalizeNFC) 67 { 68 using namespace std::literals; 69 70 using NormalizationForm = String::NormalizationForm; 71 using AlreadyNormalized = String::AlreadyNormalized; 72 73 TestBuffer<char16_t> buf; 74 75 auto alreadyNormalized = 76 String::Normalize(NormalizationForm::NFC, u""sv, buf); 77 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 78 ASSERT_EQ(buf.get_string_view(), u""); 79 80 alreadyNormalized = 81 String::Normalize(NormalizationForm::NFC, u"abcdef"sv, buf); 82 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 83 ASSERT_EQ(buf.get_string_view(), u""); 84 85 alreadyNormalized = 86 String::Normalize(NormalizationForm::NFC, u"a\u0308"sv, buf); 87 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); 88 ASSERT_EQ(buf.get_string_view(), u"ä"); 89 90 buf.clear(); 91 92 alreadyNormalized = String::Normalize(NormalizationForm::NFC, u"½"sv, buf); 93 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 94 ASSERT_EQ(buf.get_string_view(), u""); 95 } 96 97 TEST(IntlString, NormalizeNFD) 98 { 99 using namespace std::literals; 100 101 using NormalizationForm = String::NormalizationForm; 102 using AlreadyNormalized = String::AlreadyNormalized; 103 104 TestBuffer<char16_t> buf; 105 106 auto alreadyNormalized = 107 String::Normalize(NormalizationForm::NFD, u""sv, buf); 108 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 109 ASSERT_EQ(buf.get_string_view(), u""); 110 111 alreadyNormalized = 112 String::Normalize(NormalizationForm::NFD, u"abcdef"sv, buf); 113 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 114 ASSERT_EQ(buf.get_string_view(), u""); 115 116 alreadyNormalized = String::Normalize(NormalizationForm::NFD, u"ä"sv, buf); 117 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); 118 ASSERT_EQ(buf.get_string_view(), u"a\u0308"); 119 120 buf.clear(); 121 122 alreadyNormalized = String::Normalize(NormalizationForm::NFD, u"½"sv, buf); 123 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 124 ASSERT_EQ(buf.get_string_view(), u""); 125 126 // Test with inline capacity. 127 TestBuffer<char16_t, 2> buf2; 128 129 alreadyNormalized = String::Normalize(NormalizationForm::NFD, u" ç"sv, buf2); 130 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); 131 ASSERT_EQ(buf2.get_string_view(), u" c\u0327"); 132 } 133 134 TEST(IntlString, NormalizeNFKC) 135 { 136 using namespace std::literals; 137 138 using NormalizationForm = String::NormalizationForm; 139 using AlreadyNormalized = String::AlreadyNormalized; 140 141 TestBuffer<char16_t> buf; 142 143 auto alreadyNormalized = 144 String::Normalize(NormalizationForm::NFKC, u""sv, buf); 145 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 146 ASSERT_EQ(buf.get_string_view(), u""); 147 148 alreadyNormalized = 149 String::Normalize(NormalizationForm::NFKC, u"abcdef"sv, buf); 150 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 151 ASSERT_EQ(buf.get_string_view(), u""); 152 153 alreadyNormalized = 154 String::Normalize(NormalizationForm::NFKC, u"a\u0308"sv, buf); 155 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); 156 ASSERT_EQ(buf.get_string_view(), u"ä"); 157 158 buf.clear(); 159 160 alreadyNormalized = String::Normalize(NormalizationForm::NFKC, u"½"sv, buf); 161 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); 162 ASSERT_EQ(buf.get_string_view(), u"1⁄2"); 163 } 164 165 TEST(IntlString, NormalizeNFKD) 166 { 167 using namespace std::literals; 168 169 using NormalizationForm = String::NormalizationForm; 170 using AlreadyNormalized = String::AlreadyNormalized; 171 172 TestBuffer<char16_t> buf; 173 174 auto alreadyNormalized = 175 String::Normalize(NormalizationForm::NFKD, u""sv, buf); 176 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 177 ASSERT_EQ(buf.get_string_view(), u""); 178 179 alreadyNormalized = 180 String::Normalize(NormalizationForm::NFKD, u"abcdef"sv, buf); 181 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); 182 ASSERT_EQ(buf.get_string_view(), u""); 183 184 alreadyNormalized = String::Normalize(NormalizationForm::NFKD, u"ä"sv, buf); 185 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); 186 ASSERT_EQ(buf.get_string_view(), u"a\u0308"); 187 188 buf.clear(); 189 190 alreadyNormalized = String::Normalize(NormalizationForm::NFKD, u"½"sv, buf); 191 ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); 192 ASSERT_EQ(buf.get_string_view(), u"1⁄2"); 193 } 194 195 TEST(IntlString, ComposePairNFC) 196 { 197 // Pair of base characters do not compose 198 ASSERT_EQ(String::ComposePairNFC(U'a', U'b'), U'\0'); 199 // Base letter + accent 200 ASSERT_EQ(String::ComposePairNFC(U'a', U'\u0308'), U'ä'); 201 // Accented letter + a further accent 202 ASSERT_EQ(String::ComposePairNFC(U'ä', U'\u0304'), U'ǟ'); 203 // Accented letter + a further accent, but doubly-accented form is not 204 // available 205 ASSERT_EQ(String::ComposePairNFC(U'ä', U'\u0301'), U'\0'); 206 // These do not compose because although U+0344 has the decomposition <0308, 207 // 0301> (see below), it also has the Full_Composition_Exclusion property. 208 ASSERT_EQ(String::ComposePairNFC(U'\u0308', U'\u0301'), U'\0'); 209 // Supplementary-plane letter + accent 210 ASSERT_EQ(String::ComposePairNFC(U'\U00011099', U'\U000110BA'), 211 U'\U0001109A'); 212 } 213 214 TEST(IntlString, DecomposeRawNFD) 215 { 216 char32_t buf[2]; 217 // Non-decomposable character 218 ASSERT_EQ(String::DecomposeRawNFD(U'a', buf), 0); 219 // Singleton decomposition 220 ASSERT_EQ(String::DecomposeRawNFD(U'\u212A', buf), 1); 221 ASSERT_EQ(buf[0], U'K'); 222 // Simple accented letter 223 ASSERT_EQ(String::DecomposeRawNFD(U'ä', buf), 2); 224 ASSERT_EQ(buf[0], U'a'); 225 ASSERT_EQ(buf[1], U'\u0308'); 226 // Double-accented letter decomposes by only one level 227 ASSERT_EQ(String::DecomposeRawNFD(U'ǟ', buf), 2); 228 ASSERT_EQ(buf[0], U'ä'); 229 ASSERT_EQ(buf[1], U'\u0304'); 230 // Non-starter can decompose, but will not recompose (see above) 231 ASSERT_EQ(String::DecomposeRawNFD(U'\u0344', buf), 2); 232 ASSERT_EQ(buf[0], U'\u0308'); 233 ASSERT_EQ(buf[1], U'\u0301'); 234 // Supplementary-plane letter with decomposition 235 ASSERT_EQ(String::DecomposeRawNFD(U'\U0001109A', buf), 2); 236 ASSERT_EQ(buf[0], U'\U00011099'); 237 ASSERT_EQ(buf[1], U'\U000110BA'); 238 } 239 240 TEST(IntlString, IsCased) 241 { 242 ASSERT_TRUE(String::IsCased(U'a')); 243 ASSERT_FALSE(String::IsCased(U'0')); 244 } 245 246 TEST(IntlString, IsCaseIgnorable) 247 { 248 ASSERT_FALSE(String::IsCaseIgnorable(U'a')); 249 ASSERT_TRUE(String::IsCaseIgnorable(U'.')); 250 } 251 252 TEST(IntlString, GetUnicodeVersion) 253 { 254 auto version = String::GetUnicodeVersion(); 255 256 ASSERT_TRUE(std::all_of(version.begin(), version.end(), [](char ch) { 257 return IsAsciiDigit(ch) || ch == '.'; 258 })); 259 } 260 261 } // namespace mozilla::intl