tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

TestString.cpp (8482B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 #include "gtest/gtest.h"
      5 
      6 #include "mozilla/intl/String.h"
      7 #include "mozilla/Span.h"
      8 #include "mozilla/TextUtils.h"
      9 #include "mozilla/Try.h"
     10 
     11 #include <algorithm>
     12 
     13 #include "TestBuffer.h"
     14 
     15 namespace mozilla::intl {
     16 
     17 static Result<std::u16string_view, ICUError> ToLocaleLowerCase(
     18    const char* aLocale, const char16_t* aString,
     19    TestBuffer<char16_t>& aBuffer) {
     20  aBuffer.clear();
     21 
     22  MOZ_TRY(String::ToLocaleLowerCase(aLocale, MakeStringSpan(aString), aBuffer));
     23 
     24  return aBuffer.get_string_view();
     25 }
     26 
     27 static Result<std::u16string_view, ICUError> ToLocaleUpperCase(
     28    const char* aLocale, const char16_t* aString,
     29    TestBuffer<char16_t>& aBuffer) {
     30  aBuffer.clear();
     31 
     32  MOZ_TRY(String::ToLocaleUpperCase(aLocale, MakeStringSpan(aString), aBuffer));
     33 
     34  return aBuffer.get_string_view();
     35 }
     36 
     37 TEST(IntlString, ToLocaleLowerCase)
     38 {
     39  TestBuffer<char16_t> buf;
     40 
     41  ASSERT_EQ(ToLocaleLowerCase("en", u"test", buf).unwrap(), u"test");
     42  ASSERT_EQ(ToLocaleLowerCase("en", u"TEST", buf).unwrap(), u"test");
     43 
     44  // Turkish dotless i.
     45  ASSERT_EQ(ToLocaleLowerCase("tr", u"I", buf).unwrap(), u"ı");
     46  ASSERT_EQ(ToLocaleLowerCase("tr", u"İ", buf).unwrap(), u"i");
     47  ASSERT_EQ(ToLocaleLowerCase("tr", u"I\u0307", buf).unwrap(), u"i");
     48 }
     49 
     50 TEST(IntlString, ToLocaleUpperCase)
     51 {
     52  TestBuffer<char16_t> buf;
     53 
     54  ASSERT_EQ(ToLocaleUpperCase("en", u"test", buf).unwrap(), u"TEST");
     55  ASSERT_EQ(ToLocaleUpperCase("en", u"TEST", buf).unwrap(), u"TEST");
     56 
     57  // Turkish dotless i.
     58  ASSERT_EQ(ToLocaleUpperCase("tr", u"i", buf).unwrap(), u"İ");
     59  ASSERT_EQ(ToLocaleUpperCase("tr", u"ı", buf).unwrap(), u"I");
     60 
     61  // Output can be longer than the input string.
     62  ASSERT_EQ(ToLocaleUpperCase("en", u"Größenmaßstäbe", buf).unwrap(),
     63            u"GRÖSSENMASSSTÄBE");
     64 }
     65 
     66 TEST(IntlString, NormalizeNFC)
     67 {
     68  using namespace std::literals;
     69 
     70  using NormalizationForm = String::NormalizationForm;
     71  using AlreadyNormalized = String::AlreadyNormalized;
     72 
     73  TestBuffer<char16_t> buf;
     74 
     75  auto alreadyNormalized =
     76      String::Normalize(NormalizationForm::NFC, u""sv, buf);
     77  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
     78  ASSERT_EQ(buf.get_string_view(), u"");
     79 
     80  alreadyNormalized =
     81      String::Normalize(NormalizationForm::NFC, u"abcdef"sv, buf);
     82  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
     83  ASSERT_EQ(buf.get_string_view(), u"");
     84 
     85  alreadyNormalized =
     86      String::Normalize(NormalizationForm::NFC, u"a\u0308"sv, buf);
     87  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
     88  ASSERT_EQ(buf.get_string_view(), u"ä");
     89 
     90  buf.clear();
     91 
     92  alreadyNormalized = String::Normalize(NormalizationForm::NFC, u"½"sv, buf);
     93  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
     94  ASSERT_EQ(buf.get_string_view(), u"");
     95 }
     96 
     97 TEST(IntlString, NormalizeNFD)
     98 {
     99  using namespace std::literals;
    100 
    101  using NormalizationForm = String::NormalizationForm;
    102  using AlreadyNormalized = String::AlreadyNormalized;
    103 
    104  TestBuffer<char16_t> buf;
    105 
    106  auto alreadyNormalized =
    107      String::Normalize(NormalizationForm::NFD, u""sv, buf);
    108  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
    109  ASSERT_EQ(buf.get_string_view(), u"");
    110 
    111  alreadyNormalized =
    112      String::Normalize(NormalizationForm::NFD, u"abcdef"sv, buf);
    113  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
    114  ASSERT_EQ(buf.get_string_view(), u"");
    115 
    116  alreadyNormalized = String::Normalize(NormalizationForm::NFD, u"ä"sv, buf);
    117  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
    118  ASSERT_EQ(buf.get_string_view(), u"a\u0308");
    119 
    120  buf.clear();
    121 
    122  alreadyNormalized = String::Normalize(NormalizationForm::NFD, u"½"sv, buf);
    123  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
    124  ASSERT_EQ(buf.get_string_view(), u"");
    125 
    126  // Test with inline capacity.
    127  TestBuffer<char16_t, 2> buf2;
    128 
    129  alreadyNormalized = String::Normalize(NormalizationForm::NFD, u" ç"sv, buf2);
    130  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
    131  ASSERT_EQ(buf2.get_string_view(), u" c\u0327");
    132 }
    133 
    134 TEST(IntlString, NormalizeNFKC)
    135 {
    136  using namespace std::literals;
    137 
    138  using NormalizationForm = String::NormalizationForm;
    139  using AlreadyNormalized = String::AlreadyNormalized;
    140 
    141  TestBuffer<char16_t> buf;
    142 
    143  auto alreadyNormalized =
    144      String::Normalize(NormalizationForm::NFKC, u""sv, buf);
    145  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
    146  ASSERT_EQ(buf.get_string_view(), u"");
    147 
    148  alreadyNormalized =
    149      String::Normalize(NormalizationForm::NFKC, u"abcdef"sv, buf);
    150  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
    151  ASSERT_EQ(buf.get_string_view(), u"");
    152 
    153  alreadyNormalized =
    154      String::Normalize(NormalizationForm::NFKC, u"a\u0308"sv, buf);
    155  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
    156  ASSERT_EQ(buf.get_string_view(), u"ä");
    157 
    158  buf.clear();
    159 
    160  alreadyNormalized = String::Normalize(NormalizationForm::NFKC, u"½"sv, buf);
    161  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
    162  ASSERT_EQ(buf.get_string_view(), u"1⁄2");
    163 }
    164 
    165 TEST(IntlString, NormalizeNFKD)
    166 {
    167  using namespace std::literals;
    168 
    169  using NormalizationForm = String::NormalizationForm;
    170  using AlreadyNormalized = String::AlreadyNormalized;
    171 
    172  TestBuffer<char16_t> buf;
    173 
    174  auto alreadyNormalized =
    175      String::Normalize(NormalizationForm::NFKD, u""sv, buf);
    176  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
    177  ASSERT_EQ(buf.get_string_view(), u"");
    178 
    179  alreadyNormalized =
    180      String::Normalize(NormalizationForm::NFKD, u"abcdef"sv, buf);
    181  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
    182  ASSERT_EQ(buf.get_string_view(), u"");
    183 
    184  alreadyNormalized = String::Normalize(NormalizationForm::NFKD, u"ä"sv, buf);
    185  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
    186  ASSERT_EQ(buf.get_string_view(), u"a\u0308");
    187 
    188  buf.clear();
    189 
    190  alreadyNormalized = String::Normalize(NormalizationForm::NFKD, u"½"sv, buf);
    191  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
    192  ASSERT_EQ(buf.get_string_view(), u"1⁄2");
    193 }
    194 
    195 TEST(IntlString, ComposePairNFC)
    196 {
    197  // Pair of base characters do not compose
    198  ASSERT_EQ(String::ComposePairNFC(U'a', U'b'), U'\0');
    199  // Base letter + accent
    200  ASSERT_EQ(String::ComposePairNFC(U'a', U'\u0308'), U'ä');
    201  // Accented letter + a further accent
    202  ASSERT_EQ(String::ComposePairNFC(U'ä', U'\u0304'), U'ǟ');
    203  // Accented letter + a further accent, but doubly-accented form is not
    204  // available
    205  ASSERT_EQ(String::ComposePairNFC(U'ä', U'\u0301'), U'\0');
    206  // These do not compose because although U+0344 has the decomposition <0308,
    207  // 0301> (see below), it also has the Full_Composition_Exclusion property.
    208  ASSERT_EQ(String::ComposePairNFC(U'\u0308', U'\u0301'), U'\0');
    209  // Supplementary-plane letter + accent
    210  ASSERT_EQ(String::ComposePairNFC(U'\U00011099', U'\U000110BA'),
    211            U'\U0001109A');
    212 }
    213 
    214 TEST(IntlString, DecomposeRawNFD)
    215 {
    216  char32_t buf[2];
    217  // Non-decomposable character
    218  ASSERT_EQ(String::DecomposeRawNFD(U'a', buf), 0);
    219  // Singleton decomposition
    220  ASSERT_EQ(String::DecomposeRawNFD(U'\u212A', buf), 1);
    221  ASSERT_EQ(buf[0], U'K');
    222  // Simple accented letter
    223  ASSERT_EQ(String::DecomposeRawNFD(U'ä', buf), 2);
    224  ASSERT_EQ(buf[0], U'a');
    225  ASSERT_EQ(buf[1], U'\u0308');
    226  // Double-accented letter decomposes by only one level
    227  ASSERT_EQ(String::DecomposeRawNFD(U'ǟ', buf), 2);
    228  ASSERT_EQ(buf[0], U'ä');
    229  ASSERT_EQ(buf[1], U'\u0304');
    230  // Non-starter can decompose, but will not recompose (see above)
    231  ASSERT_EQ(String::DecomposeRawNFD(U'\u0344', buf), 2);
    232  ASSERT_EQ(buf[0], U'\u0308');
    233  ASSERT_EQ(buf[1], U'\u0301');
    234  // Supplementary-plane letter with decomposition
    235  ASSERT_EQ(String::DecomposeRawNFD(U'\U0001109A', buf), 2);
    236  ASSERT_EQ(buf[0], U'\U00011099');
    237  ASSERT_EQ(buf[1], U'\U000110BA');
    238 }
    239 
    240 TEST(IntlString, IsCased)
    241 {
    242  ASSERT_TRUE(String::IsCased(U'a'));
    243  ASSERT_FALSE(String::IsCased(U'0'));
    244 }
    245 
    246 TEST(IntlString, IsCaseIgnorable)
    247 {
    248  ASSERT_FALSE(String::IsCaseIgnorable(U'a'));
    249  ASSERT_TRUE(String::IsCaseIgnorable(U'.'));
    250 }
    251 
    252 TEST(IntlString, GetUnicodeVersion)
    253 {
    254  auto version = String::GetUnicodeVersion();
    255 
    256  ASSERT_TRUE(std::all_of(version.begin(), version.end(), [](char ch) {
    257    return IsAsciiDigit(ch) || ch == '.';
    258  }));
    259 }
    260 
    261 }  // namespace mozilla::intl