tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit bcd6c96a05c1cb2527f1b8ac1fb0ce7645b8c37c
parent 0c89daeb02ec6e8c8dea606f2fc9846c4068b7f6
Author: Chris Peterson <cpeterson@mozilla.com>
Date:   Thu, 30 Oct 2025 16:53:16 +0000

Bug 1997025 - Replace u8"" string literals in mfbt and xpcom tests. r=xpcom-reviewers,emilio

u8"" strings have type const char[] in C++17, but type const char8_t[] in C++20. Since most Firefox code is written assuming char or char16_t strings, char8_t strings cause type mismatches that would require additional overloads or string conversions.

Because Firefox .cpp files are compiled as UTF-8, "" string literals are already UTF-8 encoded and the explicit u8 prefix is unnecessary.

By removing the u8"" string literals in the MFBT and xpcom tests, we can remove nsTStringRepr's Equals() overloads for char8_t. They are now unused and casting a char8_t* string pointer to a char* string pointer is UB.

Differential Revision: https://phabricator.services.mozilla.com/D270453

Diffstat:
M mfbt/tests/TestUtf8.cpp | 103 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M xpcom/string/nsTStringRepr.h | 12 ------------
M xpcom/tests/gtest/TestStrings.cpp | 2 +-
3 files changed, 52 insertions(+), 65 deletions(-)

diff --git a/mfbt/tests/TestUtf8.cpp b/mfbt/tests/TestUtf8.cpp @@ -241,10 +241,10 @@ static void ExpectBadCodePoint(const Char (&aCharN)[N], static void TestIsUtf8() { // Note we include the U+0000 NULL in this one -- and that's fine. - static const char asciiBytes[] = u8"How about a nice game of chess?"; + static const char asciiBytes[] = "How about a nice game of chess?"; MOZ_RELEASE_ASSERT(IsUtf8(Span(asciiBytes, std::size(asciiBytes)))); - static const char endNonAsciiBytes[] = u8"Life is like a 🌯"; + static const char endNonAsciiBytes[] = "Life is like a 🌯"; MOZ_RELEASE_ASSERT( IsUtf8(Span(endNonAsciiBytes, std::size(endNonAsciiBytes) - 1))); @@ -254,13 +254,13 @@ static void TestIsUtf8() { // Byte-counts // 1 - static const char oneBytes[] = u8"A"; // U+0041 LATIN CAPITAL LETTER A + static const char oneBytes[] = "A"; // U+0041 LATIN CAPITAL LETTER A constexpr size_t oneBytesLen = std::size(oneBytes); static_assert(oneBytesLen == 2, "U+0041 plus nul"); MOZ_RELEASE_ASSERT(IsUtf8(Span(oneBytes, oneBytesLen))); // 2 - static const char twoBytes[] = u8"؆"; // U+0606 ARABIC-INDIC CUBE ROOT + static const char twoBytes[] = "؆"; // U+0606 ARABIC-INDIC CUBE ROOT constexpr size_t twoBytesLen = std::size(twoBytes); static_assert(twoBytesLen == 3, "U+0606 in two bytes plus nul"); MOZ_RELEASE_ASSERT(IsUtf8(Span(twoBytes, twoBytesLen))); @@ -268,7 +268,7 @@ static void TestIsUtf8() { ExpectValidCodePoint(twoBytes, 0x0606); // 3 - static const char threeBytes[] = u8"᨞"; // U+1A1E BUGINESE PALLAWA + static const char threeBytes[] = "᨞"; // U+1A1E BUGINESE PALLAWA constexpr size_t threeBytesLen = std::size(threeBytes); static_assert(threeBytesLen == 4, "U+1A1E in three bytes plus nul"); MOZ_RELEASE_ASSERT(IsUtf8(Span(threeBytes, threeBytesLen))); @@ -276,8 +276,7 @@ static void TestIsUtf8() { ExpectValidCodePoint(threeBytes, 0x1A1E); // 4 - static const char fourBytes[] = - u8"🁡"; // U+1F061 DOMINO TILE HORIZONTAL-06-06 + static const char fourBytes[] = "🁡"; // U+1F061 
DOMINO TILE HORIZONTAL-06-06 constexpr size_t fourBytesLen = std::size(fourBytes); static_assert(fourBytesLen == 5, "U+1F061 in four bytes plus nul"); MOZ_RELEASE_ASSERT(IsUtf8(Span(fourBytes, fourBytesLen))); @@ -285,7 +284,7 @@ static void TestIsUtf8() { ExpectValidCodePoint(fourBytes, 0x1F061); // Max code point - static const char maxCodePoint[] = u8"􏿿"; // U+10FFFF + static const char maxCodePoint[] = "􏿿"; // U+10FFFF constexpr size_t maxCodePointLen = std::size(maxCodePoint); static_assert(maxCodePointLen == 5, "U+10FFFF in four bytes plus nul"); MOZ_RELEASE_ASSERT(IsUtf8(Span(maxCodePoint, maxCodePointLen))); @@ -357,63 +356,63 @@ static void TestDecodeOneValidUtf8CodePoint() { // Length two. - ExpectValidCodePoint(u8"€", 0x80); // <control> - ExpectValidCodePoint(u8"©", 0xA9); // COPYRIGHT SIGN - ExpectValidCodePoint(u8"¶", 0xB6); // PILCROW SIGN - ExpectValidCodePoint(u8"¾", 0xBE); // VULGAR FRACTION THREE QUARTERS - ExpectValidCodePoint(u8"÷", 0xF7); // DIVISION SIGN - ExpectValidCodePoint(u8"ÿ", 0xFF); // LATIN SMALL LETTER Y WITH DIAERESIS - ExpectValidCodePoint(u8"Ā", 0x100); // LATIN CAPITAL LETTER A WITH MACRON - ExpectValidCodePoint(u8"IJ", 0x132); // LATIN CAPITAL LETTER LIGATURE IJ - ExpectValidCodePoint(u8"ͼ", 0x37C); // GREEK SMALL DOTTED LUNATE SIGMA SYMBOL - ExpectValidCodePoint(u8"Ӝ", + ExpectValidCodePoint("€", 0x80); // <control> + ExpectValidCodePoint("©", 0xA9); // COPYRIGHT SIGN + ExpectValidCodePoint("¶", 0xB6); // PILCROW SIGN + ExpectValidCodePoint("¾", 0xBE); // VULGAR FRACTION THREE QUARTERS + ExpectValidCodePoint("÷", 0xF7); // DIVISION SIGN + ExpectValidCodePoint("ÿ", 0xFF); // LATIN SMALL LETTER Y WITH DIAERESIS + ExpectValidCodePoint("Ā", 0x100); // LATIN CAPITAL LETTER A WITH MACRON + ExpectValidCodePoint("IJ", 0x132); // LATIN CAPITAL LETTER LIGATURE IJ + ExpectValidCodePoint("ͼ", 0x37C); // GREEK SMALL DOTTED LUNATE SIGMA SYMBOL + ExpectValidCodePoint("Ӝ", 0x4DC); // CYRILLIC CAPITAL LETTER ZHE WITTH DIAERESIS - 
ExpectValidCodePoint(u8"۩", 0x6E9); // ARABIC PLACE OF SAJDAH - ExpectValidCodePoint(u8"߿", 0x7FF); // <not assigned> + ExpectValidCodePoint("۩", 0x6E9); // ARABIC PLACE OF SAJDAH + ExpectValidCodePoint("߿", 0x7FF); // <not assigned> // Length three. - ExpectValidCodePoint(u8"ࠀ", 0x800); // SAMARITAN LETTER ALAF - ExpectValidCodePoint(u8"ࡁ", 0x841); // MANDAIC LETTER AB - ExpectValidCodePoint(u8"ࣿ", 0x8FF); // ARABIC MARK SIDEWAYS NOON GHUNNA - ExpectValidCodePoint(u8"ஆ", 0xB86); // TAMIL LETTER AA - ExpectValidCodePoint(u8"༃", + ExpectValidCodePoint("ࠀ", 0x800); // SAMARITAN LETTER ALAF + ExpectValidCodePoint("ࡁ", 0x841); // MANDAIC LETTER AB + ExpectValidCodePoint("ࣿ", 0x8FF); // ARABIC MARK SIDEWAYS NOON GHUNNA + ExpectValidCodePoint("ஆ", 0xB86); // TAMIL LETTER AA + ExpectValidCodePoint("༃", 0xF03); // TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA ExpectValidCodePoint( - u8"࿉", + "࿉", 0xFC9); // TIBETAN SYMBOL NOR BU (but on my system it really looks like // SOFT-SERVE ICE CREAM FROM ABOVE THE PLANE if you ask me) - ExpectValidCodePoint(u8"ဪ", 0x102A); // MYANMAR LETTER AU - ExpectValidCodePoint(u8"ᚏ", 0x168F); // OGHAM LETTER RUIS + ExpectValidCodePoint("ဪ", 0x102A); // MYANMAR LETTER AU + ExpectValidCodePoint("ᚏ", 0x168F); // OGHAM LETTER RUIS ExpectValidCodePoint("\xE2\x80\xA8", 0x2028); // (the hated) LINE SEPARATOR ExpectValidCodePoint("\xE2\x80\xA9", - 0x2029); // (the hated) PARAGRAPH SEPARATOR - ExpectValidCodePoint(u8"☬", 0x262C); // ADI SHAKTI - ExpectValidCodePoint(u8"㊮", 0x32AE); // CIRCLED IDEOGRAPH RESOURCE - ExpectValidCodePoint(u8"㏖", 0x33D6); // SQUARE MOL - ExpectValidCodePoint(u8"ꔄ", 0xA504); // VAI SYLLABLE WEEN - ExpectValidCodePoint(u8"ퟕ", 0xD7D5); // HANGUL JONGSEONG RIEUL-SSANGKIYEOK - ExpectValidCodePoint(u8"퟿", 0xD7FF); // <not assigned> - ExpectValidCodePoint(u8"", 0xE000); // <Private Use> - ExpectValidCodePoint(u8"鱗", 0xF9F2); // CJK COMPATIBILITY IDEOGRAPH-F9F + 0x2029); // (the hated) PARAGRAPH SEPARATOR + ExpectValidCodePoint("☬", 
0x262C); // ADI SHAKTI + ExpectValidCodePoint("㊮", 0x32AE); // CIRCLED IDEOGRAPH RESOURCE + ExpectValidCodePoint("㏖", 0x33D6); // SQUARE MOL + ExpectValidCodePoint("ꔄ", 0xA504); // VAI SYLLABLE WEEN + ExpectValidCodePoint("ퟕ", 0xD7D5); // HANGUL JONGSEONG RIEUL-SSANGKIYEOK + ExpectValidCodePoint("퟿", 0xD7FF); // <not assigned> + ExpectValidCodePoint("", 0xE000); // <Private Use> + ExpectValidCodePoint("鱗", 0xF9F2); // CJK COMPATIBILITY IDEOGRAPH-F9F ExpectValidCodePoint( - u8"﷽", 0xFDFD); // ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHHHEEEEM - ExpectValidCodePoint(u8"￿", 0xFFFF); // <not assigned> + "﷽", 0xFDFD); // ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHHHEEEEM + ExpectValidCodePoint("￿", 0xFFFF); // <not assigned> // Length four. - ExpectValidCodePoint(u8"𐀀", 0x10000); // LINEAR B SYLLABLE B008 A - ExpectValidCodePoint(u8"𔑀", 0x14440); // ANATOLIAN HIEROGLYPH A058 - ExpectValidCodePoint(u8"𝛗", 0x1D6D7); // MATHEMATICAL BOLD SMALL PHI - ExpectValidCodePoint(u8"💩", 0x1F4A9); // PILE OF POO - ExpectValidCodePoint(u8"🔫", 0x1F52B); // PISTOL - ExpectValidCodePoint(u8"🥌", 0x1F94C); // CURLING STONE - ExpectValidCodePoint(u8"🥏", 0x1F94F); // FLYING DISC - ExpectValidCodePoint(u8"𠍆", 0x20346); // CJK UNIFIED IDEOGRAPH-20346 - ExpectValidCodePoint(u8"𡠺", 0x2183A); // CJK UNIFIED IDEOGRAPH-2183A - ExpectValidCodePoint(u8"񁟶", 0x417F6); // <not assigned> - ExpectValidCodePoint(u8"񾠶", 0x7E836); // <not assigned> - ExpectValidCodePoint(u8"󾽧", 0xFEF67); // <Plane 15 Private Use> - ExpectValidCodePoint(u8"􏿿", 0x10FFFF); // + ExpectValidCodePoint("𐀀", 0x10000); // LINEAR B SYLLABLE B008 A + ExpectValidCodePoint("𔑀", 0x14440); // ANATOLIAN HIEROGLYPH A058 + ExpectValidCodePoint("𝛗", 0x1D6D7); // MATHEMATICAL BOLD SMALL PHI + ExpectValidCodePoint("💩", 0x1F4A9); // PILE OF POO + ExpectValidCodePoint("🔫", 0x1F52B); // PISTOL + ExpectValidCodePoint("🥌", 0x1F94C); // CURLING STONE + ExpectValidCodePoint("🥏", 0x1F94F); // FLYING DISC + ExpectValidCodePoint("𠍆", 0x20346); // CJK 
UNIFIED IDEOGRAPH-20346 + ExpectValidCodePoint("𡠺", 0x2183A); // CJK UNIFIED IDEOGRAPH-2183A + ExpectValidCodePoint("񁟶", 0x417F6); // <not assigned> + ExpectValidCodePoint("񾠶", 0x7E836); // <not assigned> + ExpectValidCodePoint("󾽧", 0xFEF67); // <Plane 15 Private Use> + ExpectValidCodePoint("􏿿", 0x10FFFF); // } static void TestDecodeBadLeadUnit() { diff --git a/xpcom/string/nsTStringRepr.h b/xpcom/string/nsTStringRepr.h @@ -253,18 +253,6 @@ class nsTStringRepr { */ bool EqualsIgnoreCase(const std::string_view& aString) const; -#ifdef __cpp_char8_t - template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> - bool NS_FASTCALL Equals(const char8_t* aData) const { - return Equals(reinterpret_cast<const char*>(aData)); - } - - template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>> - bool NS_FASTCALL Equals(const char8_t* aData, comparator_type aComp) const { - return Equals(reinterpret_cast<const char*>(aData), aComp); - } -#endif - #if defined(MOZ_USE_CHAR16_WRAPPER) template <typename Q = T, typename EnableIfChar16 = Char16OnlyT<Q>> bool NS_FASTCALL Equals(char16ptr_t aData) const { diff --git a/xpcom/tests/gtest/TestStrings.cpp b/xpcom/tests/gtest/TestStrings.cpp @@ -1464,7 +1464,7 @@ TEST_F(Strings, bulk_write_fail) { EXPECT_TRUE(handleOrErr.isOk()); } EXPECT_EQ(s.Length(), 3U); - EXPECT_TRUE(s.Equals(u8"\uFFFD")); + EXPECT_TRUE(s.Equals("\uFFFD")); } TEST_F(Strings, huge_capacity) {