[ tor-browser ].git.dasho

commit bcd6c96a05c1cb2527f1b8ac1fb0ce7645b8c37c
parent 0c89daeb02ec6e8c8dea606f2fc9846c4068b7f6
Author: Chris Peterson <cpeterson@mozilla.com>
Date:   Thu, 30 Oct 2025 16:53:16 +0000

Bug 1997025 - Replace u8"" string literals in mfbt and xpcom tests. r=xpcom-reviewers,emilio

u8"" strings have type const char[] in C++17, but type const char8_t[] in C++20. Since most Firefox code is written assuming char or char16_t strings, char8_t strings cause type mismatches that would require additional overloads or string conversions.

Because Firefox .cpp files are compiled as UTF-8, "" string literals are already UTF-8 encoded and the explicit u8 prefix is unnecessary.

By removing the u8"" string literals in the MFBT and xpcom tests, we can remove nsTStringRepr's Equals() overloads for char8_t. They are now unused, and casting a char8_t* string pointer to a char* string pointer is UB.

Differential Revision: https://phabricator.services.mozilla.com/D270453

Diffstat:
M mfbt/tests/TestUtf8.cpp  | 103 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M xpcom/string/nsTStringRepr.h  | 12 ------------
M xpcom/tests/gtest/TestStrings.cpp  | 2 +-

3 files changed, 52 insertions(+), 65 deletions(-)
diff --git a/mfbt/tests/TestUtf8.cpp b/mfbt/tests/TestUtf8.cpp
@@ -241,10 +241,10 @@ static void ExpectBadCodePoint(const Char (&aCharN)[N],
 
 static void TestIsUtf8() {
   // Note we include the U+0000 NULL in this one -- and that's fine.
-  static const char asciiBytes[] = u8"How about a nice game of chess?";
+  static const char asciiBytes[] = "How about a nice game of chess?";
   MOZ_RELEASE_ASSERT(IsUtf8(Span(asciiBytes, std::size(asciiBytes))));
 
-  static const char endNonAsciiBytes[] = u8"Life is like a 🌯";
+  static const char endNonAsciiBytes[] = "Life is like a 🌯";
   MOZ_RELEASE_ASSERT(
       IsUtf8(Span(endNonAsciiBytes, std::size(endNonAsciiBytes) - 1)));
 
@@ -254,13 +254,13 @@ static void TestIsUtf8() {
   // Byte-counts
 
   // 1
-  static const char oneBytes[] = u8"A";  // U+0041 LATIN CAPITAL LETTER A
+  static const char oneBytes[] = "A";  // U+0041 LATIN CAPITAL LETTER A
   constexpr size_t oneBytesLen = std::size(oneBytes);
   static_assert(oneBytesLen == 2, "U+0041 plus nul");
   MOZ_RELEASE_ASSERT(IsUtf8(Span(oneBytes, oneBytesLen)));
 
   // 2
-  static const char twoBytes[] = u8"؆";  // U+0606 ARABIC-INDIC CUBE ROOT
+  static const char twoBytes[] = "؆";  // U+0606 ARABIC-INDIC CUBE ROOT
   constexpr size_t twoBytesLen = std::size(twoBytes);
   static_assert(twoBytesLen == 3, "U+0606 in two bytes plus nul");
   MOZ_RELEASE_ASSERT(IsUtf8(Span(twoBytes, twoBytesLen)));
@@ -268,7 +268,7 @@ static void TestIsUtf8() {
   ExpectValidCodePoint(twoBytes, 0x0606);
 
   // 3
-  static const char threeBytes[] = u8"᨞";  // U+1A1E BUGINESE PALLAWA
+  static const char threeBytes[] = "᨞";  // U+1A1E BUGINESE PALLAWA
   constexpr size_t threeBytesLen = std::size(threeBytes);
   static_assert(threeBytesLen == 4, "U+1A1E in three bytes plus nul");
   MOZ_RELEASE_ASSERT(IsUtf8(Span(threeBytes, threeBytesLen)));
@@ -276,8 +276,7 @@ static void TestIsUtf8() {
   ExpectValidCodePoint(threeBytes, 0x1A1E);
 
   // 4
-  static const char fourBytes[] =
-      u8"🁡";  // U+1F061 DOMINO TILE HORIZONTAL-06-06
+  static const char fourBytes[] = "🁡";  // U+1F061 DOMINO TILE HORIZONTAL-06-06
   constexpr size_t fourBytesLen = std::size(fourBytes);
   static_assert(fourBytesLen == 5, "U+1F061 in four bytes plus nul");
   MOZ_RELEASE_ASSERT(IsUtf8(Span(fourBytes, fourBytesLen)));
@@ -285,7 +284,7 @@ static void TestIsUtf8() {
   ExpectValidCodePoint(fourBytes, 0x1F061);
 
   // Max code point
-  static const char maxCodePoint[] = u8"􏿿";  // U+10FFFF
+  static const char maxCodePoint[] = "􏿿";  // U+10FFFF
   constexpr size_t maxCodePointLen = std::size(maxCodePoint);
   static_assert(maxCodePointLen == 5, "U+10FFFF in four bytes plus nul");
   MOZ_RELEASE_ASSERT(IsUtf8(Span(maxCodePoint, maxCodePointLen)));
@@ -357,63 +356,63 @@ static void TestDecodeOneValidUtf8CodePoint() {
 
   // Length two.
 
-  ExpectValidCodePoint(u8"", 0x80);  // <control>
-  ExpectValidCodePoint(u8"©", 0xA9);   // COPYRIGHT SIGN
-  ExpectValidCodePoint(u8"¶", 0xB6);   // PILCROW SIGN
-  ExpectValidCodePoint(u8"¾", 0xBE);   // VULGAR FRACTION THREE QUARTERS
-  ExpectValidCodePoint(u8"÷", 0xF7);   // DIVISION SIGN
-  ExpectValidCodePoint(u8"ÿ", 0xFF);   // LATIN SMALL LETTER Y WITH DIAERESIS
-  ExpectValidCodePoint(u8"Ā", 0x100);  // LATIN CAPITAL LETTER A WITH MACRON
-  ExpectValidCodePoint(u8"Ĳ", 0x132);  // LATIN CAPITAL LETTER LIGATURE IJ
-  ExpectValidCodePoint(u8"ͼ", 0x37C);  // GREEK SMALL DOTTED LUNATE SIGMA SYMBOL
-  ExpectValidCodePoint(u8"Ӝ",
+  ExpectValidCodePoint("", 0x80);  // <control>
+  ExpectValidCodePoint("©", 0xA9);   // COPYRIGHT SIGN
+  ExpectValidCodePoint("¶", 0xB6);   // PILCROW SIGN
+  ExpectValidCodePoint("¾", 0xBE);   // VULGAR FRACTION THREE QUARTERS
+  ExpectValidCodePoint("÷", 0xF7);   // DIVISION SIGN
+  ExpectValidCodePoint("ÿ", 0xFF);   // LATIN SMALL LETTER Y WITH DIAERESIS
+  ExpectValidCodePoint("Ā", 0x100);  // LATIN CAPITAL LETTER A WITH MACRON
+  ExpectValidCodePoint("Ĳ", 0x132);  // LATIN CAPITAL LETTER LIGATURE IJ
+  ExpectValidCodePoint("ͼ", 0x37C);  // GREEK SMALL DOTTED LUNATE SIGMA SYMBOL
+  ExpectValidCodePoint("Ӝ",
                        0x4DC);  // CYRILLIC CAPITAL LETTER ZHE WITTH DIAERESIS
-  ExpectValidCodePoint(u8"۩", 0x6E9);  // ARABIC PLACE OF SAJDAH
-  ExpectValidCodePoint(u8"߿", 0x7FF);  // <not assigned>
+  ExpectValidCodePoint("۩", 0x6E9);  // ARABIC PLACE OF SAJDAH
+  ExpectValidCodePoint("߿", 0x7FF);  // <not assigned>
 
   // Length three.
 
-  ExpectValidCodePoint(u8"ࠀ", 0x800);  // SAMARITAN LETTER ALAF
-  ExpectValidCodePoint(u8"ࡁ", 0x841);  // MANDAIC LETTER AB
-  ExpectValidCodePoint(u8"ࣿ", 0x8FF);   // ARABIC MARK SIDEWAYS NOON GHUNNA
-  ExpectValidCodePoint(u8"ஆ", 0xB86);  // TAMIL LETTER AA
-  ExpectValidCodePoint(u8"༃",
+  ExpectValidCodePoint("ࠀ", 0x800);  // SAMARITAN LETTER ALAF
+  ExpectValidCodePoint("ࡁ", 0x841);  // MANDAIC LETTER AB
+  ExpectValidCodePoint("ࣿ", 0x8FF);   // ARABIC MARK SIDEWAYS NOON GHUNNA
+  ExpectValidCodePoint("ஆ", 0xB86);  // TAMIL LETTER AA
+  ExpectValidCodePoint("༃",
                        0xF03);  // TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA
   ExpectValidCodePoint(
-      u8"࿉",
+      "࿉",
       0xFC9);  // TIBETAN SYMBOL NOR BU (but on my system it really looks like
                // SOFT-SERVE ICE CREAM FROM ABOVE THE PLANE if you ask me)
-  ExpectValidCodePoint(u8"ဪ", 0x102A);           // MYANMAR LETTER AU
-  ExpectValidCodePoint(u8"ᚏ", 0x168F);           // OGHAM LETTER RUIS
+  ExpectValidCodePoint("ဪ", 0x102A);             // MYANMAR LETTER AU
+  ExpectValidCodePoint("ᚏ", 0x168F);             // OGHAM LETTER RUIS
   ExpectValidCodePoint("\xE2\x80\xA8", 0x2028);  // (the hated) LINE SEPARATOR
   ExpectValidCodePoint("\xE2\x80\xA9",
-                       0x2029);           // (the hated) PARAGRAPH SEPARATOR
-  ExpectValidCodePoint(u8"☬", 0x262C);    // ADI SHAKTI
-  ExpectValidCodePoint(u8"㊮", 0x32AE);   // CIRCLED IDEOGRAPH RESOURCE
-  ExpectValidCodePoint(u8"㏖", 0x33D6);   // SQUARE MOL
-  ExpectValidCodePoint(u8"ꔄ", 0xA504);    // VAI SYLLABLE WEEN
-  ExpectValidCodePoint(u8"ퟕ", 0xD7D5);    // HANGUL JONGSEONG RIEUL-SSANGKIYEOK
-  ExpectValidCodePoint(u8"퟿", 0xD7FF);  // <not assigned>
-  ExpectValidCodePoint(u8"", 0xE000);  // <Private Use>
-  ExpectValidCodePoint(u8"鱗", 0xF9F2);   // CJK COMPATIBILITY IDEOGRAPH-F9F
+                       0x2029);         // (the hated) PARAGRAPH SEPARATOR
+  ExpectValidCodePoint("☬", 0x262C);    // ADI SHAKTI
+  ExpectValidCodePoint("㊮", 0x32AE);   // CIRCLED IDEOGRAPH RESOURCE
+  ExpectValidCodePoint("㏖", 0x33D6);   // SQUARE MOL
+  ExpectValidCodePoint("ꔄ", 0xA504);    // VAI SYLLABLE WEEN
+  ExpectValidCodePoint("ퟕ", 0xD7D5);    // HANGUL JONGSEONG RIEUL-SSANGKIYEOK
+  ExpectValidCodePoint("퟿", 0xD7FF);  // <not assigned>
+  ExpectValidCodePoint("", 0xE000);  // <Private Use>
+  ExpectValidCodePoint("鱗", 0xF9F2);   // CJK COMPATIBILITY IDEOGRAPH-F9F
   ExpectValidCodePoint(
-      u8"﷽", 0xFDFD);  // ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHHHEEEEM
-  ExpectValidCodePoint(u8"", 0xFFFF);  // <not assigned>
+      "﷽", 0xFDFD);  // ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHHHEEEEM
+  ExpectValidCodePoint("", 0xFFFF);  // <not assigned>
 
   // Length four.
-  ExpectValidCodePoint(u8"𐀀", 0x10000);      // LINEAR B SYLLABLE B008 A
-  ExpectValidCodePoint(u8"𔑀", 0x14440);      // ANATOLIAN HIEROGLYPH A058
-  ExpectValidCodePoint(u8"𝛗", 0x1D6D7);      // MATHEMATICAL BOLD SMALL PHI
-  ExpectValidCodePoint(u8"💩", 0x1F4A9);     // PILE OF POO
-  ExpectValidCodePoint(u8"🔫", 0x1F52B);     // PISTOL
-  ExpectValidCodePoint(u8"🥌", 0x1F94C);     // CURLING STONE
-  ExpectValidCodePoint(u8"🥏", 0x1F94F);     // FLYING DISC
-  ExpectValidCodePoint(u8"𠍆", 0x20346);     // CJK UNIFIED IDEOGRAPH-20346
-  ExpectValidCodePoint(u8"𡠺", 0x2183A);     // CJK UNIFIED IDEOGRAPH-2183A
-  ExpectValidCodePoint(u8"񁟶", 0x417F6);   // <not assigned>
-  ExpectValidCodePoint(u8"񾠶", 0x7E836);   // <not assigned>
-  ExpectValidCodePoint(u8"󾽧", 0xFEF67);   // <Plane 15 Private Use>
-  ExpectValidCodePoint(u8"􏿿", 0x10FFFF);  //
+  ExpectValidCodePoint("𐀀", 0x10000);      // LINEAR B SYLLABLE B008 A
+  ExpectValidCodePoint("𔑀", 0x14440);      // ANATOLIAN HIEROGLYPH A058
+  ExpectValidCodePoint("𝛗", 0x1D6D7);      // MATHEMATICAL BOLD SMALL PHI
+  ExpectValidCodePoint("💩", 0x1F4A9);     // PILE OF POO
+  ExpectValidCodePoint("🔫", 0x1F52B);     // PISTOL
+  ExpectValidCodePoint("🥌", 0x1F94C);     // CURLING STONE
+  ExpectValidCodePoint("🥏", 0x1F94F);     // FLYING DISC
+  ExpectValidCodePoint("𠍆", 0x20346);     // CJK UNIFIED IDEOGRAPH-20346
+  ExpectValidCodePoint("𡠺", 0x2183A);     // CJK UNIFIED IDEOGRAPH-2183A
+  ExpectValidCodePoint("񁟶", 0x417F6);   // <not assigned>
+  ExpectValidCodePoint("񾠶", 0x7E836);   // <not assigned>
+  ExpectValidCodePoint("󾽧", 0xFEF67);   // <Plane 15 Private Use>
+  ExpectValidCodePoint("􏿿", 0x10FFFF);  //
 }
 
 static void TestDecodeBadLeadUnit() {
diff --git a/xpcom/string/nsTStringRepr.h b/xpcom/string/nsTStringRepr.h
@@ -253,18 +253,6 @@ class nsTStringRepr {
    */
   bool EqualsIgnoreCase(const std::string_view& aString) const;
 
-#ifdef __cpp_char8_t
-  template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>>
-  bool NS_FASTCALL Equals(const char8_t* aData) const {
-    return Equals(reinterpret_cast<const char*>(aData));
-  }
-
-  template <typename Q = T, typename EnableIfChar = mozilla::CharOnlyT<Q>>
-  bool NS_FASTCALL Equals(const char8_t* aData, comparator_type aComp) const {
-    return Equals(reinterpret_cast<const char*>(aData), aComp);
-  }
-#endif
-
 #if defined(MOZ_USE_CHAR16_WRAPPER)
   template <typename Q = T, typename EnableIfChar16 = Char16OnlyT<Q>>
   bool NS_FASTCALL Equals(char16ptr_t aData) const {
diff --git a/xpcom/tests/gtest/TestStrings.cpp b/xpcom/tests/gtest/TestStrings.cpp
@@ -1464,7 +1464,7 @@ TEST_F(Strings, bulk_write_fail) {
     EXPECT_TRUE(handleOrErr.isOk());
   }
   EXPECT_EQ(s.Length(), 3U);
-  EXPECT_TRUE(s.Equals(u8"\uFFFD"));
+  EXPECT_TRUE(s.Equals("\uFFFD"));
 }
 
 TEST_F(Strings, huge_capacity) {

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	mfbt/tests/TestUtf8.cpp	\|	103	+++++++++++++++++++++++++++++++++++++++----------------------------------------
M	xpcom/string/nsTStringRepr.h	\|	12	------------
M	xpcom/tests/gtest/TestStrings.cpp	\|	2	+-