[ tor-browser ].git.dasho

commit 70e6d16fa38a412a54621178927ca82232a59341
parent ff7acc27f88d8c020418ef62cf1a7b6fea61984d
Author: Edgar Chen <echen@mozilla.com>
Date:   Wed, 10 Dec 2025 16:02:27 +0000

Bug 2004603 - Avoid encoding for `text/html` twice for copying selection; r=hsivonen

The only remaining purpose of the additional encoding is to detect XHTML documents,
but I think we can handle XHTML documents without doing that.

Differential Revision: https://phabricator.services.mozilla.com/D275398

Diffstat:
M dom/base/nsCopySupport.cpp  | 154 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M dom/serializers/nsDocumentEncoder.cpp  | 24 ++++++++++++------------

2 files changed, 91 insertions(+), 87 deletions(-)
diff --git a/dom/base/nsCopySupport.cpp b/dom/base/nsCopySupport.cpp
@@ -83,18 +83,19 @@ static nsresult AppendImagePromise(nsITransferable* aTransferable,
                                    nsINode* aImageNode);
 #endif
 
-static nsresult EncodeForTextUnicode(nsIDocumentEncoder& aEncoder,
-                                     Document& aDocument, Selection* aSelection,
-                                     uint32_t aAdditionalEncoderFlags,
-                                     bool& aEncodedAsTextHTMLResult,
-                                     nsAutoString& aSerializationResult) {
-  // note that we assign text/unicode as mime type, but in fact
-  // nsHTMLCopyEncoder ignore it and use text/html or text/plain depending where
-  // the selection is. if it is a selection into input/textarea element or in a
-  // html content with pre-wrap style : text/plain. Otherwise text/html. see
-  // nsHTMLCopyEncoder::SetSelection
+static nsresult EncodeForTextPlain(nsIDocumentEncoder& aEncoder,
+                                   Document& aDocument, Selection* aSelection,
+                                   uint32_t aAdditionalEncoderFlags,
+                                   bool& aCanBeEncodedAsTextHTML,
+                                   nsAString& aSerializationResult) {
+  // We assign text/html as the MIME type first, but in fact nsHTMLCopyEncoder
+  // forces the use of text/plain depending on where the selection is (e.g., a
+  // selection inside an <input> or <textarea> element). See
+  // nsHTMLCopyEncoder::SetSelection. We can then use this behavior to detect
+  // whether the selection can be encoded as text/html by checking the MIME type
+  // after nsHTMLCopyEncoder::SetSelection.
   nsAutoString mimeType;
-  mimeType.AssignLiteral("text/unicode");
+  mimeType.AssignLiteral(kHTMLMime);
 
   // Do the first and potentially trial encoding as preformatted and raw.
   uint32_t flags = aAdditionalEncoderFlags |
@@ -113,45 +114,51 @@ static nsresult EncodeForTextUnicode(nsIDocumentEncoder& aEncoder,
   // text widget.
   rv = aEncoder.GetMimeType(mimeType);
   NS_ENSURE_SUCCESS(rv, rv);
-  bool selForcedTextPlain = mimeType.EqualsLiteral(kTextMime);
-
-  nsAutoString buf;
-  rv = aEncoder.EncodeToString(buf);
-  NS_ENSURE_SUCCESS(rv, rv);
 
-  rv = aEncoder.GetMimeType(mimeType);
-  NS_ENSURE_SUCCESS(rv, rv);
+  // XXX: For XHTML documents, we would like to use pretty-printing encoding for
+  // text/plain, just as we do for HTML documents. This is achieved by
+  // relying on the current nsHTMLCopyEncoder design, where the MIME
+  // type is not updated to text/plain immediately in
+  // nsHTMLCopyEncoder::SetSelection(), but only later when
+  // nsHTMLCopyEncoder::EncodeToString() is called. As a result, we still see a
+  // text/html MIME type here for XHTML documents.
+  if (mimeType.EqualsLiteral(kTextMime)) {
+    // nsHTMLCopyEncoder forced the use of text/plain.
+    nsAutoString buf;
+    rv = aEncoder.EncodeToString(buf);
+    if (NS_SUCCEEDED(rv)) {
+      // Nothing to do. buf contains the final, preformatted, raw text/plain.
+      aSerializationResult.Assign(buf);
+    }
+    return rv;
+  }
 
-  // The mime type is ultimately text/html if the encoder successfully encoded
-  // the selection as text/html.
-  aEncodedAsTextHTMLResult = mimeType.EqualsLiteral(kHTMLMime);
+  MOZ_ASSERT(mimeType.EqualsLiteral(kHTMLMime));
+  // XXX: We currently only try to encode as text/html for HTML documents.
+  // See bug 857915.
+  if (aDocument.IsHTMLDocument()) {
+    aCanBeEncodedAsTextHTML = true;
+  }
 
-  if (selForcedTextPlain) {
-    // Nothing to do.  buf contains the final, preformatted, raw text/plain.
-    aSerializationResult.Assign(buf);
-  } else {
-    // Redo the encoding, but this time use pretty printing.
-    flags = nsIDocumentEncoder::OutputSelectionOnly |
-            nsIDocumentEncoder::OutputForPlainTextClipboardCopy |
-            nsIDocumentEncoder::OutputAbsoluteLinks |
-            nsIDocumentEncoder::SkipInvisibleContent |
-            nsIDocumentEncoder::OutputDropInvisibleBreak |
-            (aAdditionalEncoderFlags &
-             (nsIDocumentEncoder::OutputNoScriptContent |
-              nsIDocumentEncoder::OutputRubyAnnotation |
-              nsIDocumentEncoder::AllowCrossShadowBoundary));
-
-    mimeType.AssignLiteral(kTextMime);
-    rv = aEncoder.Init(&aDocument, mimeType, flags);
-    NS_ENSURE_SUCCESS(rv, rv);
+  // Do the text/plain encoding, but this time use pretty printing.
+  flags = nsIDocumentEncoder::OutputSelectionOnly |
+          nsIDocumentEncoder::OutputForPlainTextClipboardCopy |
+          nsIDocumentEncoder::OutputAbsoluteLinks |
+          nsIDocumentEncoder::SkipInvisibleContent |
+          nsIDocumentEncoder::OutputDropInvisibleBreak |
+          (aAdditionalEncoderFlags &
+           (nsIDocumentEncoder::OutputNoScriptContent |
+            nsIDocumentEncoder::OutputRubyAnnotation |
+            nsIDocumentEncoder::AllowCrossShadowBoundary));
 
-    rv = aEncoder.SetSelection(aSelection);
-    NS_ENSURE_SUCCESS(rv, rv);
+  mimeType.AssignLiteral(kTextMime);
+  rv = aEncoder.Init(&aDocument, mimeType, flags);
+  NS_ENSURE_SUCCESS(rv, rv);
 
-    rv = aEncoder.EncodeToString(aSerializationResult);
-    NS_ENSURE_SUCCESS(rv, rv);
-  }
+  rv = aEncoder.SetSelection(aSelection);
+  NS_ENSURE_SUCCESS(rv, rv);
 
+  rv = aEncoder.EncodeToString(aSerializationResult);
   return rv;
 }
 
@@ -174,26 +181,21 @@ static nsresult EncodeAsTextHTMLWithContext(
 }
 
 struct EncodedDocumentWithContext {
-  // When determining `mSerializationForTextUnicode`, `text/unicode` is passed
-  // as mime type to the encoder. It uses this as a switch to decide whether to
-  // encode the document as `text/html` or `text/plain`. It  is `true` iff
-  // `text/html` was used.
-  bool mUnicodeEncodingIsTextHTML = false;
-
-  // The serialized document when encoding the document with `text/unicode`. See
-  // comment of `mUnicodeEncodingIsTextHTML`.
-  nsAutoString mSerializationForTextUnicode;
-
-  // When `mUnicodeEncodingIsTextHTML` is true, this is the serialized document
-  // using `text/html`. Its value may differ from `mSerializationForTextHTML`,
-  // because different flags were passed to the encoder.
+  // Whether the document can be encoded as text/html.
+  bool mCanBeEncodedAsTextHTML = false;
+
+  // The serialized document when encoding the document with `text/plain`.
+  nsAutoString mSerializationForTextPlain;
+
+  // When `mCanBeEncodedAsTextHTML` is true, this is the serialized document
+  // using `text/html`.
   nsAutoString mSerializationForTextHTML;
 
-  // When `mUnicodeEncodingIsTextHTML` is true, this contains the serialized
+  // When `mCanBeEncodedAsTextHTML` is true, this contains the serialized
   // ancestor elements.
   nsAutoString mHTMLContextBuffer;
 
-  // When `mUnicodeEncodingIsTextHTML` is true, this contains numbers
+  // When `mCanBeEncodedAsTextHTML` is true, this contains numbers
   // identifying where in the context the serialization came from.
   nsAutoString mHTMLInfoBuffer;
 };
@@ -208,17 +210,17 @@ static nsresult EncodeDocumentWithContext(
     EncodedDocumentWithContext& aEncodedDocumentWithContext) {
   nsCOMPtr<nsIDocumentEncoder> docEncoder = do_createHTMLCopyEncoder();
 
-  bool unicodeEncodingIsTextHTML{false};
-  nsAutoString serializationForTextUnicode;
-  nsresult rv = EncodeForTextUnicode(
+  bool canBeEncodedAsTextHTML{false};
+  nsAutoString serializationForTextPlain;
+  nsresult rv = EncodeForTextPlain(
       *docEncoder, aDocument, aSelection, aAdditionalEncoderFlags,
-      unicodeEncodingIsTextHTML, serializationForTextUnicode);
+      canBeEncodedAsTextHTML, serializationForTextPlain);
   NS_ENSURE_SUCCESS(rv, rv);
 
   nsAutoString serializationForTextHTML;
   nsAutoString htmlContextBuffer;
   nsAutoString htmlInfoBuffer;
-  if (unicodeEncodingIsTextHTML) {
+  if (canBeEncodedAsTextHTML) {
     // Redo the encoding, but this time use the passed-in flags.
     // Don't allow wrapping of CJK strings.
     rv = EncodeAsTextHTMLWithContext(
@@ -230,7 +232,7 @@ static nsresult EncodeDocumentWithContext(
   }
 
   aEncodedDocumentWithContext = {
-      unicodeEncodingIsTextHTML, std::move(serializationForTextUnicode),
+      canBeEncodedAsTextHTML, std::move(serializationForTextPlain),
       std::move(serializationForTextHTML), std::move(htmlContextBuffer),
       std::move(htmlInfoBuffer)};
 
@@ -247,7 +249,10 @@ static nsresult CreateTransferable(
 
   aTransferable->Init(aDocument.GetLoadContext());
   aTransferable->SetDataPrincipal(aDocument.NodePrincipal());
-  if (aEncodedDocumentWithContext.mUnicodeEncodingIsTextHTML) {
+  if (aEncodedDocumentWithContext.mCanBeEncodedAsTextHTML) {
+    // XXX: Now we provide the text/plain directly, do we still need to always
+    // set a HTML converter? Or perhaps we could set the converter only when the
+    // text/plain is not available.
     // Set up a format converter so that clipboard flavor queries work.
     // This converter isn't really used for conversions.
     nsCOMPtr<nsIFormatConverter> htmlConverter =
@@ -276,15 +281,14 @@ static nsresult CreateTransferable(
       NS_ENSURE_SUCCESS(rv, rv);
     }
 
-    if (!aEncodedDocumentWithContext.mSerializationForTextUnicode.IsEmpty()) {
-      // unicode text
+    if (!aEncodedDocumentWithContext.mSerializationForTextPlain.IsEmpty()) {
       // Add the plain text DataFlavor to the transferable
       // If we didn't have this, then nsDataObj::GetData matches
       // text/plain against the kURLMime flavour which is not desirable
       // (eg. when pasting into Notepad)
-      rv = AppendString(
-          aTransferable,
-          aEncodedDocumentWithContext.mSerializationForTextUnicode, kTextMime);
+      rv = AppendString(aTransferable,
+                        aEncodedDocumentWithContext.mSerializationForTextPlain,
+                        kTextMime);
       NS_ENSURE_SUCCESS(rv, rv);
     }
 
@@ -309,11 +313,11 @@ static nsresult CreateTransferable(
       }
     }
   } else {
-    if (!aEncodedDocumentWithContext.mSerializationForTextUnicode.IsEmpty()) {
+    if (!aEncodedDocumentWithContext.mSerializationForTextPlain.IsEmpty()) {
       // Add the unicode DataFlavor to the transferable
-      rv = AppendString(
-          aTransferable,
-          aEncodedDocumentWithContext.mSerializationForTextUnicode, kTextMime);
+      rv = AppendString(aTransferable,
+                        aEncodedDocumentWithContext.mSerializationForTextPlain,
+                        kTextMime);
       NS_ENSURE_SUCCESS(rv, rv);
     }
   }
diff --git a/dom/serializers/nsDocumentEncoder.cpp b/dom/serializers/nsDocumentEncoder.cpp
@@ -1597,7 +1597,7 @@ already_AddRefed<nsIDocumentEncoder> do_createDocumentEncoder(
   return nullptr;
 }
 
-class nsHTMLCopyEncoder : public nsDocumentEncoder {
+class nsHTMLCopyEncoder final : public nsDocumentEncoder {
  private:
   class RangeNodeContext final : public nsDocumentEncoder::RangeNodeContext {
     bool IncludeInContext(nsINode& aNode) const final;
@@ -1638,13 +1638,11 @@ class nsHTMLCopyEncoder : public nsDocumentEncoder {
   static bool IsFirstNode(nsINode* aNode);
   static bool IsLastNode(nsINode* aNode);
 
-  bool mIsTextWidget;
+  bool mIsTextWidget{false};
 };
 
 nsHTMLCopyEncoder::nsHTMLCopyEncoder()
-    : nsDocumentEncoder{MakeUnique<nsHTMLCopyEncoder::RangeNodeContext>()} {
-  mIsTextWidget = false;
-}
+    : nsDocumentEncoder{MakeUnique<nsHTMLCopyEncoder::RangeNodeContext>()} {}
 
 nsHTMLCopyEncoder::~nsHTMLCopyEncoder() = default;
 
@@ -1659,14 +1657,14 @@ nsHTMLCopyEncoder::Init(Document* aDocument, const nsAString& aMimeType,
   mIsCopying = true;
   mDocument = aDocument;
 
-  // Hack, hack! Traditionally, the caller passes text/plain, which is
-  // treated as "guess text/html or text/plain" in this context. (It has a
-  // different meaning in other contexts. Sigh.) From now on, "text/plain"
-  // means forcing text/plain instead of guessing.
-  if (aMimeType.EqualsLiteral("text/plain")) {
-    mMimeType.AssignLiteral("text/plain");
+  // nsHTMLCopyEncoder only accepts "text/plain" or "text/html" MIME types, and
+  // the initial MIME type may change after setting the selection.
+  MOZ_ASSERT(aMimeType.EqualsLiteral(kTextMime) ||
+             aMimeType.EqualsLiteral(kHTMLMime));
+  if (aMimeType.EqualsLiteral(kTextMime)) {
+    mMimeType.AssignLiteral(kTextMime);
   } else {
-    mMimeType.AssignLiteral("text/html");
+    mMimeType.AssignLiteral(kHTMLMime);
   }
 
   // Make all links absolute when copying
@@ -1728,6 +1726,8 @@ nsHTMLCopyEncoder::SetSelection(Selection* aSelection) {
   // XXX bug 1245883
 
   // also consider ourselves in a text widget if we can't find an html document
+  // XXX: nsCopySupport relies on the MIME type not being updated immediately
+  // here, so it can apply different encoding for XHTML documents.
   if (!(mDocument && mDocument->IsHTMLDocument())) {
     mIsTextWidget = true;
     mEncodingScope.mSelection = aSelection;

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	dom/base/nsCopySupport.cpp	\|	154	+++++++++++++++++++++++++++++++++++++++++--------------------------------------
M	dom/serializers/nsDocumentEncoder.cpp	\|	24	++++++++++++------------