commit 70e6d16fa38a412a54621178927ca82232a59341
parent ff7acc27f88d8c020418ef62cf1a7b6fea61984d
Author: Edgar Chen <echen@mozilla.com>
Date: Wed, 10 Dec 2025 16:02:27 +0000
Bug 2004603 - Avoid encoding for `text/html` twice for copying selection; r=hsivonen
The only remaining purpose of the additional encoding is to detect XHTML documents,
but I think we can handle XHTML documents without doing that.
Differential Revision: https://phabricator.services.mozilla.com/D275398
Diffstat:
2 files changed, 91 insertions(+), 87 deletions(-)
diff --git a/dom/base/nsCopySupport.cpp b/dom/base/nsCopySupport.cpp
@@ -83,18 +83,19 @@ static nsresult AppendImagePromise(nsITransferable* aTransferable,
nsINode* aImageNode);
#endif
-static nsresult EncodeForTextUnicode(nsIDocumentEncoder& aEncoder,
- Document& aDocument, Selection* aSelection,
- uint32_t aAdditionalEncoderFlags,
- bool& aEncodedAsTextHTMLResult,
- nsAutoString& aSerializationResult) {
- // note that we assign text/unicode as mime type, but in fact
- // nsHTMLCopyEncoder ignore it and use text/html or text/plain depending where
- // the selection is. if it is a selection into input/textarea element or in a
- // html content with pre-wrap style : text/plain. Otherwise text/html. see
- // nsHTMLCopyEncoder::SetSelection
+static nsresult EncodeForTextPlain(nsIDocumentEncoder& aEncoder,
+ Document& aDocument, Selection* aSelection,
+ uint32_t aAdditionalEncoderFlags,
+ bool& aCanBeEncodedAsTextHTML,
+ nsAString& aSerializationResult) {
+ // We assign text/html as the MIME type first, but in fact nsHTMLCopyEncoder
+ // force the use of text/plain depending where the selection is (e.g., a
+ // selection inside an <input> or <textarea> element). See
+ // nsHTMLCopyEncoder::SetSelection. We can then use this behavior to detect
+ // whether the selection can be encoded as text/html by checking the MIME type
+ // after nsHTMLCopyEncoder::SetSelection.
nsAutoString mimeType;
- mimeType.AssignLiteral("text/unicode");
+ mimeType.AssignLiteral(kHTMLMime);
// Do the first and potentially trial encoding as preformatted and raw.
uint32_t flags = aAdditionalEncoderFlags |
@@ -113,45 +114,51 @@ static nsresult EncodeForTextUnicode(nsIDocumentEncoder& aEncoder,
// text widget.
rv = aEncoder.GetMimeType(mimeType);
NS_ENSURE_SUCCESS(rv, rv);
- bool selForcedTextPlain = mimeType.EqualsLiteral(kTextMime);
-
- nsAutoString buf;
- rv = aEncoder.EncodeToString(buf);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = aEncoder.GetMimeType(mimeType);
- NS_ENSURE_SUCCESS(rv, rv);
+ // XXX: For XHTML documents, we would like to use pretty-printing encoding for
+ // text/plain, just as we do for HTML documents. This is achieved by
+ // relying on the current nsHTMLCopyEncoder design, where the MIME
+ // type is not updated to text/plain immediately in
+ // nsHTMLCopyEncoder::SetSelection(), but only latter when
+ // nsHTMLCopyEncoder::EncodeToString() is called. As a result, we still see a
+ // text/html MIME type here for XHTML documents.
+ if (mimeType.EqualsLiteral(kTextMime)) {
+ // nsHTMLCopyEncoder force to use text/plain.
+ nsAutoString buf;
+ rv = aEncoder.EncodeToString(buf);
+ if (NS_SUCCEEDED(rv)) {
+ // Nothing to do. buf contains the final, preformatted, raw text/plain.
+ aSerializationResult.Assign(buf);
+ }
+ return rv;
+ }
- // The mime type is ultimately text/html if the encoder successfully encoded
- // the selection as text/html.
- aEncodedAsTextHTMLResult = mimeType.EqualsLiteral(kHTMLMime);
+ MOZ_ASSERT(mimeType.EqualsLiteral(kHTMLMime));
+ // XXX: We currently only try to encode as text/html for HTML documents.
+ // See bug 857915.
+ if (aDocument.IsHTMLDocument()) {
+ aCanBeEncodedAsTextHTML = true;
+ }
- if (selForcedTextPlain) {
- // Nothing to do. buf contains the final, preformatted, raw text/plain.
- aSerializationResult.Assign(buf);
- } else {
- // Redo the encoding, but this time use pretty printing.
- flags = nsIDocumentEncoder::OutputSelectionOnly |
- nsIDocumentEncoder::OutputForPlainTextClipboardCopy |
- nsIDocumentEncoder::OutputAbsoluteLinks |
- nsIDocumentEncoder::SkipInvisibleContent |
- nsIDocumentEncoder::OutputDropInvisibleBreak |
- (aAdditionalEncoderFlags &
- (nsIDocumentEncoder::OutputNoScriptContent |
- nsIDocumentEncoder::OutputRubyAnnotation |
- nsIDocumentEncoder::AllowCrossShadowBoundary));
-
- mimeType.AssignLiteral(kTextMime);
- rv = aEncoder.Init(&aDocument, mimeType, flags);
- NS_ENSURE_SUCCESS(rv, rv);
+ // Do the text/plain encoding, but this time use pretty printing.
+ flags = nsIDocumentEncoder::OutputSelectionOnly |
+ nsIDocumentEncoder::OutputForPlainTextClipboardCopy |
+ nsIDocumentEncoder::OutputAbsoluteLinks |
+ nsIDocumentEncoder::SkipInvisibleContent |
+ nsIDocumentEncoder::OutputDropInvisibleBreak |
+ (aAdditionalEncoderFlags &
+ (nsIDocumentEncoder::OutputNoScriptContent |
+ nsIDocumentEncoder::OutputRubyAnnotation |
+ nsIDocumentEncoder::AllowCrossShadowBoundary));
- rv = aEncoder.SetSelection(aSelection);
- NS_ENSURE_SUCCESS(rv, rv);
+ mimeType.AssignLiteral(kTextMime);
+ rv = aEncoder.Init(&aDocument, mimeType, flags);
+ NS_ENSURE_SUCCESS(rv, rv);
- rv = aEncoder.EncodeToString(aSerializationResult);
- NS_ENSURE_SUCCESS(rv, rv);
- }
+ rv = aEncoder.SetSelection(aSelection);
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = aEncoder.EncodeToString(aSerializationResult);
return rv;
}
@@ -174,26 +181,21 @@ static nsresult EncodeAsTextHTMLWithContext(
}
struct EncodedDocumentWithContext {
- // When determining `mSerializationForTextUnicode`, `text/unicode` is passed
- // as mime type to the encoder. It uses this as a switch to decide whether to
- // encode the document as `text/html` or `text/plain`. It is `true` iff
- // `text/html` was used.
- bool mUnicodeEncodingIsTextHTML = false;
-
- // The serialized document when encoding the document with `text/unicode`. See
- // comment of `mUnicodeEncodingIsTextHTML`.
- nsAutoString mSerializationForTextUnicode;
-
- // When `mUnicodeEncodingIsTextHTML` is true, this is the serialized document
- // using `text/html`. Its value may differ from `mSerializationForTextHTML`,
- // because different flags were passed to the encoder.
+ // Whether the document can be encoded as text/html.
+ bool mCanBeEncodedAsTextHTML = false;
+
+ // The serialized document when encoding the document with `text/plain`.
+ nsAutoString mSerializationForTextPlain;
+
+ // When `mCanBeEncodedAsTextHTML` is true, this is the serialized document
+ // using `text/html`.
nsAutoString mSerializationForTextHTML;
- // When `mUnicodeEncodingIsTextHTML` is true, this contains the serialized
+ // When `mCanBeEncodedAsTextHTML` is true, this contains the serialized
// ancestor elements.
nsAutoString mHTMLContextBuffer;
- // When `mUnicodeEncodingIsTextHTML` is true, this contains numbers
+ // When `mCanBeEncodedAsTextHTML` is true, this contains numbers
// identifying where in the context the serialization came from.
nsAutoString mHTMLInfoBuffer;
};
@@ -208,17 +210,17 @@ static nsresult EncodeDocumentWithContext(
EncodedDocumentWithContext& aEncodedDocumentWithContext) {
nsCOMPtr<nsIDocumentEncoder> docEncoder = do_createHTMLCopyEncoder();
- bool unicodeEncodingIsTextHTML{false};
- nsAutoString serializationForTextUnicode;
- nsresult rv = EncodeForTextUnicode(
+ bool canBeEncodedAsTextHTML{false};
+ nsAutoString serializationForTextPlain;
+ nsresult rv = EncodeForTextPlain(
*docEncoder, aDocument, aSelection, aAdditionalEncoderFlags,
- unicodeEncodingIsTextHTML, serializationForTextUnicode);
+ canBeEncodedAsTextHTML, serializationForTextPlain);
NS_ENSURE_SUCCESS(rv, rv);
nsAutoString serializationForTextHTML;
nsAutoString htmlContextBuffer;
nsAutoString htmlInfoBuffer;
- if (unicodeEncodingIsTextHTML) {
+ if (canBeEncodedAsTextHTML) {
// Redo the encoding, but this time use the passed-in flags.
// Don't allow wrapping of CJK strings.
rv = EncodeAsTextHTMLWithContext(
@@ -230,7 +232,7 @@ static nsresult EncodeDocumentWithContext(
}
aEncodedDocumentWithContext = {
- unicodeEncodingIsTextHTML, std::move(serializationForTextUnicode),
+ canBeEncodedAsTextHTML, std::move(serializationForTextPlain),
std::move(serializationForTextHTML), std::move(htmlContextBuffer),
std::move(htmlInfoBuffer)};
@@ -247,7 +249,10 @@ static nsresult CreateTransferable(
aTransferable->Init(aDocument.GetLoadContext());
aTransferable->SetDataPrincipal(aDocument.NodePrincipal());
- if (aEncodedDocumentWithContext.mUnicodeEncodingIsTextHTML) {
+ if (aEncodedDocumentWithContext.mCanBeEncodedAsTextHTML) {
+ // XXX: Now we provide the text/plain directly, do we still need to always
+ // set a HTML converter? Or perhaps we could set the converter only when the
+ // text/plain is not available.
// Set up a format converter so that clipboard flavor queries work.
// This converter isn't really used for conversions.
nsCOMPtr<nsIFormatConverter> htmlConverter =
@@ -276,15 +281,14 @@ static nsresult CreateTransferable(
NS_ENSURE_SUCCESS(rv, rv);
}
- if (!aEncodedDocumentWithContext.mSerializationForTextUnicode.IsEmpty()) {
- // unicode text
+ if (!aEncodedDocumentWithContext.mSerializationForTextPlain.IsEmpty()) {
// Add the plain text DataFlavor to the transferable
// If we didn't have this, then nsDataObj::GetData matches
// text/plain against the kURLMime flavour which is not desirable
// (eg. when pasting into Notepad)
- rv = AppendString(
- aTransferable,
- aEncodedDocumentWithContext.mSerializationForTextUnicode, kTextMime);
+ rv = AppendString(aTransferable,
+ aEncodedDocumentWithContext.mSerializationForTextPlain,
+ kTextMime);
NS_ENSURE_SUCCESS(rv, rv);
}
@@ -309,11 +313,11 @@ static nsresult CreateTransferable(
}
}
} else {
- if (!aEncodedDocumentWithContext.mSerializationForTextUnicode.IsEmpty()) {
+ if (!aEncodedDocumentWithContext.mSerializationForTextPlain.IsEmpty()) {
// Add the unicode DataFlavor to the transferable
- rv = AppendString(
- aTransferable,
- aEncodedDocumentWithContext.mSerializationForTextUnicode, kTextMime);
+ rv = AppendString(aTransferable,
+ aEncodedDocumentWithContext.mSerializationForTextPlain,
+ kTextMime);
NS_ENSURE_SUCCESS(rv, rv);
}
}
diff --git a/dom/serializers/nsDocumentEncoder.cpp b/dom/serializers/nsDocumentEncoder.cpp
@@ -1597,7 +1597,7 @@ already_AddRefed<nsIDocumentEncoder> do_createDocumentEncoder(
return nullptr;
}
-class nsHTMLCopyEncoder : public nsDocumentEncoder {
+class nsHTMLCopyEncoder final : public nsDocumentEncoder {
private:
class RangeNodeContext final : public nsDocumentEncoder::RangeNodeContext {
bool IncludeInContext(nsINode& aNode) const final;
@@ -1638,13 +1638,11 @@ class nsHTMLCopyEncoder : public nsDocumentEncoder {
static bool IsFirstNode(nsINode* aNode);
static bool IsLastNode(nsINode* aNode);
- bool mIsTextWidget;
+ bool mIsTextWidget{false};
};
nsHTMLCopyEncoder::nsHTMLCopyEncoder()
- : nsDocumentEncoder{MakeUnique<nsHTMLCopyEncoder::RangeNodeContext>()} {
- mIsTextWidget = false;
-}
+ : nsDocumentEncoder{MakeUnique<nsHTMLCopyEncoder::RangeNodeContext>()} {}
nsHTMLCopyEncoder::~nsHTMLCopyEncoder() = default;
@@ -1659,14 +1657,14 @@ nsHTMLCopyEncoder::Init(Document* aDocument, const nsAString& aMimeType,
mIsCopying = true;
mDocument = aDocument;
- // Hack, hack! Traditionally, the caller passes text/plain, which is
- // treated as "guess text/html or text/plain" in this context. (It has a
- // different meaning in other contexts. Sigh.) From now on, "text/plain"
- // means forcing text/plain instead of guessing.
- if (aMimeType.EqualsLiteral("text/plain")) {
- mMimeType.AssignLiteral("text/plain");
+ // nsHTMLCopyEncoder only accepts "text/plain" or "text/html" MIME types, and
+ // the initial MIME type may change after setting the selection.
+ MOZ_ASSERT(aMimeType.EqualsLiteral(kTextMime) ||
+ aMimeType.EqualsLiteral(kHTMLMime));
+ if (aMimeType.EqualsLiteral(kTextMime)) {
+ mMimeType.AssignLiteral(kTextMime);
} else {
- mMimeType.AssignLiteral("text/html");
+ mMimeType.AssignLiteral(kHTMLMime);
}
// Make all links absolute when copying
@@ -1728,6 +1726,8 @@ nsHTMLCopyEncoder::SetSelection(Selection* aSelection) {
// XXX bug 1245883
// also consider ourselves in a text widget if we can't find an html document
+ // XXX: nsCopySupport relies on the MIME type not being updated immediately
+ // here, so it can apply different encoding for XHTML documents.
if (!(mDocument && mDocument->IsHTMLDocument())) {
mIsTextWidget = true;
mEncodingScope.mSelection = aSelection;