nsTextToSubURI.cpp (6215B)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 #include "nsString.h" 6 #include "nsITextToSubURI.h" 7 #include "nsEscape.h" 8 #include "nsTextToSubURI.h" 9 #include "nsCRT.h" 10 #include "mozilla/Encoding.h" 11 #include "mozilla/Preferences.h" 12 #include "mozilla/TextUtils.h" 13 #include "mozilla/Utf8.h" 14 15 using namespace mozilla; 16 17 nsTextToSubURI::~nsTextToSubURI() = default; 18 19 NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) 20 21 NS_IMETHODIMP 22 nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset, 23 const nsAString& aText, nsACString& aOut) { 24 auto encoding = Encoding::ForLabelNoReplacement(aCharset); 25 if (!encoding) { 26 aOut.Truncate(); 27 return NS_ERROR_UCONV_NOCONV; 28 } 29 nsresult rv; 30 nsAutoCString intermediate; 31 std::tie(rv, std::ignore) = encoding->Encode(aText, intermediate); 32 if (NS_FAILED(rv)) { 33 aOut.Truncate(); 34 return rv; 35 } 36 bool ok = NS_Escape(intermediate, aOut, url_XPAlphas); 37 if (!ok) { 38 aOut.Truncate(); 39 return NS_ERROR_OUT_OF_MEMORY; 40 } 41 return NS_OK; 42 } 43 44 NS_IMETHODIMP 45 nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset, 46 const nsACString& aText, nsAString& aOut) { 47 auto encoding = Encoding::ForLabelNoReplacement(aCharset); 48 if (!encoding) { 49 aOut.Truncate(); 50 return NS_ERROR_UCONV_NOCONV; 51 } 52 nsAutoCString unescaped(aText); 53 NS_UnescapeURL(unescaped); 54 auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut); 55 if (NS_SUCCEEDED(rv)) { 56 return NS_OK; 57 } 58 return rv; 59 } 60 61 static bool statefulCharset(const char* charset) { 62 // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in 63 // mozilla-central but keeping them here just in case for the benefit of 64 // comm-central. 65 if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-") - 1) || 66 !nsCRT::strcasecmp(charset, "UTF-7") || 67 !nsCRT::strcasecmp(charset, "HZ-GB-2312")) 68 return true; 69 70 return false; 71 } 72 73 // static 74 nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset, 75 const nsCString& aURI, 76 nsAString& aOut) { 77 // check for 7bit encoding the data may not be ASCII after we decode 78 bool isStatefulCharset = statefulCharset(aCharset.get()); 79 80 if (!isStatefulCharset) { 81 if (IsAscii(aURI)) { 82 CopyASCIItoUTF16(aURI, aOut); 83 return NS_OK; 84 } 85 if (IsUtf8(aURI)) { 86 CopyUTF8toUTF16(aURI, aOut); 87 return NS_OK; 88 } 89 } 90 91 // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. 92 NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); 93 94 auto encoding = Encoding::ForLabelNoReplacement(aCharset); 95 if (!encoding) { 96 aOut.Truncate(); 97 return NS_ERROR_UCONV_NOCONV; 98 } 99 return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut); 100 } 101 102 NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString& aURIFragment, 103 bool aDontEscape, 104 nsAString& _retval) { 105 nsAutoCString unescapedSpec; 106 // skip control octets (0x00 - 0x1f and 0x7f) when unescaping 107 NS_UnescapeURL(PromiseFlatCString(aURIFragment), 108 esc_SkipControl | esc_AlwaysCopy, unescapedSpec); 109 110 // in case of failure, return escaped URI 111 // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte 112 // sequences are also considered failure in this context 113 if (convertURItoUnicode("UTF-8"_ns, unescapedSpec, _retval) != NS_OK) { 114 // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 115 CopyUTF8toUTF16(aURIFragment, _retval); 116 } 117 118 if (aDontEscape) { 119 return NS_OK; 120 } 121 122 // If there are any characters that are unsafe for URIs, reescape those. 123 if (mIDNBlocklist.IsEmpty()) { 124 mozilla::net::InitializeBlocklist(mIDNBlocklist); 125 // we allow SPACE and IDEOGRAPHIC SPACE in this method 126 mozilla::net::RemoveCharFromBlocklist(u' ', mIDNBlocklist); 127 mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist); 128 } 129 130 MOZ_ASSERT(!mIDNBlocklist.IsEmpty()); 131 const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval); 132 nsString reescapedSpec; 133 _retval = NS_EscapeURL( 134 unescapedResult, 135 [&](char16_t aChar) -> bool { 136 return mozilla::net::CharInBlocklist(aChar, mIDNBlocklist); 137 }, 138 reescapedSpec); 139 140 return NS_OK; 141 } 142 143 NS_IMETHODIMP 144 nsTextToSubURI::UnEscapeNonAsciiURIJS(const nsACString& aCharset, 145 const nsACString& aURIFragment, 146 nsAString& _retval) { 147 return UnEscapeNonAsciiURI(aCharset, aURIFragment, _retval); 148 } 149 150 // static 151 nsresult nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset, 152 const nsACString& aURIFragment, 153 nsAString& _retval) { 154 nsAutoCString unescapedSpec; 155 NS_UnescapeURL(PromiseFlatCString(aURIFragment), 156 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); 157 // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII 158 // superset since converting "http:" with such an encoding is always a bad 159 // idea. 160 if (!IsUtf8(unescapedSpec) && 161 (aCharset.LowerCaseEqualsLiteral("utf-16") || 162 aCharset.LowerCaseEqualsLiteral("utf-16be") || 163 aCharset.LowerCaseEqualsLiteral("utf-16le") || 164 aCharset.LowerCaseEqualsLiteral("utf-7") || 165 aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))) { 166 CopyASCIItoUTF16(aURIFragment, _retval); 167 return NS_OK; 168 } 169 170 nsresult rv = 171 convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, _retval); 172 // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error 173 // if the string ends with a valid (but incomplete) sequence. 174 return rv == NS_OK_UDEC_MOREINPUT ? NS_ERROR_UDEC_ILLEGALINPUT : rv; 175 } 176 177 //----------------------------------------------------------------------