tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsTextToSubURI.cpp (6215B)


      1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 #include "nsString.h"
      6 #include "nsITextToSubURI.h"
      7 #include "nsEscape.h"
      8 #include "nsTextToSubURI.h"
      9 #include "nsCRT.h"
     10 #include "mozilla/Encoding.h"
     11 #include "mozilla/Preferences.h"
     12 #include "mozilla/TextUtils.h"
     13 #include "mozilla/Utf8.h"
     14 
     15 using namespace mozilla;
     16 
     // The destructor is explicitly defaulted; no custom teardown is written here.
     17 nsTextToSubURI::~nsTextToSubURI() = default;
     18 
     // NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; presumably it
     // supplies the nsISupports boilerplate for nsTextToSubURI with
     // nsITextToSubURI as the listed interface -- confirm against the macro.
     19 NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
     20 
     21 NS_IMETHODIMP
     22 nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset,
     23                                 const nsAString& aText, nsACString& aOut) {
     24  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
     25  if (!encoding) {
     26    aOut.Truncate();
     27    return NS_ERROR_UCONV_NOCONV;
     28  }
     29  nsresult rv;
     30  nsAutoCString intermediate;
     31  std::tie(rv, std::ignore) = encoding->Encode(aText, intermediate);
     32  if (NS_FAILED(rv)) {
     33    aOut.Truncate();
     34    return rv;
     35  }
     36  bool ok = NS_Escape(intermediate, aOut, url_XPAlphas);
     37  if (!ok) {
     38    aOut.Truncate();
     39    return NS_ERROR_OUT_OF_MEMORY;
     40  }
     41  return NS_OK;
     42 }
     43 
     44 NS_IMETHODIMP
     45 nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset,
     46                                   const nsACString& aText, nsAString& aOut) {
     47  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
     48  if (!encoding) {
     49    aOut.Truncate();
     50    return NS_ERROR_UCONV_NOCONV;
     51  }
     52  nsAutoCString unescaped(aText);
     53  NS_UnescapeURL(unescaped);
     54  auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut);
     55  if (NS_SUCCEEDED(rv)) {
     56    return NS_OK;
     57  }
     58  return rv;
     59 }
     60 
     61 static bool statefulCharset(const char* charset) {
     62  // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in
     63  // mozilla-central but keeping them here just in case for the benefit of
     64  // comm-central.
     65  if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-") - 1) ||
     66      !nsCRT::strcasecmp(charset, "UTF-7") ||
     67      !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
     68    return true;
     69 
     70  return false;
     71 }
     72 
     73 // static
     74 nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset,
     75                                             const nsCString& aURI,
     76                                             nsAString& aOut) {
     77  // check for 7bit encoding the data may not be ASCII after we decode
     78  bool isStatefulCharset = statefulCharset(aCharset.get());
     79 
     80  if (!isStatefulCharset) {
     81    if (IsAscii(aURI)) {
     82      CopyASCIItoUTF16(aURI, aOut);
     83      return NS_OK;
     84    }
     85    if (IsUtf8(aURI)) {
     86      CopyUTF8toUTF16(aURI, aOut);
     87      return NS_OK;
     88    }
     89  }
     90 
     91  // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
     92  NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
     93 
     94  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
     95  if (!encoding) {
     96    aOut.Truncate();
     97    return NS_ERROR_UCONV_NOCONV;
     98  }
     99  return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut);
    100 }
    101 
    102 NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString& aURIFragment,
    103                                               bool aDontEscape,
    104                                               nsAString& _retval) {
    105  nsAutoCString unescapedSpec;
    106  // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
    107  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
    108                 esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
    109 
    110  // in case of failure, return escaped URI
    111  // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
    112  // sequences are also considered failure in this context
    113  if (convertURItoUnicode("UTF-8"_ns, unescapedSpec, _retval) != NS_OK) {
    114    // assume UTF-8 instead of ASCII  because hostname (IDN) may be in UTF-8
    115    CopyUTF8toUTF16(aURIFragment, _retval);
    116  }
    117 
    118  if (aDontEscape) {
    119    return NS_OK;
    120  }
    121 
    122  // If there are any characters that are unsafe for URIs, reescape those.
    123  if (mIDNBlocklist.IsEmpty()) {
    124    mozilla::net::InitializeBlocklist(mIDNBlocklist);
    125    // we allow SPACE and IDEOGRAPHIC SPACE in this method
    126    mozilla::net::RemoveCharFromBlocklist(u' ', mIDNBlocklist);
    127    mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist);
    128  }
    129 
    130  MOZ_ASSERT(!mIDNBlocklist.IsEmpty());
    131  const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval);
    132  nsString reescapedSpec;
    133  _retval = NS_EscapeURL(
    134      unescapedResult,
    135      [&](char16_t aChar) -> bool {
    136        return mozilla::net::CharInBlocklist(aChar, mIDNBlocklist);
    137      },
    138      reescapedSpec);
    139 
    140  return NS_OK;
    141 }
    142 
    143 NS_IMETHODIMP
    144 nsTextToSubURI::UnEscapeNonAsciiURIJS(const nsACString& aCharset,
    145                                      const nsACString& aURIFragment,
    146                                      nsAString& _retval) {
    147  return UnEscapeNonAsciiURI(aCharset, aURIFragment, _retval);
    148 }
    149 
    150 // static
    151 nsresult nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset,
    152                                             const nsACString& aURIFragment,
    153                                             nsAString& _retval) {
    154  nsAutoCString unescapedSpec;
    155  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
    156                 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
    157  // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
    158  // superset since converting "http:" with such an encoding is always a bad
    159  // idea.
    160  if (!IsUtf8(unescapedSpec) &&
    161      (aCharset.LowerCaseEqualsLiteral("utf-16") ||
    162       aCharset.LowerCaseEqualsLiteral("utf-16be") ||
    163       aCharset.LowerCaseEqualsLiteral("utf-16le") ||
    164       aCharset.LowerCaseEqualsLiteral("utf-7") ||
    165       aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))) {
    166    CopyASCIItoUTF16(aURIFragment, _retval);
    167    return NS_OK;
    168  }
    169 
    170  nsresult rv =
    171      convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, _retval);
    172  // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error
    173  // if the string ends with a valid (but incomplete) sequence.
    174  return rv == NS_OK_UDEC_MOREINPUT ? NS_ERROR_UDEC_ILLEGALINPUT : rv;
    175 }
    176 
    177 //----------------------------------------------------------------------