tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsUnknownDecoder.cpp (26973B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "nsUnknownDecoder.h"
      7 #include "nsIPipe.h"
      8 #include "nsIInputStream.h"
      9 #include "nsIOutputStream.h"
     10 #include "nsMimeTypes.h"
     11 
     12 #include "nsCRT.h"
     13 
     14 #include "nsIMIMEService.h"
     15 
     16 #include "nsIViewSourceChannel.h"
     17 #include "nsIHttpChannel.h"
     18 #include "nsIForcePendingChannel.h"
     19 #include "nsIEncodedChannel.h"
     20 #include "nsIURI.h"
     21 #include "nsStringStream.h"
     22 #include "nsNetCID.h"
     23 #include "nsNetUtil.h"
     24 #include "nsQueryObject.h"
     25 #include "nsComponentManagerUtils.h"
     26 #include "nsServiceManagerUtils.h"
     27 #include "mozilla/StaticPrefs_network.h"
     28 
     29 #include <algorithm>
     30 
     31 #define MAX_BUFFER_SIZE 512u
     32 
     33 using namespace mozilla;
     34 
     35 NS_IMPL_ISUPPORTS(nsUnknownDecoder::ConvertedStreamListener, nsIStreamListener,
     36                  nsIRequestObserver)
     37 
     38 nsUnknownDecoder::ConvertedStreamListener::ConvertedStreamListener(
     39    nsUnknownDecoder* aDecoder) {
     40  mDecoder = aDecoder;
     41 }
     42 
     43 nsresult nsUnknownDecoder::ConvertedStreamListener::AppendDataToString(
     44    nsIInputStream* inputStream, void* closure, const char* rawSegment,
     45    uint32_t toOffset, uint32_t count, uint32_t* writeCount) {
     46  nsCString* decodedData = static_cast<nsCString*>(closure);
     47  decodedData->Append(rawSegment, count);
     48  *writeCount = count;
     49  return NS_OK;
     50 }
     51 
     52 NS_IMETHODIMP
     53 nsUnknownDecoder::ConvertedStreamListener::OnStartRequest(nsIRequest* request) {
     54  return NS_OK;
     55 }
     56 
     57 NS_IMETHODIMP
     58 nsUnknownDecoder::ConvertedStreamListener::OnDataAvailable(
     59    nsIRequest* request, nsIInputStream* stream, uint64_t offset,
     60    uint32_t count) {
     61  uint32_t read;
     62  nsAutoCString decodedData;
     63  {
     64    MutexAutoLock lock(mDecoder->mMutex);
     65    decodedData = mDecoder->mDecodedData;
     66  }
     67  nsresult rv =
     68      stream->ReadSegments(AppendDataToString, &decodedData, count, &read);
     69  if (NS_FAILED(rv)) {
     70    return rv;
     71  }
     72  MutexAutoLock lock(mDecoder->mMutex);
     73  mDecoder->mDecodedData = decodedData;
     74  return NS_OK;
     75 }
     76 
     77 NS_IMETHODIMP
     78 nsUnknownDecoder::ConvertedStreamListener::OnStopRequest(nsIRequest* request,
     79                                                         nsresult status) {
     80  return NS_OK;
     81 }
     82 
     83 nsUnknownDecoder::nsUnknownDecoder(nsIStreamListener* aListener)
     84    : mNextListener(aListener),
     85      mBuffer(nullptr),
     86      mBufferLen(0),
     87      mMutex("nsUnknownDecoder"),
     88      mDecodedData("") {}
     89 
     90 nsUnknownDecoder::~nsUnknownDecoder() {
     91  if (mBuffer) {
     92    delete[] mBuffer;
     93    mBuffer = nullptr;
     94  }
     95 }
     96 
     97 // ----
     98 //
     99 // nsISupports implementation...
    100 //
    101 // ----
    102 
        // AddRef/Release for nsUnknownDecoder are generated by the XPCOM
        // helper macros below.
    103 NS_IMPL_ADDREF(nsUnknownDecoder)
    104 NS_IMPL_RELEASE(nsUnknownDecoder)
    105 
        // QueryInterface map: lists every interface this class answers to.
        // nsISupports is resolved through nsIThreadRetargetableStreamListener
        // using the _AMBIGUOUS entry form.
    106 NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder)
    107  NS_INTERFACE_MAP_ENTRY(nsIStreamConverter)
    108  NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
    109  NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
    110  NS_INTERFACE_MAP_ENTRY(nsIContentSniffer)
    111  NS_INTERFACE_MAP_ENTRY(nsIThreadRetargetableStreamListener)
    112  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports,
    113                                   nsIThreadRetargetableStreamListener)
    114 NS_INTERFACE_MAP_END
    115 
    116 // ----
    117 //
    118 // nsIStreamConverter methods...
    119 //
    120 // ----
    121 
    122 NS_IMETHODIMP
    123 nsUnknownDecoder::Convert(nsIInputStream* aFromStream, const char* aFromType,
    124                          const char* aToType, nsISupports* aCtxt,
    125                          nsIInputStream** aResultStream) {
    126  return NS_ERROR_NOT_IMPLEMENTED;
    127 }
    128 
    129 NS_IMETHODIMP
    130 nsUnknownDecoder::AsyncConvertData(const char* aFromType, const char* aToType,
    131                                   nsIStreamListener* aListener,
    132                                   nsISupports* aCtxt) {
    133  NS_ASSERTION(aListener && aFromType && aToType,
    134               "null pointer passed into multi mixed converter");
    135  // hook up our final listener. this guy gets the various On*() calls we want
    136  // to throw at him.
    137  //
    138 
    139  MutexAutoLock lock(mMutex);
    140  mNextListener = aListener;
    141  return (aListener) ? NS_OK : NS_ERROR_FAILURE;
    142 }
    143 
    144 NS_IMETHODIMP
    145 nsUnknownDecoder::GetConvertedType(const nsACString& aFromType,
    146                                   nsIChannel* aChannel, nsACString& aToType) {
    147  return NS_ERROR_NOT_IMPLEMENTED;
    148 }
    149 
    150 // ----
    151 //
    152 // nsIStreamListener methods...
    153 //
    154 // ----
    155 
    156 NS_IMETHODIMP
    157 nsUnknownDecoder::OnDataAvailable(nsIRequest* request, nsIInputStream* aStream,
    158                                  uint64_t aSourceOffset, uint32_t aCount) {
    159  nsresult rv = NS_OK;
    160 
    161  bool contentTypeEmpty;
    162  {
    163    MutexAutoLock lock(mMutex);
    164    if (!mNextListener) return NS_ERROR_FAILURE;
    165 
    166    contentTypeEmpty = mContentType.IsEmpty();
    167  }
    168 
    169  if (contentTypeEmpty) {
    170    uint32_t count, len;
    171 
    172    // If the buffer has not been allocated by now, just fail...
    173    if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;
    174 
    175    //
    176    // Determine how much of the stream should be read to fill up the
    177    // sniffer buffer...
    178    //
    179    if (mBufferLen + aCount >= MAX_BUFFER_SIZE) {
    180      count = MAX_BUFFER_SIZE - mBufferLen;
    181    } else {
    182      count = aCount;
    183    }
    184 
    185    // Read the data into the buffer...
    186    rv = aStream->Read((mBuffer + mBufferLen), count, &len);
    187    if (NS_FAILED(rv)) return rv;
    188 
    189    mBufferLen += len;
    190    aCount -= len;
    191 
    192    if (aCount) {
    193      //
    194      // Adjust the source offset...  The call to FireListenerNotifications(...)
    195      // will make the first OnDataAvailable(...) call with an offset of 0.
    196      // So, this offset needs to be adjusted to reflect that...
    197      //
    198      aSourceOffset += mBufferLen;
    199 
    200      DetermineContentType(request);
    201 
    202      rv = FireListenerNotifications(request, nullptr);
    203    }
    204  }
    205 
    206  // Must not fire ODA again if it failed once
    207  if (aCount && NS_SUCCEEDED(rv)) {
    208 #ifdef DEBUG
    209    {
    210      MutexAutoLock lock(mMutex);
    211      NS_ASSERTION(!mContentType.IsEmpty(),
    212                   "Content type should be known by now.");
    213    }
    214 #endif
    215 
    216    nsCOMPtr<nsIStreamListener> listener;
    217    {
    218      MutexAutoLock lock(mMutex);
    219      listener = mNextListener;
    220    }
    221    rv = listener->OnDataAvailable(request, aStream, aSourceOffset, aCount);
    222  }
    223 
    224  return rv;
    225 }
    226 
    227 NS_IMETHODIMP
    228 nsUnknownDecoder::MaybeRetarget(nsIRequest* request) {
    229  return NS_ERROR_NOT_IMPLEMENTED;
    230 }
    231 
    232 // ----
    233 //
    234 // nsIRequestObserver methods...
    235 //
    236 // ----
    237 
    238 NS_IMETHODIMP
    239 nsUnknownDecoder::OnStartRequest(nsIRequest* request) {
    240  nsresult rv = NS_OK;
    241 
    242  {
    243    MutexAutoLock lock(mMutex);
    244    if (!mNextListener) return NS_ERROR_FAILURE;
    245  }
    246 
    247  // Allocate the sniffer buffer...
    248  if (NS_SUCCEEDED(rv) && !mBuffer) {
    249    mBuffer = new char[MAX_BUFFER_SIZE];
    250 
    251    if (!mBuffer) {
    252      rv = NS_ERROR_OUT_OF_MEMORY;
    253    }
    254  }
    255 
    256  // Do not pass the OnStartRequest on to the next listener (yet)...
    257  return rv;
    258 }
    259 
    260 NS_IMETHODIMP
    261 nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsresult aStatus) {
    262  nsresult rv = NS_OK;
    263 
    264  bool contentTypeEmpty;
    265  {
    266    MutexAutoLock lock(mMutex);
    267    if (!mNextListener) return NS_ERROR_FAILURE;
    268 
    269    contentTypeEmpty = mContentType.IsEmpty();
    270  }
    271 
    272  //
    273  // The total amount of data is less than the size of the sniffer buffer.
    274  // Analyze the buffer now...
    275  //
    276  if (contentTypeEmpty) {
    277    DetermineContentType(request);
    278 
    279    // Make sure channel listeners see channel as pending while we call
    280    // OnStartRequest/OnDataAvailable, even though the underlying channel
    281    // has already hit OnStopRequest.
    282    nsCOMPtr<nsIForcePendingChannel> forcePendingChannel =
    283        do_QueryInterface(request);
    284    if (forcePendingChannel) {
    285      forcePendingChannel->ForcePending(true);
    286    }
    287 
    288    rv = FireListenerNotifications(request, nullptr);
    289 
    290    if (NS_FAILED(rv)) {
    291      aStatus = rv;
    292    }
    293 
    294    // now we need to set pending state to false before calling OnStopRequest
    295    if (forcePendingChannel) {
    296      forcePendingChannel->ForcePending(false);
    297    }
    298  }
    299 
    300  nsCOMPtr<nsIStreamListener> listener;
    301  {
    302    MutexAutoLock lock(mMutex);
    303    listener = mNextListener;
    304    mNextListener = nullptr;
    305  }
    306  rv = listener->OnStopRequest(request, aStatus);
    307 
    308  return rv;
    309 }
    310 
    311 // ----
    312 //
    313 // nsIContentSniffer methods...
    314 //
    315 // ----
    316 NS_IMETHODIMP
    317 nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest,
    318                                         const uint8_t* aData, uint32_t aLength,
    319                                         nsACString& type) {
    320  // This is only used by sniffer, therefore we do not need to lock anything
    321  // here.
    322  nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest));
    323  if (channel) {
    324    nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo();
    325    if (loadInfo->GetSkipContentSniffing()) {
    326      return NS_ERROR_NOT_AVAILABLE;
    327    }
    328  }
    329 
    330  mBuffer = const_cast<char*>(reinterpret_cast<const char*>(aData));
    331  mBufferLen = aLength;
    332  DetermineContentType(aRequest);
    333  mBuffer = nullptr;
    334  mBufferLen = 0;
    335  type.Assign(mContentType);
    336  mContentType.Truncate();
    337  return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK;
    338 }
    339 
    340 // Actual sniffing code
    341 
    342 /**
    343 * This is the array of sniffer entries that depend on "magic numbers"
    344 * in the file.  Each entry has either a type associated with it (set
    345 * these with the SNIFFER_ENTRY macro) or a function to be executed
    346 * (set these with the SNIFFER_ENTRY_WITH_FUNC macro).  The function
    347 * should take a single nsIRequest* and returns bool -- true if
    348 * it sets mContentType, false otherwise
    349 */
    350 nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = {
    351    SNIFFER_ENTRY("%PDF-", APPLICATION_PDF),
    352 
    353    SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT),
    354 
    355    // Files that start with mailbox delimiters let's provisionally call
    356    // text/plain
    357    SNIFFER_ENTRY("From", TEXT_PLAIN), SNIFFER_ENTRY(">From", TEXT_PLAIN),
    358 
    359    // If the buffer begins with "#!" or "%!" then it is a script of
    360    // some sort...  "Scripts" can include arbitrary data to be passed
    361    // to an interpreter, so we need to decide whether we can call this
    362    // text or whether it's data.
    363    SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff),
    364 
    365    // XXXbz should (and can) we also include the various ways that <?xml can
    366    // appear as UTF-16 and such?  See http://www.w3.org/TR/REC-xml#sec-guessing
    367    SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML)};
    368 
    369 uint32_t nsUnknownDecoder::sSnifferEntryNum =
    370    sizeof(nsUnknownDecoder::sSnifferEntries) /
    371    sizeof(nsUnknownDecoder::nsSnifferEntry);
    372 
    373 void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest) {
    374  {
    375    MutexAutoLock lock(mMutex);
    376    NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known.");
    377    if (!mContentType.IsEmpty()) return;
    378  }
    379 
    380  nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest));
    381  if (channel) {
    382    nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo();
    383    if (loadInfo->GetSkipContentSniffing()) {
    384      /*
    385       * If we did not get a useful Content-Type from the server
    386       * but also have sniffing disabled, just determine whether
    387       * to use text/plain or octetstream and log an error to the Console
    388       */
    389      LastDitchSniff(aRequest);
    390 
    391      nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(aRequest));
    392      if (httpChannel) {
    393        nsAutoCString type;
    394        httpChannel->GetContentType(type);
    395        nsCOMPtr<nsIURI> requestUri;
    396        httpChannel->GetURI(getter_AddRefs(requestUri));
    397        nsAutoCString spec;
    398        requestUri->GetSpec(spec);
    399        if (spec.Length() > 50) {
    400          spec.Truncate(50);
    401          spec.AppendLiteral("...");
    402        }
    403        httpChannel->LogMimeTypeMismatch(
    404            "XTCOWithMIMEValueMissing"_ns, false, NS_ConvertUTF8toUTF16(spec),
    405            // Type is not used in the Error Message but required
    406            NS_ConvertUTF8toUTF16(type));
    407      }
    408      return;
    409    }
    410  }
    411 
    412  const char* testData = mBuffer;
    413  uint32_t testDataLen = mBufferLen;
    414  // Check if data are compressed.
    415  nsAutoCString decodedData;
    416 
    417  if (channel) {
    418    // ConvertEncodedData is always called only on a single thread for each
    419    // instance of an object.
    420    nsresult rv = ConvertEncodedData(aRequest, mBuffer, mBufferLen);
    421    if (NS_SUCCEEDED(rv)) {
    422      MutexAutoLock lock(mMutex);
    423      decodedData = mDecodedData;
    424    }
    425    if (!decodedData.IsEmpty()) {
    426      testData = decodedData.get();
    427      testDataLen = std::min<uint32_t>(decodedData.Length(), MAX_BUFFER_SIZE);
    428    }
    429  }
    430 
    431  // First, run through all the types we can detect reliably based on
    432  // magic numbers
    433  uint32_t i;
    434  for (i = 0; i < sSnifferEntryNum; ++i) {
    435    if (testDataLen >= sSnifferEntries[i].mByteLen &&  // enough data
    436        memcmp(testData, sSnifferEntries[i].mBytes,
    437               sSnifferEntries[i].mByteLen) == 0) {  // and type matches
    438      NS_ASSERTION(
    439          sSnifferEntries[i].mMimeType ||
    440              sSnifferEntries[i].mContentTypeSniffer,
    441          "Must have either a type string or a function to set the type");
    442      NS_ASSERTION(!sSnifferEntries[i].mMimeType ||
    443                       !sSnifferEntries[i].mContentTypeSniffer,
    444                   "Both a type string and a type sniffing function set;"
    445                   " using type string");
    446      if (sSnifferEntries[i].mMimeType) {
    447        MutexAutoLock lock(mMutex);
    448        mContentType = sSnifferEntries[i].mMimeType;
    449        NS_ASSERTION(!mContentType.IsEmpty(),
    450                     "Content type should be known by now.");
    451        return;
    452      }
    453      if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) {
    454 #ifdef DEBUG
    455        MutexAutoLock lock(mMutex);
    456        NS_ASSERTION(!mContentType.IsEmpty(),
    457                     "Content type should be known by now.");
    458 #endif
    459        return;
    460      }
    461    }
    462  }
    463 
    464  nsAutoCString sniffedType;
    465  NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest, (const uint8_t*)testData,
    466                  testDataLen, sniffedType);
    467  {
    468    MutexAutoLock lock(mMutex);
    469    mContentType = sniffedType;
    470    if (!mContentType.IsEmpty()) {
    471      return;
    472    }
    473  }
    474 
    475  if (SniffForHTML(aRequest)) {
    476 #ifdef DEBUG
    477    MutexAutoLock lock(mMutex);
    478    NS_ASSERTION(!mContentType.IsEmpty(),
    479                 "Content type should be known by now.");
    480 #endif
    481    return;
    482  }
    483 
    484  nsCOMPtr<nsIURI> uri;
    485  NS_GetFinalChannelURI(channel, getter_AddRefs(uri));
    486 
    487  // We don't know what this is yet.  Before we just give up, try
    488  // the URI from the request.
    489  if ((StaticPrefs::network_sniff_use_extension() ||
    490       (uri && uri->SchemeIs("file"))) &&
    491      SniffURI(aRequest)) {
    492 #ifdef DEBUG
    493    MutexAutoLock lock(mMutex);
    494    NS_ASSERTION(!mContentType.IsEmpty(),
    495                 "Content type should be known by now.");
    496 #endif
    497    return;
    498  }
    499 
    500  LastDitchSniff(aRequest);
    501 #ifdef DEBUG
    502  MutexAutoLock lock(mMutex);
    503  NS_ASSERTION(!mContentType.IsEmpty(), "Content type should be known by now.");
    504 #endif
    505 }
    506 
    507 // https://mimesniff.spec.whatwg.org/#identifying-a-resource-with-an-unknown-mime-type
    508 bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest) {
    509  MutexAutoLock lock(mMutex);
    510 
    511  // Now look for HTML.
    512  const char* str;
    513  const char* end;
    514  if (mDecodedData.IsEmpty()) {
    515    str = mBuffer;
    516    end = mBuffer + mBufferLen;
    517  } else {
    518    str = mDecodedData.get();
    519    end = mDecodedData.get() +
    520          std::min<uint32_t>(mDecodedData.Length(), MAX_BUFFER_SIZE);
    521  }
    522 
    523  // skip leading whitespace
    524  while (str != end && nsCRT::IsAsciiSpace(*str)) {
    525    ++str;
    526  }
    527 
    528  // did we find something like a start tag?
    529  if (str == end || *str != '<' || ++str == end) {
    530    return false;
    531  }
    532 
    533  uint32_t bufSize = end - str;
    534  nsDependentCSubstring substr(str, bufSize);
    535 
    536  if (StringBeginsWith(substr, "?xml"_ns)) {
    537    mContentType = TEXT_XML;
    538    return true;
    539  }
    540 
    541  // We use sizeof(_tagstr) below because that's the length of _tagstr
    542  // with the one char " " or ">" appended.
    543 #define MATCHES_TAG(_tagstr)                               \
    544  (substr.Length() >= sizeof(_tagstr) &&                   \
    545   StringBeginsWith(substr, _tagstr##_ns,                  \
    546                    nsCaseInsensitiveCStringComparator) && \
    547   (substr[sizeof(_tagstr) - 1] == ' ' || substr[sizeof(_tagstr) - 1] == '>'))
    548 
    549  if (MATCHES_TAG("!DOCTYPE HTML") || MATCHES_TAG("html") ||
    550      MATCHES_TAG("head") || MATCHES_TAG("script") || MATCHES_TAG("iframe") ||
    551      MATCHES_TAG("h1") || MATCHES_TAG("div") || MATCHES_TAG("font") ||
    552      MATCHES_TAG("table") || MATCHES_TAG("a") || MATCHES_TAG("style") ||
    553      MATCHES_TAG("title") || MATCHES_TAG("b") || MATCHES_TAG("body") ||
    554      MATCHES_TAG("br") || MATCHES_TAG("p") || MATCHES_TAG("!--")) {
    555    mContentType = TEXT_HTML;
    556    return true;
    557  }
    558 
    559  if (StaticPrefs::network_mimesniff_extra_moz_html_tags()) {
    560    if (MATCHES_TAG("frameset") || MATCHES_TAG("img") || MATCHES_TAG("link") ||
    561        MATCHES_TAG("base") || MATCHES_TAG("applet") || MATCHES_TAG("meta") ||
    562        MATCHES_TAG("center") || MATCHES_TAG("form") ||
    563        MATCHES_TAG("isindex") || MATCHES_TAG("h2") || MATCHES_TAG("h3") ||
    564        MATCHES_TAG("h4") || MATCHES_TAG("h5") || MATCHES_TAG("h6") ||
    565        MATCHES_TAG("pre")) {
    566      mContentType = TEXT_HTML;
    567      return true;
    568    }
    569  }
    570 
    571 #undef MATCHES_TAG
    572 
    573  return false;
    574 }
    575 
    576 bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest) {
    577  // First see whether we can glean anything from the uri...
    578  if (!StaticPrefs::network_sniff_use_extension() || !SniffURI(aRequest)) {
    579    // Oh well; just generic XML will have to do
    580    MutexAutoLock lock(mMutex);
    581    mContentType = TEXT_XML;
    582  }
    583 
    584  return true;
    585 }
    586 
    587 bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest) {
    588  nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest));
    589  nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo();
    590  if (loadInfo->GetSkipContentSniffing()) {
    591    return false;
    592  }
    593  nsCOMPtr<nsIMIMEService> mimeService(do_GetService("@mozilla.org/mime;1"));
    594  if (mimeService) {
    595    nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
    596    if (channel) {
    597      nsCOMPtr<nsIURI> uri;
    598      nsresult result = channel->GetURI(getter_AddRefs(uri));
    599      if (NS_SUCCEEDED(result) && uri) {
    600        nsAutoCString type;
    601        result = mimeService->GetTypeFromURI(uri, type);
    602        if (NS_SUCCEEDED(result)) {
    603          MutexAutoLock lock(mMutex);
    604          mContentType = type;
    605          return true;
    606        }
    607      }
    608    }
    609  }
    610 
    611  return false;
    612 }
    613 
    614 // This macro is based on RFC 2046 Section 4.1.2.  Treat any char 0-31
    615 // except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by
    616 // encodings like Shift_JIS) as non-text
    617 #define IS_TEXT_CHAR(ch) \
    618  (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27)
    619 
    620 bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest) {
    621  // All we can do now is try to guess whether this is text/plain or
    622  // application/octet-stream
    623 
    624  MutexAutoLock lock(mMutex);
    625 
    626  const char* testData;
    627  uint32_t testDataLen;
    628  if (mDecodedData.IsEmpty()) {
    629    testData = mBuffer;
    630    // Since some legacy text files end with 0x1A, reading the entire buffer
    631    // will lead misdetection.
    632    testDataLen = std::min<uint32_t>(mBufferLen, MAX_BUFFER_SIZE);
    633  } else {
    634    testData = mDecodedData.get();
    635    testDataLen = std::min<uint32_t>(mDecodedData.Length(), MAX_BUFFER_SIZE);
    636  }
    637 
    638  // First, check for a BOM.  If we see one, assume this is text/plain
    639  // in whatever encoding.  If there is a BOM _and_ text we will
    640  // always have at least 4 bytes in the buffer (since the 2-byte BOMs
    641  // are for 2-byte encodings and the UTF-8 BOM is 3 bytes).
    642  if (testDataLen >= 4) {
    643    const unsigned char* buf = (const unsigned char*)testData;
    644    if ((buf[0] == 0xFE && buf[1] == 0xFF) ||  // UTF-16, Big Endian
    645        (buf[0] == 0xFF && buf[1] == 0xFE) ||  // UTF-16 or UCS-4, Little Endian
    646        (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) ||  // UTF-8
    647        (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE &&
    648         buf[3] == 0xFF)) {  // UCS-4, Big Endian
    649 
    650      mContentType = TEXT_PLAIN;
    651      return true;
    652    }
    653  }
    654 
    655  // Now see whether the buffer has any non-text chars.  If not, then let's
    656  // just call it text/plain...
    657  //
    658  uint32_t i;
    659  for (i = 0; i < testDataLen && IS_TEXT_CHAR(testData[i]); i++) {
    660  }
    661 
    662  if (i == testDataLen) {
    663    mContentType = TEXT_PLAIN;
    664  } else {
    665    mContentType = APPLICATION_OCTET_STREAM;
    666  }
    667 
    668  return true;
    669 }
    670 
    671 nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request,
    672                                                     nsISupports* aCtxt) {
    673  nsresult rv = NS_OK;
    674 
    675  nsCOMPtr<nsIStreamListener> listener;
    676  nsAutoCString contentType;
    677  {
    678    MutexAutoLock lock(mMutex);
    679    if (!mNextListener) return NS_ERROR_FAILURE;
    680 
    681    listener = mNextListener;
    682    contentType = mContentType;
    683  }
    684 
    685  if (!contentType.IsEmpty()) {
    686    nsCOMPtr<nsIViewSourceChannel> viewSourceChannel =
    687        do_QueryInterface(request);
    688    if (viewSourceChannel) {
    689      rv = viewSourceChannel->SetOriginalContentType(contentType);
    690    } else {
    691      nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv);
    692      if (NS_SUCCEEDED(rv)) {
    693        // Set the new content type on the channel...
    694        rv = channel->SetContentType(contentType);
    695      }
    696    }
    697 
    698    NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!");
    699 
    700    if (NS_FAILED(rv)) {
    701      // Cancel the request to make sure it has the correct status if
    702      // mNextListener looks at it.
    703      request->Cancel(rv);
    704      listener->OnStartRequest(request);
    705      return rv;
    706    }
    707  }
    708 
    709  // Fire the OnStartRequest(...)
    710  rv = listener->OnStartRequest(request);
    711 
    712  if (NS_SUCCEEDED(rv)) {
    713    // install stream converter if required
    714    nsCOMPtr<nsIEncodedChannel> encodedChannel = do_QueryInterface(request);
    715    if (encodedChannel) {
    716      nsCOMPtr<nsIStreamListener> listenerNew;
    717      rv = encodedChannel->DoApplyContentConversions(
    718          listener, getter_AddRefs(listenerNew), aCtxt);
    719      if (NS_SUCCEEDED(rv) && listenerNew) {
    720        MutexAutoLock lock(mMutex);
    721        mNextListener = listenerNew;
    722        listener = listenerNew;
    723      }
    724    }
    725  }
    726 
    727  if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;
    728 
    729  // If the request was canceled, then we need to treat that equivalently
    730  // to an error returned by OnStartRequest.
    731  if (NS_SUCCEEDED(rv)) request->GetStatus(&rv);
    732 
    733  // Fire the first OnDataAvailable for the data that was read from the
    734  // stream into the sniffer buffer...
    735  if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) {
    736    uint32_t len = 0;
    737    nsCOMPtr<nsIInputStream> in;
    738    nsCOMPtr<nsIOutputStream> out;
    739 
    740    // Create a pipe and fill it with the data from the sniffer buffer.
    741    NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out), MAX_BUFFER_SIZE,
    742               MAX_BUFFER_SIZE);
    743 
    744    rv = out->Write(mBuffer, mBufferLen, &len);
    745    if (NS_SUCCEEDED(rv)) {
    746      if (len == mBufferLen) {
    747        rv = listener->OnDataAvailable(request, in, 0, len);
    748      } else {
    749        NS_ERROR("Unable to write all the data into the pipe.");
    750        rv = NS_ERROR_FAILURE;
    751      }
    752    }
    753  }
    754 
    755  delete[] mBuffer;
    756  mBuffer = nullptr;
    757  mBufferLen = 0;
    758 
    759  return rv;
    760 }
    761 
    762 nsresult nsUnknownDecoder::ConvertEncodedData(nsIRequest* request,
    763                                              const char* data,
    764                                              uint32_t length) {
    765  nsresult rv = NS_OK;
    766 
    767  {
    768    MutexAutoLock lock(mMutex);
    769    mDecodedData = "";
    770  }
    771  nsCOMPtr<nsIEncodedChannel> encodedChannel(do_QueryInterface(request));
    772  if (encodedChannel) {
    773    RefPtr<ConvertedStreamListener> strListener =
    774        new ConvertedStreamListener(this);
    775 
    776    nsCOMPtr<nsIStreamListener> listener;
    777    rv = encodedChannel->DoApplyContentConversions(
    778        strListener, getter_AddRefs(listener), nullptr);
    779 
    780    if (NS_FAILED(rv)) {
    781      return rv;
    782    }
    783 
    784    if (listener) {
    785      listener->OnStartRequest(request);
    786 
    787      if (length) {
    788        nsCOMPtr<nsIStringInputStream> rawStream =
    789            do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID);
    790        if (!rawStream) return NS_ERROR_FAILURE;
    791 
    792        // Other OnDataAvailable callers use `ShareData`, can we use that here?
    793        rv = rawStream->CopyData((const char*)data, length);
    794        NS_ENSURE_SUCCESS(rv, rv);
    795 
    796        rv = listener->OnDataAvailable(request, rawStream, 0, length);
    797        NS_ENSURE_SUCCESS(rv, rv);
    798      }
    799 
    800      listener->OnStopRequest(request, NS_OK);
    801    }
    802  }
    803  return rv;
    804 }
    805 
    806 //
    807 // nsIThreadRetargetableStreamListener methods
    808 //
    809 NS_IMETHODIMP
    810 nsUnknownDecoder::CheckListenerChain() {
    811  nsCOMPtr<nsIThreadRetargetableStreamListener> listener;
    812  {
    813    MutexAutoLock lock(mMutex);
    814    listener = do_QueryInterface(mNextListener);
    815  }
    816  if (!listener) {
    817    return NS_ERROR_NO_INTERFACE;
    818  }
    819 
    820  return listener->CheckListenerChain();
    821 }
    822 
    823 NS_IMETHODIMP
    824 nsUnknownDecoder::OnDataFinished(nsresult aStatus) {
    825  nsCOMPtr<nsIThreadRetargetableStreamListener> listener;
    826  {
    827    MutexAutoLock lock(mMutex);
    828    listener = do_QueryInterface(mNextListener);
    829  }
    830  if (listener) {
    831    return listener->OnDataFinished(aStatus);
    832  }
    833 
    834  return NS_OK;
    835 }
    836 
    837 void nsBinaryDetector::DetermineContentType(nsIRequest* aRequest) {
    838  nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(aRequest);
    839  if (!httpChannel) {
    840    return;
    841  }
    842 
    843  nsCOMPtr<nsILoadInfo> loadInfo = httpChannel->LoadInfo();
    844  if (loadInfo->GetSkipContentSniffing()) {
    845    LastDitchSniff(aRequest);
    846    return;
    847  }
    848  // It's an HTTP channel.  Check for the text/plain mess
    849  nsAutoCString contentTypeHdr;
    850  (void)httpChannel->GetResponseHeader("Content-Type"_ns, contentTypeHdr);
    851  nsAutoCString contentType;
    852  httpChannel->GetContentType(contentType);
    853 
    854  // Make sure to do a case-sensitive exact match comparison here.  Apache
    855  // 1.x just sends text/plain for "unknown", while Apache 2.x sends
    856  // text/plain with a ISO-8859-1 charset.  Debian's Apache version, just to
    857  // be different, sends text/plain with iso-8859-1 charset.  For extra fun,
    858  // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8.  Don't do general
    859  // case-insensitive comparison, since we really want to apply this crap as
    860  // rarely as we can.
    861  if (!contentType.EqualsLiteral("text/plain") ||
    862      (!contentTypeHdr.EqualsLiteral("text/plain") &&
    863       !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") &&
    864       !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") &&
    865       !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) {
    866    return;
    867  }
    868 
    869  // Check whether we have content-encoding.  If we do, don't try to
    870  // detect the type.
    871  // XXXbz we could improve this by doing a local decompress if we
    872  // wanted, I'm sure.
    873  nsAutoCString contentEncoding;
    874  (void)httpChannel->GetResponseHeader("Content-Encoding"_ns, contentEncoding);
    875  if (!contentEncoding.IsEmpty()) {
    876    return;
    877  }
    878 
    879  LastDitchSniff(aRequest);
    880  MutexAutoLock lock(mMutex);
    881  if (mContentType.EqualsLiteral(APPLICATION_OCTET_STREAM)) {
    882    // We want to guess at it instead
    883    mContentType = APPLICATION_GUESS_FROM_EXT;
    884  } else {
    885    // Let the text/plain type we already have be, so that other content
    886    // sniffers can also get a shot at this data.
    887    mContentType.Truncate();
    888  }
    889 }