nsUnknownDecoder.cpp (26973B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "nsUnknownDecoder.h" 7 #include "nsIPipe.h" 8 #include "nsIInputStream.h" 9 #include "nsIOutputStream.h" 10 #include "nsMimeTypes.h" 11 12 #include "nsCRT.h" 13 14 #include "nsIMIMEService.h" 15 16 #include "nsIViewSourceChannel.h" 17 #include "nsIHttpChannel.h" 18 #include "nsIForcePendingChannel.h" 19 #include "nsIEncodedChannel.h" 20 #include "nsIURI.h" 21 #include "nsStringStream.h" 22 #include "nsNetCID.h" 23 #include "nsNetUtil.h" 24 #include "nsQueryObject.h" 25 #include "nsComponentManagerUtils.h" 26 #include "nsServiceManagerUtils.h" 27 #include "mozilla/StaticPrefs_network.h" 28 29 #include <algorithm> 30 31 #define MAX_BUFFER_SIZE 512u 32 33 using namespace mozilla; 34 35 NS_IMPL_ISUPPORTS(nsUnknownDecoder::ConvertedStreamListener, nsIStreamListener, 36 nsIRequestObserver) 37 38 nsUnknownDecoder::ConvertedStreamListener::ConvertedStreamListener( 39 nsUnknownDecoder* aDecoder) { 40 mDecoder = aDecoder; 41 } 42 43 nsresult nsUnknownDecoder::ConvertedStreamListener::AppendDataToString( 44 nsIInputStream* inputStream, void* closure, const char* rawSegment, 45 uint32_t toOffset, uint32_t count, uint32_t* writeCount) { 46 nsCString* decodedData = static_cast<nsCString*>(closure); 47 decodedData->Append(rawSegment, count); 48 *writeCount = count; 49 return NS_OK; 50 } 51 52 NS_IMETHODIMP 53 nsUnknownDecoder::ConvertedStreamListener::OnStartRequest(nsIRequest* request) { 54 return NS_OK; 55 } 56 57 NS_IMETHODIMP 58 nsUnknownDecoder::ConvertedStreamListener::OnDataAvailable( 59 nsIRequest* request, nsIInputStream* stream, uint64_t offset, 60 uint32_t count) { 61 uint32_t read; 62 nsAutoCString decodedData; 63 { 64 MutexAutoLock lock(mDecoder->mMutex); 65 decodedData = mDecoder->mDecodedData; 66 } 67 nsresult rv = 68 stream->ReadSegments(AppendDataToString, &decodedData, count, &read); 69 if (NS_FAILED(rv)) { 70 return rv; 71 } 72 MutexAutoLock lock(mDecoder->mMutex); 73 mDecoder->mDecodedData = decodedData; 74 return NS_OK; 75 } 76 77 NS_IMETHODIMP 78 nsUnknownDecoder::ConvertedStreamListener::OnStopRequest(nsIRequest* request, 79 nsresult status) { 80 return NS_OK; 81 } 82 83 nsUnknownDecoder::nsUnknownDecoder(nsIStreamListener* aListener) 84 : mNextListener(aListener), 85 mBuffer(nullptr), 86 mBufferLen(0), 87 mMutex("nsUnknownDecoder"), 88 mDecodedData("") {} 89 90 nsUnknownDecoder::~nsUnknownDecoder() { 91 if (mBuffer) { 92 delete[] mBuffer; 93 mBuffer = nullptr; 94 } 95 } 96 97 // ---- 98 // 99 // nsISupports implementation... 100 // 101 // ---- 102 103 NS_IMPL_ADDREF(nsUnknownDecoder) 104 NS_IMPL_RELEASE(nsUnknownDecoder) 105 106 NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder) 107 NS_INTERFACE_MAP_ENTRY(nsIStreamConverter) 108 NS_INTERFACE_MAP_ENTRY(nsIStreamListener) 109 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) 110 NS_INTERFACE_MAP_ENTRY(nsIContentSniffer) 111 NS_INTERFACE_MAP_ENTRY(nsIThreadRetargetableStreamListener) 112 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, 113 nsIThreadRetargetableStreamListener) 114 NS_INTERFACE_MAP_END 115 116 // ---- 117 // 118 // nsIStreamConverter methods... 119 // 120 // ---- 121 122 NS_IMETHODIMP 123 nsUnknownDecoder::Convert(nsIInputStream* aFromStream, const char* aFromType, 124 const char* aToType, nsISupports* aCtxt, 125 nsIInputStream** aResultStream) { 126 return NS_ERROR_NOT_IMPLEMENTED; 127 } 128 129 NS_IMETHODIMP 130 nsUnknownDecoder::AsyncConvertData(const char* aFromType, const char* aToType, 131 nsIStreamListener* aListener, 132 nsISupports* aCtxt) { 133 NS_ASSERTION(aListener && aFromType && aToType, 134 "null pointer passed into multi mixed converter"); 135 // hook up our final listener. this guy gets the various On*() calls we want 136 // to throw at him. 137 // 138 139 MutexAutoLock lock(mMutex); 140 mNextListener = aListener; 141 return (aListener) ? NS_OK : NS_ERROR_FAILURE; 142 } 143 144 NS_IMETHODIMP 145 nsUnknownDecoder::GetConvertedType(const nsACString& aFromType, 146 nsIChannel* aChannel, nsACString& aToType) { 147 return NS_ERROR_NOT_IMPLEMENTED; 148 } 149 150 // ---- 151 // 152 // nsIStreamListener methods... 153 // 154 // ---- 155 156 NS_IMETHODIMP 157 nsUnknownDecoder::OnDataAvailable(nsIRequest* request, nsIInputStream* aStream, 158 uint64_t aSourceOffset, uint32_t aCount) { 159 nsresult rv = NS_OK; 160 161 bool contentTypeEmpty; 162 { 163 MutexAutoLock lock(mMutex); 164 if (!mNextListener) return NS_ERROR_FAILURE; 165 166 contentTypeEmpty = mContentType.IsEmpty(); 167 } 168 169 if (contentTypeEmpty) { 170 uint32_t count, len; 171 172 // If the buffer has not been allocated by now, just fail... 173 if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; 174 175 // 176 // Determine how much of the stream should be read to fill up the 177 // sniffer buffer... 178 // 179 if (mBufferLen + aCount >= MAX_BUFFER_SIZE) { 180 count = MAX_BUFFER_SIZE - mBufferLen; 181 } else { 182 count = aCount; 183 } 184 185 // Read the data into the buffer... 186 rv = aStream->Read((mBuffer + mBufferLen), count, &len); 187 if (NS_FAILED(rv)) return rv; 188 189 mBufferLen += len; 190 aCount -= len; 191 192 if (aCount) { 193 // 194 // Adjust the source offset... The call to FireListenerNotifications(...) 195 // will make the first OnDataAvailable(...) call with an offset of 0. 196 // So, this offset needs to be adjusted to reflect that... 197 // 198 aSourceOffset += mBufferLen; 199 200 DetermineContentType(request); 201 202 rv = FireListenerNotifications(request, nullptr); 203 } 204 } 205 206 // Must not fire ODA again if it failed once 207 if (aCount && NS_SUCCEEDED(rv)) { 208 #ifdef DEBUG 209 { 210 MutexAutoLock lock(mMutex); 211 NS_ASSERTION(!mContentType.IsEmpty(), 212 "Content type should be known by now."); 213 } 214 #endif 215 216 nsCOMPtr<nsIStreamListener> listener; 217 { 218 MutexAutoLock lock(mMutex); 219 listener = mNextListener; 220 } 221 rv = listener->OnDataAvailable(request, aStream, aSourceOffset, aCount); 222 } 223 224 return rv; 225 } 226 227 NS_IMETHODIMP 228 nsUnknownDecoder::MaybeRetarget(nsIRequest* request) { 229 return NS_ERROR_NOT_IMPLEMENTED; 230 } 231 232 // ---- 233 // 234 // nsIRequestObserver methods... 235 // 236 // ---- 237 238 NS_IMETHODIMP 239 nsUnknownDecoder::OnStartRequest(nsIRequest* request) { 240 nsresult rv = NS_OK; 241 242 { 243 MutexAutoLock lock(mMutex); 244 if (!mNextListener) return NS_ERROR_FAILURE; 245 } 246 247 // Allocate the sniffer buffer... 248 if (NS_SUCCEEDED(rv) && !mBuffer) { 249 mBuffer = new char[MAX_BUFFER_SIZE]; 250 251 if (!mBuffer) { 252 rv = NS_ERROR_OUT_OF_MEMORY; 253 } 254 } 255 256 // Do not pass the OnStartRequest on to the next listener (yet)... 257 return rv; 258 } 259 260 NS_IMETHODIMP 261 nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsresult aStatus) { 262 nsresult rv = NS_OK; 263 264 bool contentTypeEmpty; 265 { 266 MutexAutoLock lock(mMutex); 267 if (!mNextListener) return NS_ERROR_FAILURE; 268 269 contentTypeEmpty = mContentType.IsEmpty(); 270 } 271 272 // 273 // The total amount of data is less than the size of the sniffer buffer. 274 // Analyze the buffer now... 275 // 276 if (contentTypeEmpty) { 277 DetermineContentType(request); 278 279 // Make sure channel listeners see channel as pending while we call 280 // OnStartRequest/OnDataAvailable, even though the underlying channel 281 // has already hit OnStopRequest. 282 nsCOMPtr<nsIForcePendingChannel> forcePendingChannel = 283 do_QueryInterface(request); 284 if (forcePendingChannel) { 285 forcePendingChannel->ForcePending(true); 286 } 287 288 rv = FireListenerNotifications(request, nullptr); 289 290 if (NS_FAILED(rv)) { 291 aStatus = rv; 292 } 293 294 // now we need to set pending state to false before calling OnStopRequest 295 if (forcePendingChannel) { 296 forcePendingChannel->ForcePending(false); 297 } 298 } 299 300 nsCOMPtr<nsIStreamListener> listener; 301 { 302 MutexAutoLock lock(mMutex); 303 listener = mNextListener; 304 mNextListener = nullptr; 305 } 306 rv = listener->OnStopRequest(request, aStatus); 307 308 return rv; 309 } 310 311 // ---- 312 // 313 // nsIContentSniffer methods... 314 // 315 // ---- 316 NS_IMETHODIMP 317 nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest, 318 const uint8_t* aData, uint32_t aLength, 319 nsACString& type) { 320 // This is only used by sniffer, therefore we do not need to lock anything 321 // here. 322 nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest)); 323 if (channel) { 324 nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo(); 325 if (loadInfo->GetSkipContentSniffing()) { 326 return NS_ERROR_NOT_AVAILABLE; 327 } 328 } 329 330 mBuffer = const_cast<char*>(reinterpret_cast<const char*>(aData)); 331 mBufferLen = aLength; 332 DetermineContentType(aRequest); 333 mBuffer = nullptr; 334 mBufferLen = 0; 335 type.Assign(mContentType); 336 mContentType.Truncate(); 337 return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK; 338 } 339 340 // Actual sniffing code 341 342 /** 343 * This is the array of sniffer entries that depend on "magic numbers" 344 * in the file. Each entry has either a type associated with it (set 345 * these with the SNIFFER_ENTRY macro) or a function to be executed 346 * (set these with the SNIFFER_ENTRY_WITH_FUNC macro). The function 347 * should take a single nsIRequest* and returns bool -- true if 348 * it sets mContentType, false otherwise 349 */ 350 nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = { 351 SNIFFER_ENTRY("%PDF-", APPLICATION_PDF), 352 353 SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT), 354 355 // Files that start with mailbox delimiters let's provisionally call 356 // text/plain 357 SNIFFER_ENTRY("From", TEXT_PLAIN), SNIFFER_ENTRY(">From", TEXT_PLAIN), 358 359 // If the buffer begins with "#!" or "%!" then it is a script of 360 // some sort... "Scripts" can include arbitrary data to be passed 361 // to an interpreter, so we need to decide whether we can call this 362 // text or whether it's data. 363 SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff), 364 365 // XXXbz should (and can) we also include the various ways that <?xml can 366 // appear as UTF-16 and such? See http://www.w3.org/TR/REC-xml#sec-guessing 367 SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML)}; 368 369 uint32_t nsUnknownDecoder::sSnifferEntryNum = 370 sizeof(nsUnknownDecoder::sSnifferEntries) / 371 sizeof(nsUnknownDecoder::nsSnifferEntry); 372 373 void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest) { 374 { 375 MutexAutoLock lock(mMutex); 376 NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known."); 377 if (!mContentType.IsEmpty()) return; 378 } 379 380 nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest)); 381 if (channel) { 382 nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo(); 383 if (loadInfo->GetSkipContentSniffing()) { 384 /* 385 * If we did not get a useful Content-Type from the server 386 * but also have sniffing disabled, just determine whether 387 * to use text/plain or octetstream and log an error to the Console 388 */ 389 LastDitchSniff(aRequest); 390 391 nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(aRequest)); 392 if (httpChannel) { 393 nsAutoCString type; 394 httpChannel->GetContentType(type); 395 nsCOMPtr<nsIURI> requestUri; 396 httpChannel->GetURI(getter_AddRefs(requestUri)); 397 nsAutoCString spec; 398 requestUri->GetSpec(spec); 399 if (spec.Length() > 50) { 400 spec.Truncate(50); 401 spec.AppendLiteral("..."); 402 } 403 httpChannel->LogMimeTypeMismatch( 404 "XTCOWithMIMEValueMissing"_ns, false, NS_ConvertUTF8toUTF16(spec), 405 // Type is not used in the Error Message but required 406 NS_ConvertUTF8toUTF16(type)); 407 } 408 return; 409 } 410 } 411 412 const char* testData = mBuffer; 413 uint32_t testDataLen = mBufferLen; 414 // Check if data are compressed. 415 nsAutoCString decodedData; 416 417 if (channel) { 418 // ConvertEncodedData is always called only on a single thread for each 419 // instance of an object. 420 nsresult rv = ConvertEncodedData(aRequest, mBuffer, mBufferLen); 421 if (NS_SUCCEEDED(rv)) { 422 MutexAutoLock lock(mMutex); 423 decodedData = mDecodedData; 424 } 425 if (!decodedData.IsEmpty()) { 426 testData = decodedData.get(); 427 testDataLen = std::min<uint32_t>(decodedData.Length(), MAX_BUFFER_SIZE); 428 } 429 } 430 431 // First, run through all the types we can detect reliably based on 432 // magic numbers 433 uint32_t i; 434 for (i = 0; i < sSnifferEntryNum; ++i) { 435 if (testDataLen >= sSnifferEntries[i].mByteLen && // enough data 436 memcmp(testData, sSnifferEntries[i].mBytes, 437 sSnifferEntries[i].mByteLen) == 0) { // and type matches 438 NS_ASSERTION( 439 sSnifferEntries[i].mMimeType || 440 sSnifferEntries[i].mContentTypeSniffer, 441 "Must have either a type string or a function to set the type"); 442 NS_ASSERTION(!sSnifferEntries[i].mMimeType || 443 !sSnifferEntries[i].mContentTypeSniffer, 444 "Both a type string and a type sniffing function set;" 445 " using type string"); 446 if (sSnifferEntries[i].mMimeType) { 447 MutexAutoLock lock(mMutex); 448 mContentType = sSnifferEntries[i].mMimeType; 449 NS_ASSERTION(!mContentType.IsEmpty(), 450 "Content type should be known by now."); 451 return; 452 } 453 if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) { 454 #ifdef DEBUG 455 MutexAutoLock lock(mMutex); 456 NS_ASSERTION(!mContentType.IsEmpty(), 457 "Content type should be known by now."); 458 #endif 459 return; 460 } 461 } 462 } 463 464 nsAutoCString sniffedType; 465 NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest, (const uint8_t*)testData, 466 testDataLen, sniffedType); 467 { 468 MutexAutoLock lock(mMutex); 469 mContentType = sniffedType; 470 if (!mContentType.IsEmpty()) { 471 return; 472 } 473 } 474 475 if (SniffForHTML(aRequest)) { 476 #ifdef DEBUG 477 MutexAutoLock lock(mMutex); 478 NS_ASSERTION(!mContentType.IsEmpty(), 479 "Content type should be known by now."); 480 #endif 481 return; 482 } 483 484 nsCOMPtr<nsIURI> uri; 485 NS_GetFinalChannelURI(channel, getter_AddRefs(uri)); 486 487 // We don't know what this is yet. Before we just give up, try 488 // the URI from the request. 489 if ((StaticPrefs::network_sniff_use_extension() || 490 (uri && uri->SchemeIs("file"))) && 491 SniffURI(aRequest)) { 492 #ifdef DEBUG 493 MutexAutoLock lock(mMutex); 494 NS_ASSERTION(!mContentType.IsEmpty(), 495 "Content type should be known by now."); 496 #endif 497 return; 498 } 499 500 LastDitchSniff(aRequest); 501 #ifdef DEBUG 502 MutexAutoLock lock(mMutex); 503 NS_ASSERTION(!mContentType.IsEmpty(), "Content type should be known by now."); 504 #endif 505 } 506 507 // https://mimesniff.spec.whatwg.org/#identifying-a-resource-with-an-unknown-mime-type 508 bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest) { 509 MutexAutoLock lock(mMutex); 510 511 // Now look for HTML. 512 const char* str; 513 const char* end; 514 if (mDecodedData.IsEmpty()) { 515 str = mBuffer; 516 end = mBuffer + mBufferLen; 517 } else { 518 str = mDecodedData.get(); 519 end = mDecodedData.get() + 520 std::min<uint32_t>(mDecodedData.Length(), MAX_BUFFER_SIZE); 521 } 522 523 // skip leading whitespace 524 while (str != end && nsCRT::IsAsciiSpace(*str)) { 525 ++str; 526 } 527 528 // did we find something like a start tag? 529 if (str == end || *str != '<' || ++str == end) { 530 return false; 531 } 532 533 uint32_t bufSize = end - str; 534 nsDependentCSubstring substr(str, bufSize); 535 536 if (StringBeginsWith(substr, "?xml"_ns)) { 537 mContentType = TEXT_XML; 538 return true; 539 } 540 541 // We use sizeof(_tagstr) below because that's the length of _tagstr 542 // with the one char " " or ">" appended. 543 #define MATCHES_TAG(_tagstr) \ 544 (substr.Length() >= sizeof(_tagstr) && \ 545 StringBeginsWith(substr, _tagstr##_ns, \ 546 nsCaseInsensitiveCStringComparator) && \ 547 (substr[sizeof(_tagstr) - 1] == ' ' || substr[sizeof(_tagstr) - 1] == '>')) 548 549 if (MATCHES_TAG("!DOCTYPE HTML") || MATCHES_TAG("html") || 550 MATCHES_TAG("head") || MATCHES_TAG("script") || MATCHES_TAG("iframe") || 551 MATCHES_TAG("h1") || MATCHES_TAG("div") || MATCHES_TAG("font") || 552 MATCHES_TAG("table") || MATCHES_TAG("a") || MATCHES_TAG("style") || 553 MATCHES_TAG("title") || MATCHES_TAG("b") || MATCHES_TAG("body") || 554 MATCHES_TAG("br") || MATCHES_TAG("p") || MATCHES_TAG("!--")) { 555 mContentType = TEXT_HTML; 556 return true; 557 } 558 559 if (StaticPrefs::network_mimesniff_extra_moz_html_tags()) { 560 if (MATCHES_TAG("frameset") || MATCHES_TAG("img") || MATCHES_TAG("link") || 561 MATCHES_TAG("base") || MATCHES_TAG("applet") || MATCHES_TAG("meta") || 562 MATCHES_TAG("center") || MATCHES_TAG("form") || 563 MATCHES_TAG("isindex") || MATCHES_TAG("h2") || MATCHES_TAG("h3") || 564 MATCHES_TAG("h4") || MATCHES_TAG("h5") || MATCHES_TAG("h6") || 565 MATCHES_TAG("pre")) { 566 mContentType = TEXT_HTML; 567 return true; 568 } 569 } 570 571 #undef MATCHES_TAG 572 573 return false; 574 } 575 576 bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest) { 577 // First see whether we can glean anything from the uri... 578 if (!StaticPrefs::network_sniff_use_extension() || !SniffURI(aRequest)) { 579 // Oh well; just generic XML will have to do 580 MutexAutoLock lock(mMutex); 581 mContentType = TEXT_XML; 582 } 583 584 return true; 585 } 586 587 bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest) { 588 nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest)); 589 nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo(); 590 if (loadInfo->GetSkipContentSniffing()) { 591 return false; 592 } 593 nsCOMPtr<nsIMIMEService> mimeService(do_GetService("@mozilla.org/mime;1")); 594 if (mimeService) { 595 nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); 596 if (channel) { 597 nsCOMPtr<nsIURI> uri; 598 nsresult result = channel->GetURI(getter_AddRefs(uri)); 599 if (NS_SUCCEEDED(result) && uri) { 600 nsAutoCString type; 601 result = mimeService->GetTypeFromURI(uri, type); 602 if (NS_SUCCEEDED(result)) { 603 MutexAutoLock lock(mMutex); 604 mContentType = type; 605 return true; 606 } 607 } 608 } 609 } 610 611 return false; 612 } 613 614 // This macro is based on RFC 2046 Section 4.1.2. Treat any char 0-31 615 // except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by 616 // encodings like Shift_JIS) as non-text 617 #define IS_TEXT_CHAR(ch) \ 618 (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27) 619 620 bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest) { 621 // All we can do now is try to guess whether this is text/plain or 622 // application/octet-stream 623 624 MutexAutoLock lock(mMutex); 625 626 const char* testData; 627 uint32_t testDataLen; 628 if (mDecodedData.IsEmpty()) { 629 testData = mBuffer; 630 // Since some legacy text files end with 0x1A, reading the entire buffer 631 // will lead misdetection. 632 testDataLen = std::min<uint32_t>(mBufferLen, MAX_BUFFER_SIZE); 633 } else { 634 testData = mDecodedData.get(); 635 testDataLen = std::min<uint32_t>(mDecodedData.Length(), MAX_BUFFER_SIZE); 636 } 637 638 // First, check for a BOM. If we see one, assume this is text/plain 639 // in whatever encoding. If there is a BOM _and_ text we will 640 // always have at least 4 bytes in the buffer (since the 2-byte BOMs 641 // are for 2-byte encodings and the UTF-8 BOM is 3 bytes). 642 if (testDataLen >= 4) { 643 const unsigned char* buf = (const unsigned char*)testData; 644 if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16, Big Endian 645 (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16 or UCS-4, Little Endian 646 (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8 647 (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && 648 buf[3] == 0xFF)) { // UCS-4, Big Endian 649 650 mContentType = TEXT_PLAIN; 651 return true; 652 } 653 } 654 655 // Now see whether the buffer has any non-text chars. If not, then let's 656 // just call it text/plain... 657 // 658 uint32_t i; 659 for (i = 0; i < testDataLen && IS_TEXT_CHAR(testData[i]); i++) { 660 } 661 662 if (i == testDataLen) { 663 mContentType = TEXT_PLAIN; 664 } else { 665 mContentType = APPLICATION_OCTET_STREAM; 666 } 667 668 return true; 669 } 670 671 nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request, 672 nsISupports* aCtxt) { 673 nsresult rv = NS_OK; 674 675 nsCOMPtr<nsIStreamListener> listener; 676 nsAutoCString contentType; 677 { 678 MutexAutoLock lock(mMutex); 679 if (!mNextListener) return NS_ERROR_FAILURE; 680 681 listener = mNextListener; 682 contentType = mContentType; 683 } 684 685 if (!contentType.IsEmpty()) { 686 nsCOMPtr<nsIViewSourceChannel> viewSourceChannel = 687 do_QueryInterface(request); 688 if (viewSourceChannel) { 689 rv = viewSourceChannel->SetOriginalContentType(contentType); 690 } else { 691 nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv); 692 if (NS_SUCCEEDED(rv)) { 693 // Set the new content type on the channel... 694 rv = channel->SetContentType(contentType); 695 } 696 } 697 698 NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!"); 699 700 if (NS_FAILED(rv)) { 701 // Cancel the request to make sure it has the correct status if 702 // mNextListener looks at it. 703 request->Cancel(rv); 704 listener->OnStartRequest(request); 705 return rv; 706 } 707 } 708 709 // Fire the OnStartRequest(...) 710 rv = listener->OnStartRequest(request); 711 712 if (NS_SUCCEEDED(rv)) { 713 // install stream converter if required 714 nsCOMPtr<nsIEncodedChannel> encodedChannel = do_QueryInterface(request); 715 if (encodedChannel) { 716 nsCOMPtr<nsIStreamListener> listenerNew; 717 rv = encodedChannel->DoApplyContentConversions( 718 listener, getter_AddRefs(listenerNew), aCtxt); 719 if (NS_SUCCEEDED(rv) && listenerNew) { 720 MutexAutoLock lock(mMutex); 721 mNextListener = listenerNew; 722 listener = listenerNew; 723 } 724 } 725 } 726 727 if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; 728 729 // If the request was canceled, then we need to treat that equivalently 730 // to an error returned by OnStartRequest. 731 if (NS_SUCCEEDED(rv)) request->GetStatus(&rv); 732 733 // Fire the first OnDataAvailable for the data that was read from the 734 // stream into the sniffer buffer... 735 if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) { 736 uint32_t len = 0; 737 nsCOMPtr<nsIInputStream> in; 738 nsCOMPtr<nsIOutputStream> out; 739 740 // Create a pipe and fill it with the data from the sniffer buffer. 741 NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out), MAX_BUFFER_SIZE, 742 MAX_BUFFER_SIZE); 743 744 rv = out->Write(mBuffer, mBufferLen, &len); 745 if (NS_SUCCEEDED(rv)) { 746 if (len == mBufferLen) { 747 rv = listener->OnDataAvailable(request, in, 0, len); 748 } else { 749 NS_ERROR("Unable to write all the data into the pipe."); 750 rv = NS_ERROR_FAILURE; 751 } 752 } 753 } 754 755 delete[] mBuffer; 756 mBuffer = nullptr; 757 mBufferLen = 0; 758 759 return rv; 760 } 761 762 nsresult nsUnknownDecoder::ConvertEncodedData(nsIRequest* request, 763 const char* data, 764 uint32_t length) { 765 nsresult rv = NS_OK; 766 767 { 768 MutexAutoLock lock(mMutex); 769 mDecodedData = ""; 770 } 771 nsCOMPtr<nsIEncodedChannel> encodedChannel(do_QueryInterface(request)); 772 if (encodedChannel) { 773 RefPtr<ConvertedStreamListener> strListener = 774 new ConvertedStreamListener(this); 775 776 nsCOMPtr<nsIStreamListener> listener; 777 rv = encodedChannel->DoApplyContentConversions( 778 strListener, getter_AddRefs(listener), nullptr); 779 780 if (NS_FAILED(rv)) { 781 return rv; 782 } 783 784 if (listener) { 785 listener->OnStartRequest(request); 786 787 if (length) { 788 nsCOMPtr<nsIStringInputStream> rawStream = 789 do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID); 790 if (!rawStream) return NS_ERROR_FAILURE; 791 792 // Other OnDataAvailable callers use `ShareData`, can we use that here? 793 rv = rawStream->CopyData((const char*)data, length); 794 NS_ENSURE_SUCCESS(rv, rv); 795 796 rv = listener->OnDataAvailable(request, rawStream, 0, length); 797 NS_ENSURE_SUCCESS(rv, rv); 798 } 799 800 listener->OnStopRequest(request, NS_OK); 801 } 802 } 803 return rv; 804 } 805 806 // 807 // nsIThreadRetargetableStreamListener methods 808 // 809 NS_IMETHODIMP 810 nsUnknownDecoder::CheckListenerChain() { 811 nsCOMPtr<nsIThreadRetargetableStreamListener> listener; 812 { 813 MutexAutoLock lock(mMutex); 814 listener = do_QueryInterface(mNextListener); 815 } 816 if (!listener) { 817 return NS_ERROR_NO_INTERFACE; 818 } 819 820 return listener->CheckListenerChain(); 821 } 822 823 NS_IMETHODIMP 824 nsUnknownDecoder::OnDataFinished(nsresult aStatus) { 825 nsCOMPtr<nsIThreadRetargetableStreamListener> listener; 826 { 827 MutexAutoLock lock(mMutex); 828 listener = do_QueryInterface(mNextListener); 829 } 830 if (listener) { 831 return listener->OnDataFinished(aStatus); 832 } 833 834 return NS_OK; 835 } 836 837 void nsBinaryDetector::DetermineContentType(nsIRequest* aRequest) { 838 nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(aRequest); 839 if (!httpChannel) { 840 return; 841 } 842 843 nsCOMPtr<nsILoadInfo> loadInfo = httpChannel->LoadInfo(); 844 if (loadInfo->GetSkipContentSniffing()) { 845 LastDitchSniff(aRequest); 846 return; 847 } 848 // It's an HTTP channel. Check for the text/plain mess 849 nsAutoCString contentTypeHdr; 850 (void)httpChannel->GetResponseHeader("Content-Type"_ns, contentTypeHdr); 851 nsAutoCString contentType; 852 httpChannel->GetContentType(contentType); 853 854 // Make sure to do a case-sensitive exact match comparison here. Apache 855 // 1.x just sends text/plain for "unknown", while Apache 2.x sends 856 // text/plain with a ISO-8859-1 charset. Debian's Apache version, just to 857 // be different, sends text/plain with iso-8859-1 charset. For extra fun, 858 // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8. Don't do general 859 // case-insensitive comparison, since we really want to apply this crap as 860 // rarely as we can. 861 if (!contentType.EqualsLiteral("text/plain") || 862 (!contentTypeHdr.EqualsLiteral("text/plain") && 863 !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") && 864 !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") && 865 !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) { 866 return; 867 } 868 869 // Check whether we have content-encoding. If we do, don't try to 870 // detect the type. 871 // XXXbz we could improve this by doing a local decompress if we 872 // wanted, I'm sure. 873 nsAutoCString contentEncoding; 874 (void)httpChannel->GetResponseHeader("Content-Encoding"_ns, contentEncoding); 875 if (!contentEncoding.IsEmpty()) { 876 return; 877 } 878 879 LastDitchSniff(aRequest); 880 MutexAutoLock lock(mMutex); 881 if (mContentType.EqualsLiteral(APPLICATION_OCTET_STREAM)) { 882 // We want to guess at it instead 883 mContentType = APPLICATION_GUESS_FROM_EXT; 884 } else { 885 // Let the text/plain type we already have be, so that other content 886 // sniffers can also get a shot at this data. 887 mContentType.Truncate(); 888 } 889 }