tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsEffectiveTLDService.cpp (16140B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 // This service reads a file of rules describing TLD-like domain names.  For a
      8 // complete description of the expected file format and parsing rules, see
      9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
     10 
     11 #include "mozilla/Components.h"
     12 #include "mozilla/ClearOnShutdown.h"
     13 #include "mozilla/MemoryReporting.h"
     14 
     15 #include "MainThreadUtils.h"
     16 #include "nsContentUtils.h"
     17 #include "nsCRT.h"
     18 #include "nsEffectiveTLDService.h"
     19 #include "nsIFile.h"
     20 #include "nsIURI.h"
     21 #include "nsNetCID.h"
     22 #include "nsNetUtil.h"
     23 #include "nsServiceManagerUtils.h"
     24 #include "mozilla/net/DNS.h"
     25 
     26 namespace etld_dafsa {
     27 
     28 // Generated file that includes kDafsa
     29 #include "etld_data.inc"
     30 
     31 }  // namespace etld_dafsa
     32 
     33 using namespace mozilla;
     34 
     35 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
     36                  nsIMemoryReporter)
     37 
     38 // ----------------------------------------------------------------------
     39 
     40 static StaticRefPtr<nsEffectiveTLDService> gService;
     41 
     42 nsEffectiveTLDService::nsEffectiveTLDService() : mGraph(etld_dafsa::kDafsa) {}
     43 
     44 nsresult nsEffectiveTLDService::Init() {
     45  MOZ_ASSERT(NS_IsMainThread());
     46 
     47  if (gService) {
     48    return NS_ERROR_ALREADY_INITIALIZED;
     49  }
     50 
     51  RegisterWeakMemoryReporter(this);
     52 
     53  return NS_OK;
     54 }
     55 
     56 nsEffectiveTLDService::~nsEffectiveTLDService() {
     57  UnregisterWeakMemoryReporter(this);
     58 }
     59 
     60 // static
     61 already_AddRefed<nsIEffectiveTLDService>
     62 nsEffectiveTLDService::GetXPCOMSingleton() {
     63  if (gService) {
     64    return do_AddRef(gService);
     65  }
     66  RefPtr<nsEffectiveTLDService> instance = new nsEffectiveTLDService();
     67  nsresult rv = instance->Init();
     68  if (NS_FAILED(rv)) {
     69    return nullptr;
     70  }
     71  gService = instance;
     72  ClearOnShutdown(&gService);
     73  return instance.forget();
     74 }
     75 
     76 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
     77 
     78 // The amount of heap memory measured here is tiny. It used to be bigger when
     79 // nsEffectiveTLDService used a separate hash table instead of binary search.
     80 // Nonetheless, we keep this code here in anticipation of bug 1083971 which will
     81 // change ETLDEntries::entries to a heap-allocated array modifiable at runtime.
     82 NS_IMETHODIMP
     83 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
     84                                      nsISupports* aData, bool aAnonymize) {
     85  MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP,
     86                     UNITS_BYTES,
     87                     SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
     88                     "Memory used by the effective TLD service.");
     89 
     90  return NS_OK;
     91 }
     92 
     93 size_t nsEffectiveTLDService::SizeOfIncludingThis(
     94    mozilla::MallocSizeOf aMallocSizeOf) {
     95  size_t n = aMallocSizeOf(this);
     96 
     97  return n;
     98 }
     99 
    100 // External function for dealing with URI's correctly.
    101 // Pulls out the host portion from an nsIURI, and calls through to
    102 // GetPublicSuffixFromHost().
    103 NS_IMETHODIMP
    104 nsEffectiveTLDService::GetPublicSuffix(nsIURI* aURI,
    105                                       nsACString& aPublicSuffix) {
    106  NS_ENSURE_ARG_POINTER(aURI);
    107 
    108  nsAutoCString host;
    109  nsresult rv = NS_GetInnermostURIHost(aURI, host);
    110  if (NS_FAILED(rv)) {
    111    return rv;
    112  }
    113 
    114  return GetBaseDomainInternal(host, 0, false, aPublicSuffix);
    115 }
    116 
    117 NS_IMETHODIMP
    118 nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI* aURI,
    119                                            nsACString& aPublicSuffix) {
    120  NS_ENSURE_ARG_POINTER(aURI);
    121 
    122  nsAutoCString host;
    123  nsresult rv = NS_GetInnermostURIHost(aURI, host);
    124  if (NS_FAILED(rv)) {
    125    return rv;
    126  }
    127 
    128  return GetBaseDomainInternal(host, 0, true, aPublicSuffix);
    129 }
    130 
    131 // External function for dealing with URI's correctly.
    132 // Pulls out the host portion from an nsIURI, and calls through to
    133 // GetBaseDomainFromHost().
    134 NS_IMETHODIMP
    135 nsEffectiveTLDService::GetBaseDomain(nsIURI* aURI, uint32_t aAdditionalParts,
    136                                     nsACString& aBaseDomain) {
    137  NS_ENSURE_ARG_POINTER(aURI);
    138  NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
    139 
    140  nsAutoCString host;
    141  nsresult rv = NS_GetInnermostURIHost(aURI, host);
    142  if (NS_FAILED(rv)) {
    143    return rv;
    144  }
    145 
    146  return GetBaseDomainInternal(host, aAdditionalParts + 1, false, aBaseDomain);
    147 }
    148 
    149 // External function for dealing with URIs to get a schemeless site.
    150 // Calls through to GetBaseDomain(), handling IP addresses and aliases by
    151 // just returning their serialized host.
    152 NS_IMETHODIMP
    153 nsEffectiveTLDService::GetSchemelessSite(nsIURI* aURI, nsACString& aSite) {
    154  NS_ENSURE_ARG_POINTER(aURI);
    155 
    156  nsresult rv = GetBaseDomain(aURI, 0, aSite);
    157  if (rv == NS_ERROR_HOST_IS_IP_ADDRESS ||
    158      rv == NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) {
    159    rv = nsContentUtils::GetHostOrIPv6WithBrackets(aURI, aSite);
    160  }
    161  return rv;
    162 }
    163 
    164 // Variant of GetSchemelessSite which accepts a host string instead of a URI.
    165 NS_IMETHODIMP
    166 nsEffectiveTLDService::GetSchemelessSiteFromHost(const nsACString& aHostname,
    167                                                 nsACString& aSite) {
    168  NS_ENSURE_TRUE(!aHostname.IsEmpty(), NS_ERROR_FAILURE);
    169 
    170  nsresult rv = GetBaseDomainFromHost(aHostname, 0, aSite);
    171  if (rv == NS_ERROR_HOST_IS_IP_ADDRESS ||
    172      rv == NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) {
    173    aSite.Assign(aHostname);
    174    nsContentUtils::MaybeFixIPv6Host(aSite);
    175 
    176    return NS_OK;
    177  }
    178  return rv;
    179 }
    180 
    181 // External function for dealing with URIs to get site correctly.
    182 // Calls through to GetSchemelessSite(), and serializes with the scheme and
    183 // "://" prepended.
    184 NS_IMETHODIMP
    185 nsEffectiveTLDService::GetSite(nsIURI* aURI, nsACString& aSite) {
    186  NS_ENSURE_ARG_POINTER(aURI);
    187 
    188  nsAutoCString scheme;
    189  nsresult rv = aURI->GetScheme(scheme);
    190  NS_ENSURE_SUCCESS(rv, rv);
    191 
    192  nsAutoCString schemeless;
    193  rv = GetSchemelessSite(aURI, schemeless);
    194  NS_ENSURE_SUCCESS(rv, rv);
    195 
    196  // aURI (and thus BaseDomain) may be the string '.'. If so, fail.
    197  if (schemeless.Length() == 1 && schemeless.Last() == '.') {
    198    return NS_ERROR_INVALID_ARG;
    199  }
    200 
    201  // Reject any URIs without a host that aren't file:// URIs.
    202  if (schemeless.IsEmpty() && !aURI->SchemeIs("file")) {
    203    return NS_ERROR_INVALID_ARG;
    204  }
    205 
    206  aSite.SetCapacity(scheme.Length() + 3 + schemeless.Length());
    207  aSite.Append(scheme);
    208  aSite.Append("://"_ns);
    209  aSite.Append(schemeless);
    210 
    211  return NS_OK;
    212 }
    213 
    214 // External function for dealing with a host string directly: finds the public
    215 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
    216 NS_IMETHODIMP
    217 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString& aHostname,
    218                                               nsACString& aPublicSuffix) {
    219  // This will fail if the hostname includes invalid characters.
    220  nsAutoCString normHostname;
    221  nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname);
    222  if (NS_FAILED(rv)) {
    223    return rv;
    224  }
    225 
    226  return GetBaseDomainInternal(normHostname, 0, false, aPublicSuffix);
    227 }
    228 
    229 NS_IMETHODIMP
    230 nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString& aHostname,
    231                                                    nsACString& aPublicSuffix) {
    232  // This will fail if the hostname includes invalid characters.
    233  nsAutoCString normHostname;
    234  nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname);
    235  if (NS_FAILED(rv)) {
    236    return rv;
    237  }
    238 
    239  return GetBaseDomainInternal(normHostname, 0, true, aPublicSuffix);
    240 }
    241 
    242 // External function for dealing with a host string directly: finds the base
    243 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
    244 // requested. See GetBaseDomainInternal().
    245 NS_IMETHODIMP
    246 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString& aHostname,
    247                                             uint32_t aAdditionalParts,
    248                                             nsACString& aBaseDomain) {
    249  NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
    250 
    251  // This will fail if the hostname includes invalid characters.
    252  nsAutoCString normHostname;
    253  nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname);
    254  if (NS_FAILED(rv)) {
    255    return rv;
    256  }
    257 
    258  return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, false,
    259                               aBaseDomain);
    260 }
    261 
    262 NS_IMETHODIMP
    263 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
    264                                        nsACString& aBaseDomain) {
    265  // This will fail if the hostname includes invalid characters.
    266  nsAutoCString normHostname;
    267  nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname);
    268  if (NS_FAILED(rv)) {
    269    return rv;
    270  }
    271 
    272  return GetBaseDomainInternal(normHostname, -1, false, aBaseDomain);
    273 }
    274 
    275 // Finds the base domain for a host, with requested number of additional parts.
    276 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
    277 // if more subdomain parts are requested than are available, or if the hostname
    278 // includes characters that are not valid in a URL. Normalization is performed
    279 // on the host string and the result will be in UTF8.
    280 nsresult nsEffectiveTLDService::GetBaseDomainInternal(
    281    nsCString& aHostname, int32_t aAdditionalParts, bool aOnlyKnownPublicSuffix,
    282    nsACString& aBaseDomain) {
    283  const int kExceptionRule = 1;
    284  const int kWildcardRule = 2;
    285 
    286  if (aHostname.IsEmpty()) {
    287    return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
    288  }
    289 
    290  // chomp any trailing dot, and keep track of it for later
    291  bool trailingDot = aHostname.Last() == '.';
    292  if (trailingDot) {
    293    aHostname.Truncate(aHostname.Length() - 1);
    294  }
    295 
    296  // check the edge cases of the host being '.' or having a second trailing '.',
    297  // since subsequent checks won't catch it.
    298  if (aHostname.IsEmpty() || aHostname.Last() == '.') {
    299    return NS_ERROR_INVALID_ARG;
    300  }
    301 
    302  // Lookup in the cache if this is a normal query. This is restricted to
    303  // main thread-only as the cache is not thread-safe.
    304  Maybe<TldCache::Entry> entry;
    305  if (aAdditionalParts == 1 && NS_IsMainThread()) {
    306    auto p = mMruTable.Lookup(aHostname);
    307    if (p) {
    308      if (NS_FAILED(p.Data().mResult)) {
    309        return p.Data().mResult;
    310      }
    311 
    312      // There was a match, just return the cached value.
    313      aBaseDomain = p.Data().mBaseDomain;
    314      if (trailingDot) {
    315        aBaseDomain.Append('.');
    316      }
    317 
    318      return NS_OK;
    319    }
    320 
    321    entry = Some(p);
    322  }
    323 
    324  // Check if we're dealing with an IPv4/IPv6 hostname, and return
    325  if (mozilla::net::HostIsIPLiteral(aHostname)) {
    326    // Update the MRU table if in use.
    327    if (entry) {
    328      entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_HOST_IS_IP_ADDRESS});
    329    }
    330 
    331    return NS_ERROR_HOST_IS_IP_ADDRESS;
    332  }
    333 
    334  // Walk up the domain tree, most specific to least specific,
    335  // looking for matches at each level.  Note that a given level may
    336  // have multiple attributes (e.g. IsWild() and IsNormal()).
    337  const char* prevDomain = nullptr;
    338  const char* currDomain = aHostname.get();
    339  const char* nextDot = strchr(currDomain, '.');
    340  const char* end = currDomain + aHostname.Length();
    341  // Default value of *eTLD is currDomain as set in the while loop below
    342  const char* eTLD = nullptr;
    343  bool hasKnownPublicSuffix = false;
    344  while (true) {
    345    // sanity check the string we're about to look up: it should not begin
    346    // with a '.'; this would mean the hostname began with a '.' or had an
    347    // embedded '..' sequence.
    348    if (*currDomain == '.') {
    349      // Update the MRU table if in use.
    350      if (entry) {
    351        entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INVALID_ARG});
    352      }
    353 
    354      return NS_ERROR_INVALID_ARG;
    355    }
    356 
    357    // Perform the lookup.
    358    const int result = mGraph.Lookup(Substring(currDomain, end));
    359 
    360    if (result != Dafsa::kKeyNotFound) {
    361      hasKnownPublicSuffix = true;
    362      if (result == kWildcardRule && prevDomain) {
    363        // wildcard rules imply an eTLD one level inferior to the match.
    364        eTLD = prevDomain;
    365        break;
    366      }
    367      if (result != kExceptionRule || !nextDot) {
    368        // specific match, or we've hit the top domain level
    369        eTLD = currDomain;
    370        break;
    371      }
    372      if (result == kExceptionRule) {
    373        // exception rules imply an eTLD one level superior to the match.
    374        eTLD = nextDot + 1;
    375        break;
    376      }
    377    }
    378 
    379    if (!nextDot) {
    380      // we've hit the top domain level; use it by default.
    381      eTLD = currDomain;
    382      break;
    383    }
    384 
    385    prevDomain = currDomain;
    386    currDomain = nextDot + 1;
    387    nextDot = strchr(currDomain, '.');
    388  }
    389 
    390  if (aOnlyKnownPublicSuffix && !hasKnownPublicSuffix) {
    391    aBaseDomain.Truncate();
    392    return NS_OK;
    393  }
    394 
    395  const char *begin, *iter;
    396  if (aAdditionalParts < 0) {
    397    NS_ASSERTION(aAdditionalParts == -1,
    398                 "aAdditionalParts can't be negative and different from -1");
    399 
    400    for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++) {
    401      ;
    402    }
    403 
    404    if (iter != eTLD) {
    405      iter++;
    406    }
    407    if (iter != eTLD) {
    408      aAdditionalParts = 0;
    409    }
    410  } else {
    411    // count off the number of requested domains.
    412    begin = aHostname.get();
    413    iter = eTLD;
    414 
    415    while (true) {
    416      if (iter == begin) {
    417        break;
    418      }
    419 
    420      if (*(--iter) == '.' && aAdditionalParts-- == 0) {
    421        ++iter;
    422        ++aAdditionalParts;
    423        break;
    424      }
    425    }
    426  }
    427 
    428  if (aAdditionalParts != 0) {
    429    // Update the MRU table if in use.
    430    if (entry) {
    431      entry->Set(
    432          TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS});
    433    }
    434 
    435    return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
    436  }
    437 
    438  aBaseDomain = Substring(iter, end);
    439 
    440  // Update the MRU table if in use.
    441  if (entry) {
    442    entry->Set(TLDCacheEntry{aHostname, nsCString(aBaseDomain), NS_OK});
    443  }
    444 
    445  // add on the trailing dot, if applicable
    446  if (trailingDot) {
    447    aBaseDomain.Append('.');
    448  }
    449 
    450  return NS_OK;
    451 }
    452 
    453 NS_IMETHODIMP
    454 nsEffectiveTLDService::HasRootDomain(const nsACString& aInput,
    455                                     const nsACString& aHost, bool* aResult) {
    456  return net::HasRootDomain(aInput, aHost, aResult);
    457 }
    458 
    459 NS_IMETHODIMP
    460 nsEffectiveTLDService::HasKnownPublicSuffix(nsIURI* aURI, bool* aResult) {
    461  NS_ENSURE_ARG_POINTER(aURI);
    462 
    463  nsAutoCString host;
    464  nsresult rv = NS_GetInnermostURIHost(aURI, host);
    465  if (NS_FAILED(rv)) {
    466    return rv;
    467  }
    468 
    469  return HasKnownPublicSuffixFromHost(host, aResult);
    470 }
    471 
    472 NS_IMETHODIMP
    473 nsEffectiveTLDService::HasKnownPublicSuffixFromHost(const nsACString& aHostname,
    474                                                    bool* aResult) {
    475  // Create a mutable copy of the hostname and normalize it to ACE.
    476  // This will fail if the hostname includes invalid characters.
    477  nsAutoCString hostname;
    478  nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, hostname);
    479  if (NS_FAILED(rv)) {
    480    return rv;
    481  }
    482 
    483  if (hostname.IsEmpty() || hostname == ".") {
    484    return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
    485  }
    486 
    487  // Remove any trailing dot ("example.com." should have a valid suffix)
    488  if (hostname.Last() == '.') {
    489    hostname.Truncate(hostname.Length() - 1);
    490  }
    491 
    492  // Check if we can find a suffix on the PSL. Start with the top level domain
    493  // (for example "com" in "example.com"). If that isn't on the PSL, continue to
    494  // add domain segments from the end (for example for "example.co.za", "za" is
    495  // not on the PSL, but "co.za" is).
    496  int32_t dotBeforeSuffix = -1;
    497  int8_t i = 0;
    498  do {
    499    dotBeforeSuffix = Substring(hostname, 0, dotBeforeSuffix).RFindChar('.');
    500 
    501    const nsACString& suffix = Substring(
    502        hostname, dotBeforeSuffix == kNotFound ? 0 : dotBeforeSuffix + 1);
    503 
    504    if (mGraph.Lookup(suffix) != Dafsa::kKeyNotFound) {
    505      *aResult = true;
    506      return NS_OK;
    507    }
    508 
    509    // To save time, only check up to 9 segments. We can be certain at that
    510    // point that the PSL doesn't contain a suffix with that many segments if we
    511    // didn't find a suffix earlier.
    512    i++;
    513  } while (dotBeforeSuffix != kNotFound && i < 10);
    514 
    515  *aResult = false;
    516  return NS_OK;
    517 }