tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

UrlClassifierExceptionList.cpp (8378B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "UrlClassifierExceptionList.h"
      8 #include "nsIEffectiveTLDService.h"
      9 #include "nsIUrlClassifierExceptionListEntry.h"
     10 #include "nsIURI.h"
     11 #include "mozilla/net/UrlClassifierCommon.h"
     12 #include "mozilla/ProfilerMarkers.h"
     13 #include "nsNetCID.h"
     14 #include "nsServiceManagerUtils.h"
     15 #include "mozilla/RustRegex.h"
     16 
     17 namespace mozilla::net {
     18 
     19 NS_IMPL_ISUPPORTS(UrlClassifierExceptionList, nsIUrlClassifierExceptionList)
     20 
     21 NS_IMETHODIMP
     22 UrlClassifierExceptionList::Init(const nsACString& aFeature) {
     23  mFeature = aFeature;
     24  return NS_OK;
     25 }
     26 
     27 NS_IMETHODIMP
     28 UrlClassifierExceptionList::AddEntry(
     29    nsIUrlClassifierExceptionListEntry* aEntry) {
     30  NS_ENSURE_ARG_POINTER(aEntry);
     31 
     32  // From the url patterns in the entry, extract the site and top level site.
     33  // They are used as keys in the exception entry maps.
     34 
     35  nsAutoCString urlPattern;
     36  nsresult rv = aEntry->GetUrlPattern(urlPattern);
     37  NS_ENSURE_SUCCESS(rv, rv);
     38 
     39  nsAutoCString site;
     40  rv = GetSchemelessSiteFromUrlPattern(urlPattern, site);
     41  NS_ENSURE_SUCCESS(rv, rv);
     42 
     43  // We must be able to parse a site from the url pattern.
     44  NS_ENSURE_TRUE(!site.IsEmpty(), NS_ERROR_INVALID_ARG);
     45 
     46  nsAutoCString topLevelUrlPattern;
     47  rv = aEntry->GetTopLevelUrlPattern(topLevelUrlPattern);
     48  NS_ENSURE_SUCCESS(rv, rv);
     49 
     50  nsAutoCString topLevelSite;
     51  rv = GetSchemelessSiteFromUrlPattern(topLevelUrlPattern, topLevelSite);
     52  NS_ENSURE_SUCCESS(rv, rv);
     53 
     54  // topLevelUrlPattern is not mandatory, but if topLevelUrlPattern is set,
     55  // topLevelSite populated as well.
     56  NS_ENSURE_TRUE(topLevelUrlPattern.IsEmpty() == topLevelSite.IsEmpty(),
     57                 NS_ERROR_INVALID_ARG);
     58 
     59  if (MOZ_LOG_TEST(UrlClassifierCommon::sLog, LogLevel::Debug)) {
     60    nsAutoCString entryString;
     61    (void)aEntry->Describe(entryString);
     62    UC_LOG_DEBUG(("UrlClassifierExceptionList::%s - Adding entry: %s",
     63                  __FUNCTION__, entryString.get()));
     64  }
     65 
     66  // If the top level site is empty, the exception applies across all top
     67  // level sites. Store it in the global exceptions map.
     68  if (topLevelSite.IsEmpty()) {
     69    mGlobalExceptions.LookupOrInsert(site).AppendElement(aEntry);
     70    return NS_OK;
     71  }
     72 
     73  // Otherwise, store it in the site specific exception map.
     74  mExceptions
     75      // Outer map keyed by top level site.
     76      // topLevelSite may be the empty string. We still use that a key. These
     77      // entries apply to all top-level sites.
     78      .LookupOrInsert(topLevelSite)
     79      // Inner map keyed by site of the load.
     80      .LookupOrInsert(site)
     81      // Append the entry.
     82      .AppendElement(aEntry);
     83 
     84  return NS_OK;
     85 }
     86 
     87 NS_IMETHODIMP
     88 UrlClassifierExceptionList::Matches(nsIURI* aURI, nsIURI* aTopLevelURI,
     89                                    bool aIsPrivateBrowsing, bool* aResult) {
     90  NS_ENSURE_ARG_POINTER(aURI);
     91  NS_ENSURE_ARG_POINTER(aResult);
     92 
     93  // Record how long it takes to perform the exception list lookup.
     94  AUTO_PROFILER_MARKER_UNTYPED("UrlClassifierExceptionList::Matches", OTHER,
     95                               MarkerTiming::IntervalStart());
     96 
     97  *aResult = false;
     98 
     99  UC_LOG_DEBUG(
    100      ("UrlClassifierExceptionList::%s - aURI: %s, aTopLevelURI: %s, "
    101       "aIsPrivateBrowsing: %d",
    102       __FUNCTION__, aURI->GetSpecOrDefault().get(),
    103       aTopLevelURI ? aTopLevelURI->GetSpecOrDefault().get() : "null",
    104       aIsPrivateBrowsing));
    105 
    106  // Get the eTLD service so we can compute sites from URIs.
    107  nsresult rv;
    108  nsCOMPtr<nsIEffectiveTLDService> eTLDService(
    109      do_GetService(NS_EFFECTIVETLDSERVICE_CONTRACTID, &rv));
    110  NS_ENSURE_SUCCESS(rv, rv);
    111 
    112  // If given, compute the (schemeless) site from the top level URI.
    113  // If not we will leave it empty and only look for global exceptions.
    114  nsAutoCString aTopLevelSite;
    115  if (aTopLevelURI) {
    116    rv = eTLDService->GetSchemelessSite(aTopLevelURI, aTopLevelSite);
    117    NS_ENSURE_SUCCESS(rv, rv);
    118  }
    119 
    120  // Compute the (schemeless) site from the URI of the load.
    121  nsAutoCString aSite;
    122  rv = eTLDService->GetSchemelessSite(aURI, aSite);
    123  NS_ENSURE_SUCCESS(rv, rv);
    124 
    125  // Get the list of exceptions that apply to the current load.
    126  // We need to check both global and site specific exceptions
    127 
    128  // 1. Check global exceptions, which apply to all top level sites and lookup
    129  //    entries matching the current load (aSite).
    130  ExceptionEntryArray* globalExceptions =
    131      mGlobalExceptions.Lookup(aSite).DataPtrOrNull();
    132 
    133  *aResult = ExceptionListMatchesLoad(globalExceptions, aURI, aTopLevelURI,
    134                                      aIsPrivateBrowsing);
    135  if (*aResult) {
    136    // We found a match, no need to check the site specific exceptions.
    137    return NS_OK;
    138  }
    139 
    140  // 2. Get exceptions which apply only to the current top level site.
    141  SiteToEntries* topLevelSiteToEntries =
    142      mExceptions.Lookup(aTopLevelSite).DataPtrOrNull();
    143  if (topLevelSiteToEntries) {
    144    ExceptionEntryArray* siteSpecificExceptions =
    145        topLevelSiteToEntries->Lookup(aSite).DataPtrOrNull();
    146 
    147    *aResult = ExceptionListMatchesLoad(siteSpecificExceptions, aURI,
    148                                        aTopLevelURI, aIsPrivateBrowsing);
    149    if (*aResult) {
    150      return NS_OK;
    151    }
    152  }
    153 
    154  if (!(*aResult)) {
    155    UC_LOG_DEBUG(("%s - No match found", __FUNCTION__));
    156  }
    157 
    158  return NS_OK;
    159 }
    160 
    161 bool UrlClassifierExceptionList::ExceptionListMatchesLoad(
    162    ExceptionEntryArray* aExceptions, nsIURI* aURI, nsIURI* aTopLevelURI,
    163    bool aIsPrivateBrowsing) {
    164  MOZ_ASSERT(aURI);
    165 
    166  if (!aExceptions) {
    167    return false;
    168  }
    169  for (const auto& entry : *aExceptions) {
    170    bool match = false;
    171    nsresult rv =
    172        entry->Matches(aURI, aTopLevelURI, aIsPrivateBrowsing, &match);
    173    if (NS_WARN_IF(NS_FAILED(rv))) {
    174      continue;
    175    }
    176    if (match) {
    177      // Match found, return immediately.
    178      if (MOZ_LOG_TEST(UrlClassifierCommon::sLog, LogLevel::Debug)) {
    179        nsAutoCString entryString;
    180        (void)entry->Describe(entryString);
    181        UC_LOG_DEBUG(
    182            ("UrlClassifierExceptionList::%s - Exception list match found. "
    183             "entry: %s",
    184             __FUNCTION__, entryString.get()));
    185      }
    186      return true;
    187    }
    188  }
    189  return false;
    190 }
    191 
    192 NS_IMETHODIMP
    193 UrlClassifierExceptionList::GetSchemelessSiteFromUrlPattern(
    194    const nsACString& aUrlPattern, nsACString& aSite) {
    195  if (aUrlPattern.IsEmpty()) {
    196    aSite.Truncate();
    197    return NS_OK;
    198  }
    199 
    200  // Extract the host portion from the url pattern. This regex only supports url
    201  // patterns with a host.
    202  mozilla::RustRegex regex("://(?:\\*\\.)?([^/*]+)");
    203  mozilla::RustRegexCaptures captures = regex.FindCaptures(aUrlPattern);
    204  NS_ENSURE_TRUE(captures.IsValid(), NS_ERROR_INVALID_ARG);
    205 
    206  // Get the host from the first capture group
    207  auto maybeMatch = captures[1];
    208  NS_ENSURE_TRUE(maybeMatch, NS_ERROR_INVALID_ARG);
    209 
    210  nsAutoCString host;
    211  host.Assign(Substring(aUrlPattern, maybeMatch->start,
    212                        maybeMatch->end - maybeMatch->start));
    213  NS_ENSURE_TRUE(!host.IsEmpty(), NS_ERROR_INVALID_ARG);
    214 
    215  // Get the eTLD service to convert host to schemeless site
    216  nsresult rv;
    217  nsCOMPtr<nsIEffectiveTLDService> eTLDService(
    218      do_GetService(NS_EFFECTIVETLDSERVICE_CONTRACTID, &rv));
    219  NS_ENSURE_SUCCESS(rv, rv);
    220 
    221  return eTLDService->GetSchemelessSiteFromHost(host, aSite);
    222 }
    223 
    224 NS_IMETHODIMP
    225 UrlClassifierExceptionList::TestGetEntries(
    226    nsTArray<RefPtr<nsIUrlClassifierExceptionListEntry>>& aEntries) {
    227  // Global entries (not top-level specific)
    228  for (const auto& entry : mGlobalExceptions) {
    229    const ExceptionEntryArray& entries = entry.GetData();
    230    aEntries.AppendElements(entries);
    231  }
    232 
    233  // Site specific entries.
    234  // Iterate through the outer map (top-level sites)
    235  for (const auto& outerEntry : mExceptions) {
    236    const SiteToEntries& innerMap = outerEntry.GetData();
    237 
    238    // Iterate through the inner map (sites to exception entries)
    239    for (const auto& innerEntry : innerMap) {
    240      const ExceptionEntryArray& entries = innerEntry.GetData();
    241      // Append all entries from this array to the result
    242      aEntries.AppendElements(entries);
    243    }
    244  }
    245 
    246  return NS_OK;
    247 }
    248 }  // namespace mozilla::net