nsEffectiveTLDService.cpp (16140B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 // This service reads a file of rules describing TLD-like domain names. For a 8 // complete description of the expected file format and parsing rules, see 9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service 10 11 #include "mozilla/Components.h" 12 #include "mozilla/ClearOnShutdown.h" 13 #include "mozilla/MemoryReporting.h" 14 15 #include "MainThreadUtils.h" 16 #include "nsContentUtils.h" 17 #include "nsCRT.h" 18 #include "nsEffectiveTLDService.h" 19 #include "nsIFile.h" 20 #include "nsIURI.h" 21 #include "nsNetCID.h" 22 #include "nsNetUtil.h" 23 #include "nsServiceManagerUtils.h" 24 #include "mozilla/net/DNS.h" 25 26 namespace etld_dafsa { 27 28 // Generated file that includes kDafsa 29 #include "etld_data.inc" 30 31 } // namespace etld_dafsa 32 33 using namespace mozilla; 34 35 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService, 36 nsIMemoryReporter) 37 38 // ---------------------------------------------------------------------- 39 40 static StaticRefPtr<nsEffectiveTLDService> gService; 41 42 nsEffectiveTLDService::nsEffectiveTLDService() : mGraph(etld_dafsa::kDafsa) {} 43 44 nsresult nsEffectiveTLDService::Init() { 45 MOZ_ASSERT(NS_IsMainThread()); 46 47 if (gService) { 48 return NS_ERROR_ALREADY_INITIALIZED; 49 } 50 51 RegisterWeakMemoryReporter(this); 52 53 return NS_OK; 54 } 55 56 nsEffectiveTLDService::~nsEffectiveTLDService() { 57 UnregisterWeakMemoryReporter(this); 58 } 59 60 // static 61 already_AddRefed<nsIEffectiveTLDService> 62 nsEffectiveTLDService::GetXPCOMSingleton() { 63 if (gService) { 64 return do_AddRef(gService); 65 } 66 RefPtr<nsEffectiveTLDService> instance = new nsEffectiveTLDService(); 67 nsresult rv = instance->Init(); 68 if (NS_FAILED(rv)) { 69 return nullptr; 70 } 71 gService = instance; 72 ClearOnShutdown(&gService); 73 return instance.forget(); 74 } 75 76 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf) 77 78 // The amount of heap memory measured here is tiny. It used to be bigger when 79 // nsEffectiveTLDService used a separate hash table instead of binary search. 80 // Nonetheless, we keep this code here in anticipation of bug 1083971 which will 81 // change ETLDEntries::entries to a heap-allocated array modifiable at runtime. 82 NS_IMETHODIMP 83 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport, 84 nsISupports* aData, bool aAnonymize) { 85 MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP, 86 UNITS_BYTES, 87 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf), 88 "Memory used by the effective TLD service."); 89 90 return NS_OK; 91 } 92 93 size_t nsEffectiveTLDService::SizeOfIncludingThis( 94 mozilla::MallocSizeOf aMallocSizeOf) { 95 size_t n = aMallocSizeOf(this); 96 97 return n; 98 } 99 100 // External function for dealing with URI's correctly. 101 // Pulls out the host portion from an nsIURI, and calls through to 102 // GetPublicSuffixFromHost(). 103 NS_IMETHODIMP 104 nsEffectiveTLDService::GetPublicSuffix(nsIURI* aURI, 105 nsACString& aPublicSuffix) { 106 NS_ENSURE_ARG_POINTER(aURI); 107 108 nsAutoCString host; 109 nsresult rv = NS_GetInnermostURIHost(aURI, host); 110 if (NS_FAILED(rv)) { 111 return rv; 112 } 113 114 return GetBaseDomainInternal(host, 0, false, aPublicSuffix); 115 } 116 117 NS_IMETHODIMP 118 nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI* aURI, 119 nsACString& aPublicSuffix) { 120 NS_ENSURE_ARG_POINTER(aURI); 121 122 nsAutoCString host; 123 nsresult rv = NS_GetInnermostURIHost(aURI, host); 124 if (NS_FAILED(rv)) { 125 return rv; 126 } 127 128 return GetBaseDomainInternal(host, 0, true, aPublicSuffix); 129 } 130 131 // External function for dealing with URI's correctly. 132 // Pulls out the host portion from an nsIURI, and calls through to 133 // GetBaseDomainFromHost(). 134 NS_IMETHODIMP 135 nsEffectiveTLDService::GetBaseDomain(nsIURI* aURI, uint32_t aAdditionalParts, 136 nsACString& aBaseDomain) { 137 NS_ENSURE_ARG_POINTER(aURI); 138 NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); 139 140 nsAutoCString host; 141 nsresult rv = NS_GetInnermostURIHost(aURI, host); 142 if (NS_FAILED(rv)) { 143 return rv; 144 } 145 146 return GetBaseDomainInternal(host, aAdditionalParts + 1, false, aBaseDomain); 147 } 148 149 // External function for dealing with URIs to get a schemeless site. 150 // Calls through to GetBaseDomain(), handling IP addresses and aliases by 151 // just returning their serialized host. 152 NS_IMETHODIMP 153 nsEffectiveTLDService::GetSchemelessSite(nsIURI* aURI, nsACString& aSite) { 154 NS_ENSURE_ARG_POINTER(aURI); 155 156 nsresult rv = GetBaseDomain(aURI, 0, aSite); 157 if (rv == NS_ERROR_HOST_IS_IP_ADDRESS || 158 rv == NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) { 159 rv = nsContentUtils::GetHostOrIPv6WithBrackets(aURI, aSite); 160 } 161 return rv; 162 } 163 164 // Variant of GetSchemelessSite which accepts a host string instead of a URI. 165 NS_IMETHODIMP 166 nsEffectiveTLDService::GetSchemelessSiteFromHost(const nsACString& aHostname, 167 nsACString& aSite) { 168 NS_ENSURE_TRUE(!aHostname.IsEmpty(), NS_ERROR_FAILURE); 169 170 nsresult rv = GetBaseDomainFromHost(aHostname, 0, aSite); 171 if (rv == NS_ERROR_HOST_IS_IP_ADDRESS || 172 rv == NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) { 173 aSite.Assign(aHostname); 174 nsContentUtils::MaybeFixIPv6Host(aSite); 175 176 return NS_OK; 177 } 178 return rv; 179 } 180 181 // External function for dealing with URIs to get site correctly. 182 // Calls through to GetSchemelessSite(), and serializes with the scheme and 183 // "://" prepended. 184 NS_IMETHODIMP 185 nsEffectiveTLDService::GetSite(nsIURI* aURI, nsACString& aSite) { 186 NS_ENSURE_ARG_POINTER(aURI); 187 188 nsAutoCString scheme; 189 nsresult rv = aURI->GetScheme(scheme); 190 NS_ENSURE_SUCCESS(rv, rv); 191 192 nsAutoCString schemeless; 193 rv = GetSchemelessSite(aURI, schemeless); 194 NS_ENSURE_SUCCESS(rv, rv); 195 196 // aURI (and thus BaseDomain) may be the string '.'. If so, fail. 197 if (schemeless.Length() == 1 && schemeless.Last() == '.') { 198 return NS_ERROR_INVALID_ARG; 199 } 200 201 // Reject any URIs without a host that aren't file:// URIs. 202 if (schemeless.IsEmpty() && !aURI->SchemeIs("file")) { 203 return NS_ERROR_INVALID_ARG; 204 } 205 206 aSite.SetCapacity(scheme.Length() + 3 + schemeless.Length()); 207 aSite.Append(scheme); 208 aSite.Append("://"_ns); 209 aSite.Append(schemeless); 210 211 return NS_OK; 212 } 213 214 // External function for dealing with a host string directly: finds the public 215 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal(). 216 NS_IMETHODIMP 217 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString& aHostname, 218 nsACString& aPublicSuffix) { 219 // This will fail if the hostname includes invalid characters. 220 nsAutoCString normHostname; 221 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname); 222 if (NS_FAILED(rv)) { 223 return rv; 224 } 225 226 return GetBaseDomainInternal(normHostname, 0, false, aPublicSuffix); 227 } 228 229 NS_IMETHODIMP 230 nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString& aHostname, 231 nsACString& aPublicSuffix) { 232 // This will fail if the hostname includes invalid characters. 233 nsAutoCString normHostname; 234 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname); 235 if (NS_FAILED(rv)) { 236 return rv; 237 } 238 239 return GetBaseDomainInternal(normHostname, 0, true, aPublicSuffix); 240 } 241 242 // External function for dealing with a host string directly: finds the base 243 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts 244 // requested. See GetBaseDomainInternal(). 245 NS_IMETHODIMP 246 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString& aHostname, 247 uint32_t aAdditionalParts, 248 nsACString& aBaseDomain) { 249 NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); 250 251 // This will fail if the hostname includes invalid characters. 252 nsAutoCString normHostname; 253 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname); 254 if (NS_FAILED(rv)) { 255 return rv; 256 } 257 258 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, false, 259 aBaseDomain); 260 } 261 262 NS_IMETHODIMP 263 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname, 264 nsACString& aBaseDomain) { 265 // This will fail if the hostname includes invalid characters. 266 nsAutoCString normHostname; 267 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, normHostname); 268 if (NS_FAILED(rv)) { 269 return rv; 270 } 271 272 return GetBaseDomainInternal(normHostname, -1, false, aBaseDomain); 273 } 274 275 // Finds the base domain for a host, with requested number of additional parts. 276 // This will fail, generating an error, if the host is an IPv4/IPv6 address, 277 // if more subdomain parts are requested than are available, or if the hostname 278 // includes characters that are not valid in a URL. Normalization is performed 279 // on the host string and the result will be in UTF8. 280 nsresult nsEffectiveTLDService::GetBaseDomainInternal( 281 nsCString& aHostname, int32_t aAdditionalParts, bool aOnlyKnownPublicSuffix, 282 nsACString& aBaseDomain) { 283 const int kExceptionRule = 1; 284 const int kWildcardRule = 2; 285 286 if (aHostname.IsEmpty()) { 287 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; 288 } 289 290 // chomp any trailing dot, and keep track of it for later 291 bool trailingDot = aHostname.Last() == '.'; 292 if (trailingDot) { 293 aHostname.Truncate(aHostname.Length() - 1); 294 } 295 296 // check the edge cases of the host being '.' or having a second trailing '.', 297 // since subsequent checks won't catch it. 298 if (aHostname.IsEmpty() || aHostname.Last() == '.') { 299 return NS_ERROR_INVALID_ARG; 300 } 301 302 // Lookup in the cache if this is a normal query. This is restricted to 303 // main thread-only as the cache is not thread-safe. 304 Maybe<TldCache::Entry> entry; 305 if (aAdditionalParts == 1 && NS_IsMainThread()) { 306 auto p = mMruTable.Lookup(aHostname); 307 if (p) { 308 if (NS_FAILED(p.Data().mResult)) { 309 return p.Data().mResult; 310 } 311 312 // There was a match, just return the cached value. 313 aBaseDomain = p.Data().mBaseDomain; 314 if (trailingDot) { 315 aBaseDomain.Append('.'); 316 } 317 318 return NS_OK; 319 } 320 321 entry = Some(p); 322 } 323 324 // Check if we're dealing with an IPv4/IPv6 hostname, and return 325 if (mozilla::net::HostIsIPLiteral(aHostname)) { 326 // Update the MRU table if in use. 327 if (entry) { 328 entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_HOST_IS_IP_ADDRESS}); 329 } 330 331 return NS_ERROR_HOST_IS_IP_ADDRESS; 332 } 333 334 // Walk up the domain tree, most specific to least specific, 335 // looking for matches at each level. Note that a given level may 336 // have multiple attributes (e.g. IsWild() and IsNormal()). 337 const char* prevDomain = nullptr; 338 const char* currDomain = aHostname.get(); 339 const char* nextDot = strchr(currDomain, '.'); 340 const char* end = currDomain + aHostname.Length(); 341 // Default value of *eTLD is currDomain as set in the while loop below 342 const char* eTLD = nullptr; 343 bool hasKnownPublicSuffix = false; 344 while (true) { 345 // sanity check the string we're about to look up: it should not begin 346 // with a '.'; this would mean the hostname began with a '.' or had an 347 // embedded '..' sequence. 348 if (*currDomain == '.') { 349 // Update the MRU table if in use. 350 if (entry) { 351 entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INVALID_ARG}); 352 } 353 354 return NS_ERROR_INVALID_ARG; 355 } 356 357 // Perform the lookup. 358 const int result = mGraph.Lookup(Substring(currDomain, end)); 359 360 if (result != Dafsa::kKeyNotFound) { 361 hasKnownPublicSuffix = true; 362 if (result == kWildcardRule && prevDomain) { 363 // wildcard rules imply an eTLD one level inferior to the match. 364 eTLD = prevDomain; 365 break; 366 } 367 if (result != kExceptionRule || !nextDot) { 368 // specific match, or we've hit the top domain level 369 eTLD = currDomain; 370 break; 371 } 372 if (result == kExceptionRule) { 373 // exception rules imply an eTLD one level superior to the match. 374 eTLD = nextDot + 1; 375 break; 376 } 377 } 378 379 if (!nextDot) { 380 // we've hit the top domain level; use it by default. 381 eTLD = currDomain; 382 break; 383 } 384 385 prevDomain = currDomain; 386 currDomain = nextDot + 1; 387 nextDot = strchr(currDomain, '.'); 388 } 389 390 if (aOnlyKnownPublicSuffix && !hasKnownPublicSuffix) { 391 aBaseDomain.Truncate(); 392 return NS_OK; 393 } 394 395 const char *begin, *iter; 396 if (aAdditionalParts < 0) { 397 NS_ASSERTION(aAdditionalParts == -1, 398 "aAdditionalParts can't be negative and different from -1"); 399 400 for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++) { 401 ; 402 } 403 404 if (iter != eTLD) { 405 iter++; 406 } 407 if (iter != eTLD) { 408 aAdditionalParts = 0; 409 } 410 } else { 411 // count off the number of requested domains. 412 begin = aHostname.get(); 413 iter = eTLD; 414 415 while (true) { 416 if (iter == begin) { 417 break; 418 } 419 420 if (*(--iter) == '.' && aAdditionalParts-- == 0) { 421 ++iter; 422 ++aAdditionalParts; 423 break; 424 } 425 } 426 } 427 428 if (aAdditionalParts != 0) { 429 // Update the MRU table if in use. 430 if (entry) { 431 entry->Set( 432 TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS}); 433 } 434 435 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; 436 } 437 438 aBaseDomain = Substring(iter, end); 439 440 // Update the MRU table if in use. 441 if (entry) { 442 entry->Set(TLDCacheEntry{aHostname, nsCString(aBaseDomain), NS_OK}); 443 } 444 445 // add on the trailing dot, if applicable 446 if (trailingDot) { 447 aBaseDomain.Append('.'); 448 } 449 450 return NS_OK; 451 } 452 453 NS_IMETHODIMP 454 nsEffectiveTLDService::HasRootDomain(const nsACString& aInput, 455 const nsACString& aHost, bool* aResult) { 456 return net::HasRootDomain(aInput, aHost, aResult); 457 } 458 459 NS_IMETHODIMP 460 nsEffectiveTLDService::HasKnownPublicSuffix(nsIURI* aURI, bool* aResult) { 461 NS_ENSURE_ARG_POINTER(aURI); 462 463 nsAutoCString host; 464 nsresult rv = NS_GetInnermostURIHost(aURI, host); 465 if (NS_FAILED(rv)) { 466 return rv; 467 } 468 469 return HasKnownPublicSuffixFromHost(host, aResult); 470 } 471 472 NS_IMETHODIMP 473 nsEffectiveTLDService::HasKnownPublicSuffixFromHost(const nsACString& aHostname, 474 bool* aResult) { 475 // Create a mutable copy of the hostname and normalize it to ACE. 476 // This will fail if the hostname includes invalid characters. 477 nsAutoCString hostname; 478 nsresult rv = NS_DomainToASCIIAllowAnyGlyphfulASCII(aHostname, hostname); 479 if (NS_FAILED(rv)) { 480 return rv; 481 } 482 483 if (hostname.IsEmpty() || hostname == ".") { 484 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; 485 } 486 487 // Remove any trailing dot ("example.com." should have a valid suffix) 488 if (hostname.Last() == '.') { 489 hostname.Truncate(hostname.Length() - 1); 490 } 491 492 // Check if we can find a suffix on the PSL. Start with the top level domain 493 // (for example "com" in "example.com"). If that isn't on the PSL, continue to 494 // add domain segments from the end (for example for "example.co.za", "za" is 495 // not on the PSL, but "co.za" is). 496 int32_t dotBeforeSuffix = -1; 497 int8_t i = 0; 498 do { 499 dotBeforeSuffix = Substring(hostname, 0, dotBeforeSuffix).RFindChar('.'); 500 501 const nsACString& suffix = Substring( 502 hostname, dotBeforeSuffix == kNotFound ? 0 : dotBeforeSuffix + 1); 503 504 if (mGraph.Lookup(suffix) != Dafsa::kKeyNotFound) { 505 *aResult = true; 506 return NS_OK; 507 } 508 509 // To save time, only check up to 9 segments. We can be certain at that 510 // point that the PSL doesn't contain a suffix with that many segments if we 511 // didn't find a suffix earlier. 512 i++; 513 } while (dotBeforeSuffix != kNotFound && i < 10); 514 515 *aResult = false; 516 return NS_OK; 517 }