tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

InteractionsBlocklist.sys.mjs (8422B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 const lazy = {};
      6 
      7 ChromeUtils.defineESModuleGetters(lazy, {
      8  FilterAdult: "resource:///modules/FilterAdult.sys.mjs",
      9  UrlbarUtils: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs",
     10 });
     11 
     12 ChromeUtils.defineLazyGetter(lazy, "logConsole", function () {
     13  return console.createInstance({
     14    prefix: "InteractionsBlocklist",
     15    maxLogLevel: Services.prefs.getBoolPref(
     16      "browser.places.interactions.log",
     17      false
     18    )
     19      ? "Debug"
     20      : "Warn",
     21  });
     22 });
     23 
     24 // A blocklist of regular expressions. Maps base hostnames to a list regular
     25 // expressions for URLs with that base hostname. In this context, "base
     26 // hostname" means the hostname without any subdomains or a public suffix. For
     27 // example, the base hostname for "https://www.maps.google.com/a/place" is
     28 // "google". We do this mapping to improve performance; otherwise we'd have to
     29 // check all URLs against a long list of regular expressions. The regexes are
     30 // defined as escaped strings so that we build them lazily.
     31 // We may want to migrate this list to Remote Settings in the future.
     32 let HOST_BLOCKLIST = {
     33  auth0: [
     34    // Auth0 OAuth.
     35    // XXX: Used alone this could produce false positives where an auth0 URL
     36    // appears after another valid domain and TLD, but since we limit this to
     37    // the auth0 hostname those occurrences will be filtered out.
     38    "^https:\\/\\/.*\\.auth0\\.com\\/login",
     39  ],
     40  baidu: [
     41    // Baidu SERP
     42    "^(https?:\\/\\/)?(www\\.)?baidu\\.com\\/s.*(\\?|&)wd=.*",
     43  ],
     44  bing: [
     45    // Bing SERP
     46    "^(https?:\\/\\/)?(www\\.)?bing\\.com\\/search.*(\\?|&)q=.*",
     47  ],
     48  duckduckgo: [
     49    // DuckDuckGo SERP
     50    "^(https?:\\/\\/)?(www\\.)?duckduckgo\\.com\\/.*(\\?|&)q=.*",
     51  ],
     52  google: [
     53    // Google SERP
     54    "^(https?:\\/\\/)?(www\\.)?google\\.(\\w|\\.){2,}\\/search.*(\\?|&)q=.*",
     55    // Google OAuth
     56    "^https:\\/\\/accounts\\.google\\.com\\/o\\/oauth2\\/v2\\/auth",
     57    "^https:\\/\\/accounts\\.google\\.com\\/signin\\/oauth\\/consent",
     58  ],
     59  microsoftonline: [
     60    // Microsoft OAuth
     61    "^https:\\/\\/login\\.microsoftonline\\.com\\/common\\/oauth2\\/v2\\.0\\/authorize",
     62  ],
     63  yandex: [
     64    // Yandex SERP
     65    "^(https?:\\/\\/)?(www\\.)?yandex\\.(\\w|\\.){2,}\\/search.*(\\?|&)text=.*",
     66  ],
     67  zoom: [
     68    // Zoom meeting interstitial
     69    "^(https?:\\/\\/)?(www\\.)?.*\\.zoom\\.us\\/j\\/\\d+",
     70  ],
     71 };
     72 
     73 HOST_BLOCKLIST = new Proxy(HOST_BLOCKLIST, {
     74  get(target, property) {
     75    let regexes = target[property];
     76    if (!regexes || !Array.isArray(regexes)) {
     77      return null;
     78    }
     79 
     80    for (let i = 0; i < regexes.length; i++) {
     81      let regex = regexes[i];
     82      if (typeof regex === "string") {
     83        regex = new RegExp(regex, "i");
     84        if (regex) {
     85          regexes[i] = regex;
     86        } else {
     87          throw new Error("Blocklist contains invalid regex.");
     88        }
     89      }
     90    }
     91    return regexes;
     92  },
     93 });
     94 
     95 /**
     96 * A class that maintains a blocklist of URLs. The class exposes a method to
     97 * check if a particular URL is contained on the blocklist.
     98 */
     99 class _InteractionsBlocklist {
    100  constructor() {
    101    // Load custom blocklist items from pref.
    102    try {
    103      let customBlocklist = JSON.parse(
    104        Services.prefs.getStringPref(
    105          "places.interactions.customBlocklist",
    106          "[]"
    107        )
    108      );
    109      if (!Array.isArray(customBlocklist)) {
    110        throw new Error();
    111      }
    112      let parsedBlocklist = customBlocklist.map(
    113        regexStr => new RegExp(regexStr)
    114      );
    115      HOST_BLOCKLIST["*"] = parsedBlocklist;
    116    } catch (ex) {
    117      lazy.logConsole.warn("places.interactions.customBlocklist is corrupted.");
    118    }
    119  }
    120 
    121  /**
    122   * Only certain urls can be added as Interactions, either manually or
    123   * automatically.
    124   *
    125   * @returns {Map} A Map keyed by protocol, for each protocol an object may
    126   *          define stricter requirements, like extension.
    127   */
    128  get urlRequirements() {
    129    return new Map([
    130      ["http:", {}],
    131      ["https:", {}],
    132      ["file:", { extension: "pdf" }],
    133    ]);
    134  }
    135 
    136  /**
    137   * Whether to record interactions for a given URL.
    138   * The rules are defined in InteractionsBlocklist.urlRequirements.
    139   *
    140   * @param {string|URL|nsIURI} url The URL to check.
    141   * @returns {boolean} whether the url can be recorded.
    142   */
    143  canRecordUrl(url) {
    144    let protocol, pathname;
    145    if (typeof url == "string") {
    146      url = new URL(url);
    147    }
    148    if (url instanceof Ci.nsIURI) {
    149      protocol = url.scheme + ":";
    150      pathname = url.filePath;
    151    } else {
    152      protocol = url.protocol;
    153      pathname = url.pathname;
    154    }
    155    let requirements = InteractionsBlocklist.urlRequirements.get(protocol);
    156    return (
    157      requirements &&
    158      (!requirements.extension || pathname.endsWith(requirements.extension))
    159    );
    160  }
    161 
    162  /**
    163   * Checks a URL against a blocklist of URLs. If the URL is blocklisted, we
    164   * should not record an interaction.
    165   *
    166   * @param {string} urlToCheck
    167   *   The URL we are looking for on the blocklist.
    168   * @returns {boolean}
    169   *  True if `url` is on a blocklist. False otherwise.
    170   */
    171  isUrlBlocklisted(urlToCheck) {
    172    if (lazy.FilterAdult.isAdultUrl(urlToCheck)) {
    173      return true;
    174    }
    175 
    176    if (!this.canRecordUrl(urlToCheck)) {
    177      return true;
    178    }
    179 
    180    // First, find the URL's base host: the hostname without any subdomains or a
    181    // public suffix.
    182    let url = URL.parse(urlToCheck);
    183    if (!url) {
    184      lazy.logConsole.warn(
    185        `Invalid URL passed to InteractionsBlocklist.isUrlBlocklisted: ${urlToCheck}`
    186      );
    187      return false;
    188    }
    189 
    190    if (url.protocol == "file:") {
    191      return false;
    192    }
    193 
    194    let hostWithoutSuffix = lazy.UrlbarUtils.stripPublicSuffixFromHost(
    195      url.host
    196    );
    197    let [hostWithSubdomains] = lazy.UrlbarUtils.stripPrefixAndTrim(
    198      hostWithoutSuffix,
    199      {
    200        stripWww: true,
    201        trimTrailingDot: true,
    202      }
    203    );
    204    let baseHost = hostWithSubdomains.substring(
    205      hostWithSubdomains.lastIndexOf(".") + 1
    206    );
    207    // Then fetch blocked regexes for that baseHost and compare them to the full
    208    // URL. Also check the URL against the custom blocklist.
    209    let regexes = HOST_BLOCKLIST[baseHost.toLocaleLowerCase()] || [];
    210    regexes.push(...(HOST_BLOCKLIST["*"] || []));
    211    if (!regexes) {
    212      return false;
    213    }
    214 
    215    return regexes.some(r => r.test(url.href));
    216  }
    217 
    218  /**
    219   * Adds a regex to HOST_BLOCKLIST. Since we can't parse the base host from
    220   * the regex, we add it to a list of wildcard regexes. All URLs are checked
    221   * against these wildcard regexes. Currently only exposed for tests and use in
    222   * the console. In the future we could hook this up to a UI component.
    223   *
    224   * @param {string|RegExp} regexToAdd
    225   *   The regular expression to add to our blocklist.
    226   */
    227  addRegexToBlocklist(regexToAdd) {
    228    let regex;
    229    try {
    230      regex = new RegExp(regexToAdd, "i");
    231    } catch (ex) {
    232      lazy.logConsole.warn("Invalid regex passed to addRegexToBlocklist.");
    233      return;
    234    }
    235 
    236    if (!HOST_BLOCKLIST["*"]) {
    237      HOST_BLOCKLIST["*"] = [];
    238    }
    239    HOST_BLOCKLIST["*"].push(regex);
    240    Services.prefs.setStringPref(
    241      "places.interactions.customBlocklist",
    242      JSON.stringify(HOST_BLOCKLIST["*"].map(reg => reg.toString()))
    243    );
    244  }
    245 
    246  /**
    247   * Removes a regex from HOST_BLOCKLIST. If `regexToRemove` is not in the
    248   * blocklist, this is a no-op. Currently only exposed for tests and use in the
    249   * console. In the future we could hook this up to a UI component.
    250   *
    251   * @param {string|RegExp} regexToRemove
    252   *   The regular expression to add to our blocklist.
    253   */
    254  removeRegexFromBlocklist(regexToRemove) {
    255    let regex;
    256    try {
    257      regex = new RegExp(regexToRemove, "i");
    258    } catch (ex) {
    259      lazy.logConsole.warn("Invalid regex passed to addRegexToBlocklist.");
    260      return;
    261    }
    262 
    263    if (!HOST_BLOCKLIST["*"] || !Array.isArray(HOST_BLOCKLIST["*"])) {
    264      return;
    265    }
    266    HOST_BLOCKLIST["*"] = HOST_BLOCKLIST["*"].filter(
    267      curr => curr.source != regex.source
    268    );
    269    Services.prefs.setStringPref(
    270      "places.interactions.customBlocklist",
    271      JSON.stringify(HOST_BLOCKLIST["*"].map(reg => reg.toString()))
    272    );
    273  }
    274 }
    275 
    // The instance created when this module is evaluated and exported for
    // importers; `canRecordUrl` reads `urlRequirements` through this binding.
    276 export const InteractionsBlocklist = new _InteractionsBlocklist();