tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SiteClassifier.sys.mjs (3297B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 // We use importESModule here instead of static import so that
      6 // the Karma test environment won't choke on this module. This
      7 // is because the Karma test environment already stubs out
      8 // RemoteSettings, and overrides importESModule to be a no-op (which
      9 // can't be done for a static import statement).
     10 
     11 // eslint-disable-next-line mozilla/use-static-import
     12 const { RemoteSettings } = ChromeUtils.importESModule(
     13  "resource://services-settings/remote-settings.sys.mjs"
     14 );
     15 
     16 // Returns whether the passed in params match the criteria.
     17 // To match, they must contain all the params specified in criteria and the values
     18 // must match if a value is provided in criteria.
     19 function _hasParams(criteria, params) {
     20  for (let param of criteria) {
     21    const val = params.get(param.key);
     22    if (
     23      val === null ||
     24      (param.value && param.value !== val) ||
     25      (param.prefix && !val.startsWith(param.prefix))
     26    ) {
     27      return false;
     28    }
     29  }
     30  return true;
     31 }
     32 
     33 /**
     34 * classifySite
     35 * Classifies a given URL into a category based on classification data from RemoteSettings.
     36 * The data from remote settings can match a category by one of the following:
     37 *  - match the exact URL
     38 *  - match the hostname or second level domain (sld)
     39 *  - match query parameter(s), and optionally their values or prefixes
     40 *  - match both (hostname or sld) and query parameter(s)
     41 *
     42 * The data looks like:
     43 * [{
     44 *    "type": "hostname-and-params-match",
     45 *    "criteria": [
     46 *      {
     47 *        "url": "https://matchurl.com",
     48 *        "hostname": "matchhostname.com",
     49 *        "sld": "secondleveldomain",
     50 *        "params": [
     51 *          {
     52 *            "key": "matchparam",
     53 *            "value": "matchvalue",
     54 *            "prefix": "matchpPrefix",
     55 *          },
     56 *        ],
     57 *      },
     58 *    ],
     59 *    "weight": 300,
     60 *  },...]
     61 */
     62 export async function classifySite(url, RS = RemoteSettings) {
     63  let category = "other";
     64  let parsedURL;
     65 
     66  // Try to parse the url.
     67  for (let _url of [url, `https://${url}`]) {
     68    try {
     69      parsedURL = new URL(_url);
     70      break;
     71    } catch (e) {}
     72  }
     73 
     74  if (parsedURL) {
     75    // If we parsed successfully, find a match.
     76    const hostname = parsedURL.hostname.replace(/^www\./i, "");
     77    const params = parsedURL.searchParams;
     78    // NOTE: there will be an initial/default local copy of the data in m-c.
     79    // Therefore, this should never return an empty list [].
     80    const siteTypes = await RS("sites-classification").get();
     81    const sortedSiteTypes = siteTypes.sort(
     82      (x, y) => (y.weight || 0) - (x.weight || 0)
     83    );
     84    for (let type of sortedSiteTypes) {
     85      for (let criteria of type.criteria) {
     86        if (criteria.url && criteria.url !== url) {
     87          continue;
     88        }
     89        if (criteria.hostname && criteria.hostname !== hostname) {
     90          continue;
     91        }
     92        if (criteria.sld && criteria.sld !== hostname.split(".")[0]) {
     93          continue;
     94        }
     95        if (criteria.params && !_hasParams(criteria.params, params)) {
     96          continue;
     97        }
     98        return type.type;
     99      }
    100    }
    101  }
    102  return category;
    103 }