SiteClassifier.sys.mjs (3297B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// We use importESModule here instead of static import so that
// the Karma test environment won't choke on this module. This
// is because the Karma test environment already stubs out
// RemoteSettings, and overrides importESModule to be a no-op (which
// can't be done for a static import statement).

// eslint-disable-next-line mozilla/use-static-import
const { RemoteSettings } = ChromeUtils.importESModule(
  "resource://services-settings/remote-settings.sys.mjs"
);

/**
 * Checks whether a set of query parameters satisfies every entry of a
 * criteria list.
 *
 * An entry is satisfied when the parameter named by `key` is present and,
 * if the entry carries a truthy `value`, the parameter equals it exactly,
 * and, if the entry carries a truthy `prefix`, the parameter starts with it.
 *
 * @param {Array<{key: string, value?: string, prefix?: string}>} criteria
 *   The parameter requirements to check.
 * @param {URLSearchParams} params
 *   The parameters to test; only `get(key)` is used, and a `null` result is
 *   treated as "parameter missing".
 * @returns {boolean}
 *   True when every entry in `criteria` is satisfied (including when
 *   `criteria` is empty), false as soon as one entry fails.
 */
function _hasParams(criteria, params) {
  for (const requirement of criteria) {
    const actual = params.get(requirement.key);

    // The parameter must be present at all.
    if (actual === null) {
      return false;
    }
    // An exact value is only enforced when the criteria provides one.
    if (requirement.value && requirement.value !== actual) {
      return false;
    }
    // Likewise, a prefix is only enforced when the criteria provides one.
    if (requirement.prefix && !actual.startsWith(requirement.prefix)) {
      return false;
    }
  }
  return true;
}
/**
 * classifySite
 * Classifies a given URL into a category based on classification data from
 * RemoteSettings.
 *
 * The data from remote settings can match a category by one of the following:
 *  - match the exact URL
 *  - match the hostname or second level domain (sld)
 *  - match query parameter(s), and optionally their values or prefixes
 *  - match both (hostname or sld) and query parameter(s)
 *
 * The data looks like:
 * [{
 *    "type": "hostname-and-params-match",
 *    "criteria": [
 *      {
 *        "url": "https://matchurl.com",
 *        "hostname": "matchhostname.com",
 *        "sld": "secondleveldomain",
 *        "params": [
 *          {
 *            "key": "matchparam",
 *            "value": "matchvalue",
 *            "prefix": "matchPrefix",
 *          },
 *        ],
 *      },
 *    ],
 *    "weight": 300,
 *  },...]
 *
 * Records are tried in descending `weight` order (a missing weight counts as
 * 0). Within a record, the first criteria object whose provided fields all
 * match decides the result. Fields that are absent from a criteria object are
 * not checked.
 *
 * @param {string} url
 *   The URL to classify. If it cannot be parsed as given, it is retried with
 *   "https://" prepended.
 * @param {Function} [RS=RemoteSettings]
 *   Called as `RS("sites-classification")`; the returned object must provide
 *   an async `get()` resolving to the array of classification records.
 * @returns {Promise<string>}
 *   The `type` of the first matching record, or "other" when the URL cannot
 *   be parsed or nothing matches.
 */
export async function classifySite(url, RS = RemoteSettings) {
  let parsedURL;

  // Try to parse the url, first as given and then with a scheme prepended.
  for (const candidate of [url, `https://${url}`]) {
    try {
      parsedURL = new URL(candidate);
      break;
    } catch (e) {
      // Deliberately ignored: fall through to the next candidate.
    }
  }

  if (!parsedURL) {
    return "other";
  }

  // If we parsed successfully, find a match.
  const hostname = parsedURL.hostname.replace(/^www\./i, "");
  // What the data calls "sld" is compared against the first dot-separated
  // label of the hostname (after stripping a leading "www.").
  const sld = hostname.split(".")[0];
  const params = parsedURL.searchParams;

  // NOTE: there will be an initial/default local copy of the data in m-c.
  // Therefore, this should never return an empty list [].
  const siteTypes = await RS("sites-classification").get();

  // Sort a copy: Array.prototype.sort works in place and the array returned
  // by get() is not ours to reorder.
  const sortedSiteTypes = [...siteTypes].sort(
    (x, y) => (y.weight || 0) - (x.weight || 0)
  );

  for (const type of sortedSiteTypes) {
    // A record without criteria can never match; skip it instead of throwing.
    for (const criteria of type.criteria || []) {
      if (criteria.url && criteria.url !== url) {
        continue;
      }
      if (criteria.hostname && criteria.hostname !== hostname) {
        continue;
      }
      if (criteria.sld && criteria.sld !== sld) {
        continue;
      }
      if (criteria.params && !_hasParams(criteria.params, params)) {
        continue;
      }
      return type.type;
    }
  }

  return "other";
}