InteractionsBlocklist.sys.mjs (8422B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {
  FilterAdult: "resource:///modules/FilterAdult.sys.mjs",
  UrlbarUtils: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs",
});

ChromeUtils.defineLazyGetter(lazy, "logConsole", function () {
  return console.createInstance({
    prefix: "InteractionsBlocklist",
    maxLogLevel: Services.prefs.getBoolPref(
      "browser.places.interactions.log",
      false
    )
      ? "Debug"
      : "Warn",
  });
});

// Name of the pref holding the JSON-serialized custom (wildcard) blocklist.
const CUSTOM_BLOCKLIST_PREF = "places.interactions.customBlocklist";

// Key in HOST_BLOCKLIST under which the custom regexes are kept. Regexes in
// this list are tested against every URL, regardless of its base hostname.
const WILDCARD_KEY = "*";

// Matches the `/source/flags` form produced by RegExp.prototype.toString(),
// which is how custom regexes are written to the pref. At least one flag is
// required because every regex stored by this module carries the "i" flag;
// this also keeps hand-written path-like patterns such as "/foo/" from being
// mistaken for a serialized literal.
const SERIALIZED_REGEX = /^\/(.+)\/([dgimsuvy]+)$/s;

// A blocklist of regular expressions. Maps base hostnames to a list of regular
// expressions for URLs with that base hostname. In this context, "base
// hostname" means the hostname without any subdomains or a public suffix. For
// example, the base hostname for "https://www.maps.google.com/a/place" is
// "google". We do this mapping to improve performance; otherwise we'd have to
// check all URLs against a long list of regular expressions. The regexes are
// defined as escaped strings so that we build them lazily.
// We may want to migrate this list to Remote Settings in the future.
let HOST_BLOCKLIST = {
  auth0: [
    // Auth0 OAuth.
    // XXX: Used alone this could produce false positives where an auth0 URL
    // appears after another valid domain and TLD, but since we limit this to
    // the auth0 hostname those occurrences will be filtered out.
    "^https:\\/\\/.*\\.auth0\\.com\\/login",
  ],
  baidu: [
    // Baidu SERP
    "^(https?:\\/\\/)?(www\\.)?baidu\\.com\\/s.*(\\?|&)wd=.*",
  ],
  bing: [
    // Bing SERP
    "^(https?:\\/\\/)?(www\\.)?bing\\.com\\/search.*(\\?|&)q=.*",
  ],
  duckduckgo: [
    // DuckDuckGo SERP
    "^(https?:\\/\\/)?(www\\.)?duckduckgo\\.com\\/.*(\\?|&)q=.*",
  ],
  google: [
    // Google SERP
    "^(https?:\\/\\/)?(www\\.)?google\\.(\\w|\\.){2,}\\/search.*(\\?|&)q=.*",
    // Google OAuth
    "^https:\\/\\/accounts\\.google\\.com\\/o\\/oauth2\\/v2\\/auth",
    "^https:\\/\\/accounts\\.google\\.com\\/signin\\/oauth\\/consent",
  ],
  microsoftonline: [
    // Microsoft OAuth
    "^https:\\/\\/login\\.microsoftonline\\.com\\/common\\/oauth2\\/v2\\.0\\/authorize",
  ],
  yandex: [
    // Yandex SERP
    "^(https?:\\/\\/)?(www\\.)?yandex\\.(\\w|\\.){2,}\\/search.*(\\?|&)text=.*",
  ],
  zoom: [
    // Zoom meeting interstitial
    "^(https?:\\/\\/)?(www\\.)?.*\\.zoom\\.us\\/j\\/\\d+",
  ],
};

// Wrap the blocklist so that string entries are compiled to case-insensitive
// RegExp objects the first time a host's list is read. The compiled objects
// replace the strings in place, so each pattern is compiled at most once.
// Reading a key that does not hold an array yields null.
//
// Note that the returned array IS the stored list, not a copy: callers must
// not mutate it unless they intend to change the blocklist itself.
HOST_BLOCKLIST = new Proxy(HOST_BLOCKLIST, {
  get(target, property) {
    const regexes = target[property];
    if (!Array.isArray(regexes)) {
      return null;
    }

    for (let i = 0; i < regexes.length; i++) {
      if (typeof regexes[i] === "string") {
        // `new RegExp` throws a SyntaxError on an invalid pattern.
        regexes[i] = new RegExp(regexes[i], "i");
      }
    }
    return regexes;
  },
});

/**
 * Rebuilds a RegExp from one entry of the custom blocklist pref.
 *
 * Entries written by this module have the `/source/flags` form produced by
 * RegExp.prototype.toString(); those are split back into source and flags so
 * that the restored regex matches the same URLs as the one that was saved.
 * Any other value is treated as a bare pattern and compiled without flags.
 *
 * @param {string} stored
 *   A single entry of the parsed pref value.
 * @returns {RegExp}
 *   The restored regular expression.
 * @throws {SyntaxError}
 *   If the pattern or its flags are not valid.
 */
function deserializeRegex(stored) {
  const literal =
    typeof stored === "string" ? SERIALIZED_REGEX.exec(stored) : null;
  if (literal) {
    const [, source, flags] = literal;
    return new RegExp(source, flags);
  }
  return new RegExp(stored);
}

/**
 * Writes the current custom (wildcard) blocklist to the pref as a JSON array
 * of `/source/flags` strings.
 */
function saveCustomBlocklist() {
  Services.prefs.setStringPref(
    CUSTOM_BLOCKLIST_PREF,
    JSON.stringify(HOST_BLOCKLIST[WILDCARD_KEY].map(regex => regex.toString()))
  );
}

/**
 * A class that maintains a blocklist of URLs. The class exposes a method to
 * check if a particular URL is contained on the blocklist.
 */
class _InteractionsBlocklist {
  constructor() {
    // Load custom blocklist items from pref. If the pref does not hold a JSON
    // array of valid regexes, the whole custom list is ignored.
    try {
      const customBlocklist = JSON.parse(
        Services.prefs.getStringPref(CUSTOM_BLOCKLIST_PREF, "[]")
      );
      if (!Array.isArray(customBlocklist)) {
        throw new Error("Custom blocklist pref is not a JSON array.");
      }
      HOST_BLOCKLIST[WILDCARD_KEY] = customBlocklist.map(deserializeRegex);
    } catch (ex) {
      lazy.logConsole.warn(
        "places.interactions.customBlocklist is corrupted.",
        ex
      );
    }
  }

  /**
   * Only certain urls can be added as Interactions, either manually or
   * automatically.
   *
   * @returns {Map} A Map keyed by protocol, for each protocol an object may
   *   define stricter requirements, like extension.
   */
  get urlRequirements() {
    return new Map([
      ["http:", {}],
      ["https:", {}],
      ["file:", { extension: "pdf" }],
    ]);
  }

  /**
   * Whether to record interactions for a given URL.
   * The rules are defined in InteractionsBlocklist.urlRequirements.
   *
   * @param {string|URL|nsIURI} url The URL to check.
   * @returns {boolean} whether the url can be recorded.
   * @throws {TypeError} if `url` is a string that cannot be parsed as a URL.
   */
  canRecordUrl(url) {
    const parsed = typeof url === "string" ? new URL(url) : url;

    let protocol, pathname;
    if (parsed instanceof Ci.nsIURI) {
      protocol = parsed.scheme + ":";
      pathname = parsed.filePath;
    } else {
      protocol = parsed.protocol;
      pathname = parsed.pathname;
    }

    const requirements = InteractionsBlocklist.urlRequirements.get(protocol);
    if (!requirements) {
      // Protocols without an entry can never be recorded.
      return false;
    }
    return (
      !requirements.extension || pathname.endsWith(requirements.extension)
    );
  }

  /**
   * Checks a URL against a blocklist of URLs. If the URL is blocklisted, we
   * should not record an interaction.
   *
   * @param {string} urlToCheck
   *   The URL we are looking for on the blocklist.
   * @returns {boolean}
   *   True if `url` is on a blocklist. False otherwise, including when
   *   `urlToCheck` cannot be parsed as a URL (a warning is logged).
   */
  isUrlBlocklisted(urlToCheck) {
    if (lazy.FilterAdult.isAdultUrl(urlToCheck)) {
      return true;
    }

    // Parse before calling canRecordUrl(): that method throws on an
    // unparsable string, which would make the warning below unreachable.
    const url = URL.parse(urlToCheck);
    if (!url) {
      lazy.logConsole.warn(
        `Invalid URL passed to InteractionsBlocklist.isUrlBlocklisted: ${urlToCheck}`
      );
      return false;
    }

    if (!this.canRecordUrl(url)) {
      return true;
    }

    if (url.protocol === "file:") {
      return false;
    }

    // Find the URL's base host: the hostname without any subdomains or a
    // public suffix.
    const hostWithoutSuffix = lazy.UrlbarUtils.stripPublicSuffixFromHost(
      url.host
    );
    const [hostWithSubdomains] = lazy.UrlbarUtils.stripPrefixAndTrim(
      hostWithoutSuffix,
      {
        stripWww: true,
        trimTrailingDot: true,
      }
    );
    const baseHost = hostWithSubdomains.substring(
      hostWithSubdomains.lastIndexOf(".") + 1
    );

    // Then fetch blocked regexes for that baseHost and compare them to the
    // full URL. Also check the URL against the custom blocklist. The two lists
    // are tested separately rather than merged, because the arrays returned by
    // HOST_BLOCKLIST are the stored lists themselves and must not be mutated.
    const hostRegexes = HOST_BLOCKLIST[baseHost.toLowerCase()] ?? [];
    const customRegexes = HOST_BLOCKLIST[WILDCARD_KEY] ?? [];
    const matchesUrl = regex => regex.test(url.href);
    return hostRegexes.some(matchesUrl) || customRegexes.some(matchesUrl);
  }

  /**
   * Adds a regex to HOST_BLOCKLIST. Since we can't parse the base host from
   * the regex, we add it to a list of wildcard regexes. All URLs are checked
   * against these wildcard regexes. Currently only exposed for tests and use in
   * the console. In the future we could hook this up to a UI component.
   *
   * @param {string|RegExp} regexToAdd
   *   The regular expression to add to our blocklist.
   */
  addRegexToBlocklist(regexToAdd) {
    let regex;
    try {
      regex = new RegExp(regexToAdd, "i");
    } catch (ex) {
      lazy.logConsole.warn("Invalid regex passed to addRegexToBlocklist.");
      return;
    }

    if (!HOST_BLOCKLIST[WILDCARD_KEY]) {
      HOST_BLOCKLIST[WILDCARD_KEY] = [];
    }
    HOST_BLOCKLIST[WILDCARD_KEY].push(regex);
    saveCustomBlocklist();
  }

  /**
   * Removes a regex from HOST_BLOCKLIST. If `regexToRemove` is not in the
   * blocklist, this is a no-op. Currently only exposed for tests and use in the
   * console. In the future we could hook this up to a UI component.
   *
   * @param {string|RegExp} regexToRemove
   *   The regular expression to remove from our blocklist.
   */
  removeRegexFromBlocklist(regexToRemove) {
    let regex;
    try {
      regex = new RegExp(regexToRemove, "i");
    } catch (ex) {
      lazy.logConsole.warn("Invalid regex passed to removeRegexFromBlocklist.");
      return;
    }

    const customRegexes = HOST_BLOCKLIST[WILDCARD_KEY];
    if (!Array.isArray(customRegexes)) {
      return;
    }
    // Entries are compared by pattern source, so every stored regex with the
    // same source is removed.
    HOST_BLOCKLIST[WILDCARD_KEY] = customRegexes.filter(
      curr => curr.source != regex.source
    );
    saveCustomBlocklist();
  }
}

export const InteractionsBlocklist = new _InteractionsBlocklist();