tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit db5b81f81a0035a744e748c24dd571cfeeaa3c79
parent adcc104bc7d8afb196df6b3b3adf76bbe8d4c6f6
Author: alex <alex.the.stout5@gmail.com>
Date:   Tue, 21 Oct 2025 14:31:29 +0000

Bug 694856 - extract regexp and looksLikeUrl, looksLikeOrigin functions from UrlbarTokenizer.sys.mjs to new UrlUtils.sys.mjs module. r=urlbar-reviewers,Standard8

Differential Revision: https://phabricator.services.mozilla.com/D258759

Diffstat:
M browser/components/urlbar/UrlbarController.sys.mjs | 5 ++---
M browser/components/urlbar/UrlbarInput.sys.mjs | 5 +++--
M browser/components/urlbar/UrlbarProviderAutofill.sys.mjs | 5 +++--
M browser/components/urlbar/UrlbarProviderClipboard.sys.mjs | 5 ++---
M browser/components/urlbar/UrlbarProviderHeuristicFallback.sys.mjs | 7 ++++---
M browser/components/urlbar/UrlbarProviderInterventions.sys.mjs | 7 ++-----
M browser/components/urlbar/UrlbarProviderSearchSuggestions.sys.mjs | 5 +++--
M browser/components/urlbar/UrlbarProviderTabToSearch.sys.mjs | 5 ++---
M browser/components/urlbar/UrlbarProviderTokenAliasEngines.sys.mjs | 5 ++---
M browser/components/urlbar/UrlbarSearchUtils.sys.mjs | 5 ++---
M browser/components/urlbar/UrlbarTokenizer.sys.mjs | 207 ++++---------------------------------------------------------------------------
M browser/components/urlbar/UrlbarUtils.sys.mjs | 7 ++++---
M browser/components/urlbar/tests/unit/test_tokenizer.js | 131 -------------------------------------------------------------------------------
A toolkit/modules/UrlUtils.sys.mjs | 229 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M toolkit/modules/moz.build | 1 +
A toolkit/modules/tests/xpcshell/test_UrlUtils.js | 390 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M toolkit/modules/tests/xpcshell/xpcshell.toml | 2 ++
M tools/@types/generated/lib.gecko.modules.d.ts | 10 ++++++++++
M tools/@types/generated/tspaths.json | 27 +++++++++++++++++++++++++++
19 files changed, 696 insertions(+), 362 deletions(-)

diff --git a/browser/components/urlbar/UrlbarController.sys.mjs b/browser/components/urlbar/UrlbarController.sys.mjs @@ -19,9 +19,8 @@ ChromeUtils.defineESModuleGetters(lazy, { "moz-src:///browser/components/urlbar/UrlbarProviderSemanticHistorySearch.sys.mjs", UrlbarProvidersManager: "moz-src:///browser/components/urlbar/UrlbarProvidersManager.sys.mjs", - UrlbarTokenizer: - "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", UrlbarUtils: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); ChromeUtils.defineLazyGetter(lazy, "logger", () => @@ -1517,7 +1516,7 @@ class TelemetryEvent { let searchWords = searchString .substring(0, lazy.UrlbarUtils.MAX_TEXT_LENGTH) .trim() - .split(lazy.UrlbarTokenizer.REGEXP_SPACES) + .split(lazy.UrlUtils.REGEXP_SPACES) .filter(t => t); let numWords = searchWords.length.toString(); diff --git a/browser/components/urlbar/UrlbarInput.sys.mjs b/browser/components/urlbar/UrlbarInput.sys.mjs @@ -52,6 +52,7 @@ const lazy = XPCOMUtils.declareLazy({ UrlbarView: "moz-src:///browser/components/urlbar/UrlbarView.sys.mjs", UrlbarSearchTermsPersistence: "moz-src:///browser/components/urlbar/UrlbarSearchTermsPersistence.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", ClipboardHelper: { service: "@mozilla.org/widget/clipboardhelper;1", iid: Ci.nsIClipboardHelper, @@ -1917,7 +1918,7 @@ export class UrlbarInput { this.focus(); } let trimmedValue = value.trim(); - let end = trimmedValue.search(lazy.UrlbarTokenizer.REGEXP_SPACES); + let end = trimmedValue.search(lazy.UrlUtils.REGEXP_SPACES); let firstToken = end == -1 ? trimmedValue : trimmedValue.substring(0, end); // Enter search mode if the string starts with a restriction token. let searchMode = this.searchModeForToken(firstToken); @@ -1935,7 +1936,7 @@ export class UrlbarInput { // in search mode. 
value = value.replace(firstToken, ""); } - if (lazy.UrlbarTokenizer.REGEXP_SPACES.test(value[0])) { + if (lazy.UrlUtils.REGEXP_SPACES.test(value[0])) { // If there was a trailing space after the restriction token/alias, // remove it. value = value.slice(1); diff --git a/browser/components/urlbar/UrlbarProviderAutofill.sys.mjs b/browser/components/urlbar/UrlbarProviderAutofill.sys.mjs @@ -24,6 +24,7 @@ ChromeUtils.defineESModuleGetters(lazy, { UrlbarResult: "moz-src:///browser/components/urlbar/UrlbarResult.sys.mjs", UrlbarTokenizer: "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); ChromeUtils.defineLazyGetter(lazy, "pageFrecencyThreshold", () => { @@ -384,7 +385,7 @@ export class UrlbarProviderAutofill extends UrlbarProvider { // This may confuse completeDefaultIndex cause the AUTOCOMPLETE_MATCH // tokenizer ends up trimming the search string and returning a value // that doesn't match it, or is even shorter. - if (lazy.UrlbarTokenizer.REGEXP_SPACES.test(queryContext.searchString)) { + if (lazy.UrlUtils.REGEXP_SPACES.test(queryContext.searchString)) { return false; } @@ -989,7 +990,7 @@ export class UrlbarProviderAutofill extends UrlbarProvider { // at the end, we still treat it as an URL. 
let query, params; if ( - lazy.UrlbarTokenizer.looksLikeOrigin(this._searchString, { + lazy.UrlUtils.looksLikeOrigin(this._searchString, { ignoreKnownDomains: true, }) ) { diff --git a/browser/components/urlbar/UrlbarProviderClipboard.sys.mjs b/browser/components/urlbar/UrlbarProviderClipboard.sys.mjs @@ -12,8 +12,7 @@ const lazy = {}; ChromeUtils.defineESModuleGetters(lazy, { UrlbarResult: "moz-src:///browser/components/urlbar/UrlbarResult.sys.mjs", UrlbarPrefs: "moz-src:///browser/components/urlbar/UrlbarPrefs.sys.mjs", - UrlbarTokenizer: - "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); const RESULT_MENU_COMMANDS = { @@ -63,7 +62,7 @@ export class UrlbarProviderClipboard extends UrlbarProvider { if ( !textFromClipboard || textFromClipboard.length > 2048 || - lazy.UrlbarTokenizer.REGEXP_SPACES.test(textFromClipboard) + lazy.UrlUtils.REGEXP_SPACES.test(textFromClipboard) ) { return false; } diff --git a/browser/components/urlbar/UrlbarProviderHeuristicFallback.sys.mjs b/browser/components/urlbar/UrlbarProviderHeuristicFallback.sys.mjs @@ -22,6 +22,7 @@ ChromeUtils.defineESModuleGetters(lazy, { "moz-src:///browser/components/urlbar/UrlbarSearchUtils.sys.mjs", UrlbarTokenizer: "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); /** @@ -78,11 +79,11 @@ export class UrlbarProviderHeuristicFallback extends UrlbarProvider { if (!URL.canParse(str)) { if ( lazy.UrlbarPrefs.get("keyword.enabled") && - (lazy.UrlbarTokenizer.looksLikeOrigin(str, { + (lazy.UrlUtils.looksLikeOrigin(str, { noIp: true, noPort: true, }) || - lazy.UrlbarTokenizer.REGEXP_COMMON_EMAIL.test(str)) + lazy.UrlUtils.REGEXP_COMMON_EMAIL.test(str)) ) { let searchResult = await this._engineSearchResult({ queryContext }); if (instance != this.queryInstance) { @@ -261,7 +262,7 @@ export class UrlbarProviderHeuristicFallback extends UrlbarProvider { 
queryContext.searchString, firstToken ); - if (!lazy.UrlbarTokenizer.REGEXP_SPACES_START.test(query)) { + if (!lazy.UrlUtils.REGEXP_SPACES_START.test(query)) { return null; } diff --git a/browser/components/urlbar/UrlbarProviderInterventions.sys.mjs b/browser/components/urlbar/UrlbarProviderInterventions.sys.mjs @@ -20,8 +20,7 @@ ChromeUtils.defineESModuleGetters(lazy, { UrlbarProviderGlobalActions: "moz-src:///browser/components/urlbar/UrlbarProviderGlobalActions.sys.mjs", UrlbarResult: "moz-src:///browser/components/urlbar/UrlbarResult.sys.mjs", - UrlbarTokenizer: - "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); ChromeUtils.defineLazyGetter(lazy, "appUpdater", () => new lazy.AppUpdater()); @@ -487,9 +486,7 @@ export class UrlbarProviderInterventions extends UrlbarProvider { if ( !queryContext.searchString || queryContext.searchString.length > UrlbarUtils.MAX_TEXT_LENGTH || - lazy.UrlbarTokenizer.REGEXP_LIKE_PROTOCOL.test( - queryContext.searchString - ) || + lazy.UrlUtils.REGEXP_LIKE_PROTOCOL.test(queryContext.searchString) || !EN_LOCALE_MATCH.test(Services.locale.appLocaleAsBCP47) || !Services.policies.isAllowed("urlbarinterventions") || (await this.queryInstance diff --git a/browser/components/urlbar/UrlbarProviderSearchSuggestions.sys.mjs b/browser/components/urlbar/UrlbarProviderSearchSuggestions.sys.mjs @@ -26,6 +26,7 @@ ChromeUtils.defineESModuleGetters(lazy, { "moz-src:///browser/components/urlbar/UrlbarSearchUtils.sys.mjs", UrlbarTokenizer: "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); /** @@ -54,7 +55,7 @@ const TRENDING_HELP_URL = function looksLikeUrl(str, ignoreAlphanumericHosts = false) { // Single word including special chars. 
return ( - !lazy.UrlbarTokenizer.REGEXP_SPACES.test(str) && + !lazy.UrlUtils.REGEXP_SPACES.test(str) && (["/", "@", ":", "["].some(c => str.includes(c)) || (ignoreAlphanumericHosts ? /^([\[\]A-Z0-9-]+\.){3,}[^.]+$/i.test(str) @@ -590,7 +591,7 @@ export class UrlbarProviderSearchSuggestions extends UrlbarProvider { // Match an alias only when it has a space after it. If there's no trailing // space, then continue to treat it as part of the search string. - if (!lazy.UrlbarTokenizer.REGEXP_SPACES_START.test(query)) { + if (!lazy.UrlUtils.REGEXP_SPACES_START.test(query)) { return null; } diff --git a/browser/components/urlbar/UrlbarProviderTabToSearch.sys.mjs b/browser/components/urlbar/UrlbarProviderTabToSearch.sys.mjs @@ -26,8 +26,7 @@ ChromeUtils.defineESModuleGetters(lazy, { UrlbarResult: "moz-src:///browser/components/urlbar/UrlbarResult.sys.mjs", UrlbarSearchUtils: "moz-src:///browser/components/urlbar/UrlbarSearchUtils.sys.mjs", - UrlbarTokenizer: - "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); const DYNAMIC_RESULT_TYPE = "onboardTabToSearch"; @@ -272,7 +271,7 @@ export class UrlbarProviderTabToSearch extends UrlbarProvider { ); // Skip any string that cannot be an origin. 
if ( - !lazy.UrlbarTokenizer.looksLikeOrigin(searchStr, { + !lazy.UrlUtils.looksLikeOrigin(searchStr, { ignoreKnownDomains: true, noIp: true, }) diff --git a/browser/components/urlbar/UrlbarProviderTokenAliasEngines.sys.mjs b/browser/components/urlbar/UrlbarProviderTokenAliasEngines.sys.mjs @@ -18,8 +18,7 @@ ChromeUtils.defineESModuleGetters(lazy, { UrlbarResult: "moz-src:///browser/components/urlbar/UrlbarResult.sys.mjs", UrlbarSearchUtils: "moz-src:///browser/components/urlbar/UrlbarSearchUtils.sys.mjs", - UrlbarTokenizer: - "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); /** @@ -177,7 +176,7 @@ export class UrlbarProviderTokenAliasEngines extends UrlbarProvider { // alias followed by a space. We enter search mode at that point. if ( lowerCaseSearchString.startsWith(alias) && - lazy.UrlbarTokenizer.REGEXP_SPACES_START.test( + lazy.UrlUtils.REGEXP_SPACES_START.test( lowerCaseSearchString.substring(alias.length) ) ) { diff --git a/browser/components/urlbar/UrlbarSearchUtils.sys.mjs b/browser/components/urlbar/UrlbarSearchUtils.sys.mjs @@ -16,8 +16,7 @@ import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs"; const lazy = XPCOMUtils.declareLazy({ - UrlbarTokenizer: - "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", UrlbarUtils: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs", separatePrivateDefaultUIEnabled: { pref: "browser.search.separatePrivateDefault.ui.enabled", @@ -154,7 +153,7 @@ class SearchUtils { let query = lazy.UrlbarUtils.substringAfter(searchString, alias); // Match an alias only when it has a space after it. If there's no trailing // space, then continue to treat it as part of the search string. 
- if (!lazy.UrlbarTokenizer.REGEXP_SPACES_START.test(query)) { + if (!lazy.UrlUtils.REGEXP_SPACES_START.test(query)) { return null; } } diff --git a/browser/components/urlbar/UrlbarTokenizer.sys.mjs b/browser/components/urlbar/UrlbarTokenizer.sys.mjs @@ -13,6 +13,7 @@ ChromeUtils.defineESModuleGetters(lazy, { UrlbarPrefs: "moz-src:///browser/components/urlbar/UrlbarPrefs.sys.mjs", UrlbarUtils: "moz-src:///browser/components/urlbar/UrlbarUtils.sys.mjs", PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); ChromeUtils.defineLazyGetter(lazy, "logger", () => @@ -37,32 +38,6 @@ ChromeUtils.defineLazyGetter(lazy, "gFluentStrings", function () { let tokenToKeywords = new Map(); export var UrlbarTokenizer = { - // Regex matching on whitespaces. - REGEXP_SPACES: /\s+/, - REGEXP_SPACES_START: /^\s+/, - - // Regex used to guess url-like strings. - // These are not expected to be 100% correct, we accept some user mistypes - // and we're unlikely to be able to cover 100% of the cases. - REGEXP_LIKE_PROTOCOL: /^[A-Z+.-]+:\/*(?!\/)/i, - REGEXP_USERINFO_INVALID_CHARS: /[^\w.~%!$&'()*+,;=:-]/, - REGEXP_HOSTPORT_INVALID_CHARS: /[^\[\]A-Z0-9.:-]/i, - REGEXP_SINGLE_WORD_HOST: /^[^.:]+$/i, - REGEXP_HOSTPORT_IP_LIKE: /^(?=(.*[.:].*){2})[a-f0-9\.\[\]:]+$/i, - // This accepts partial IPv4. - REGEXP_HOSTPORT_INVALID_IP: - /\.{2,}|\d{5,}|\d{4,}(?![:\]])|^\.|^(\d+\.){4,}\d+$|^\d{4,}$/, - // This only accepts complete IPv4. - REGEXP_HOSTPORT_IPV4: /^(\d{1,3}\.){3,}\d{1,3}(:\d+)?$/, - // This accepts partial IPv6. - REGEXP_HOSTPORT_IPV6: /^\[([0-9a-f]{0,4}:){0,7}[0-9a-f]{0,4}\]?$/i, - REGEXP_COMMON_EMAIL: /^[\w!#$%&'*+/=?^`{|}~.-]+@[\[\]A-Z0-9.-]+$/i, - REGEXP_HAS_PORT: /:\d+$/, - // Regex matching a percent encoded char at the beginning of a string. - REGEXP_PERCENT_ENCODED_START: /^(%[0-9a-f]{2}){2,}/i, - // Regex matching scheme and colon, plus, if present, two slashes. 
- REGEXP_PREFIX: /^[a-z-]+:(?:\/){0,2}/i, - TYPE: { TEXT: 1, // `looksLikeOrigin()` returned a value for this token that was neither @@ -155,172 +130,6 @@ export var UrlbarTokenizer = { }, /** - * Returns whether the passed in token looks like a URL. - * This is based on guessing and heuristics, that means if this function - * returns false, it's surely not a URL, if it returns true, the result must - * still be verified through URIFixup. - * - * @param {string} token - * The string token to verify - * @param {object} [options] - * @param {boolean} [options.requirePath] The url must have a path - * @returns {boolean} whether the token looks like a URL. - */ - looksLikeUrl(token, { requirePath = false } = {}) { - if (token.length < 2) { - return false; - } - // Ignore spaces and require path for the data: protocol. - if (token.startsWith("data:")) { - return token.length > 5; - } - if (this.REGEXP_SPACES.test(token)) { - return false; - } - // If it starts with something that looks like a protocol, it's likely a url. - if (this.REGEXP_LIKE_PROTOCOL.test(token)) { - return true; - } - // Guess path and prePath. At this point we should be analyzing strings not - // having a protocol. - let slashIndex = token.indexOf("/"); - let prePath = slashIndex != -1 ? token.slice(0, slashIndex) : token; - if (!this.looksLikeOrigin(prePath, { ignoreKnownDomains: true })) { - return false; - } - - let path = slashIndex != -1 ? token.slice(slashIndex) : ""; - lazy.logger.debug("path", path); - if (requirePath && !path) { - return false; - } - // If there are both path and userinfo, it's likely a url. - let atIndex = prePath.indexOf("@"); - let userinfo = atIndex != -1 ? prePath.slice(0, atIndex) : ""; - if (path.length && userinfo.length) { - return true; - } - - // If the first character after the slash in the path is a letter, then the - // token may be an "abc/def" url. - if (/^\/[a-z]/i.test(path)) { - return true; - } - // If the path contains special chars, it is likely a url. 
- if (["%", "?", "#"].some(c => path.includes(c))) { - return true; - } - - // The above looksLikeOrigin call told us the prePath looks like an origin, - // now we go into details checking some common origins. - let hostPort = atIndex != -1 ? prePath.slice(atIndex + 1) : prePath; - if (this.REGEXP_HOSTPORT_IPV4.test(hostPort)) { - return true; - } - // ipv6 is very complex to support, just check for a few chars. - if ( - this.REGEXP_HOSTPORT_IPV6.test(hostPort) && - ["[", "]", ":"].some(c => hostPort.includes(c)) - ) { - return true; - } - if (Services.uriFixup.isDomainKnown(hostPort)) { - return true; - } - return false; - }, - - /** - * Returns whether the passed in token looks like an origin. - * This is based on guessing and heuristics, that means if this function - * returns `NONE`, it's surely not an origin, but otherwise the result - * must still be verified through URIFixup. - * - * @param {string} token - * The string token to verify - * @param {object} options Options object - * @param {boolean} [options.ignoreKnownDomains] If true, the origin doesn't have to be - * in the known domain list - * @param {boolean} [options.noIp] If true, the origin cannot be an IP address - * @param {boolean} [options.noPort] If true, the origin cannot have a port number - * @returns {number} - * A `UrlbarTokenizer.LOOKS_LIKE_ORIGIN` value. - */ - looksLikeOrigin( - token, - { ignoreKnownDomains = false, noIp = false, noPort = false } = {} - ) { - if (!token.length) { - return this.LOOKS_LIKE_ORIGIN.NONE; - } - let atIndex = token.indexOf("@"); - if (atIndex != -1 && this.REGEXP_COMMON_EMAIL.test(token)) { - // We prefer handling it as an email rather than an origin with userinfo. - return this.LOOKS_LIKE_ORIGIN.NONE; - } - - let userinfo = atIndex != -1 ? token.slice(0, atIndex) : ""; - let hostPort = atIndex != -1 ? 
token.slice(atIndex + 1) : token; - let hasPort = this.REGEXP_HAS_PORT.test(hostPort); - lazy.logger.debug("userinfo", userinfo); - lazy.logger.debug("hostPort", hostPort); - if (noPort && hasPort) { - return this.LOOKS_LIKE_ORIGIN.NONE; - } - - if ( - this.REGEXP_HOSTPORT_IPV4.test(hostPort) || - this.REGEXP_HOSTPORT_IPV6.test(hostPort) - ) { - return noIp ? this.LOOKS_LIKE_ORIGIN.NONE : this.LOOKS_LIKE_ORIGIN.IP; - } - - // Check for invalid chars. - if ( - this.REGEXP_LIKE_PROTOCOL.test(hostPort) || - this.REGEXP_USERINFO_INVALID_CHARS.test(userinfo) || - this.REGEXP_HOSTPORT_INVALID_CHARS.test(hostPort) || - (!this.REGEXP_SINGLE_WORD_HOST.test(hostPort) && - this.REGEXP_HOSTPORT_IP_LIKE.test(hostPort) && - this.REGEXP_HOSTPORT_INVALID_IP.test(hostPort)) - ) { - return this.LOOKS_LIKE_ORIGIN.NONE; - } - - // If it looks like a single word host, check the known domains. - if ( - !ignoreKnownDomains && - !userinfo && - !hasPort && - this.REGEXP_SINGLE_WORD_HOST.test(hostPort) - ) { - return Services.uriFixup.isDomainKnown(hostPort) - ? this.LOOKS_LIKE_ORIGIN.KNOWN_DOMAIN - : this.LOOKS_LIKE_ORIGIN.NONE; - } - - if (atIndex != -1 || hasPort) { - return this.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT; - } - - return this.LOOKS_LIKE_ORIGIN.OTHER; - }, - - LOOKS_LIKE_ORIGIN: { - // The value cannot be an origin. - NONE: 0, - // The value may be an origin but it's not one of the other types. - // Example: "mozilla.org" - OTHER: 1, - // The value is an IP address (that may or may not be reachable). - IP: 2, - // The value is a domain known to URI fixup. - KNOWN_DOMAIN: 3, - // The value appears to be an origin with a userinfo or port. - USERINFO_OR_PORT: 4, - }, - - /** * Tokenizes the searchString from a UrlbarQueryContext. 
* * @param {UrlbarQueryContext} queryContext @@ -381,12 +190,12 @@ function splitString({ searchString, searchMode }) { if (trimmed.startsWith("data:")) { tokens = [trimmed]; } else if (trimmed.length < 500) { - tokens = trimmed.split(UrlbarTokenizer.REGEXP_SPACES); + tokens = trimmed.split(lazy.UrlUtils.REGEXP_SPACES); } else { // If the string is very long, tokenizing all of it would be expensive. So // we only tokenize a part of it, then let the last token become a // catch-all. - tokens = trimmed.substring(0, 500).split(UrlbarTokenizer.REGEXP_SPACES); + tokens = trimmed.substring(0, 500).split(lazy.UrlUtils.REGEXP_SPACES); tokens[tokens.length - 1] += trimmed.substring(500); } @@ -415,7 +224,7 @@ function splitString({ searchString, searchMode }) { // token. if ( CHAR_TO_TYPE_MAP.has(firstToken[0]) && - !UrlbarTokenizer.REGEXP_PERCENT_ENCODED_START.test(firstToken) && + !lazy.UrlUtils.REGEXP_PERCENT_ENCODED_START.test(firstToken) && !searchMode ) { tokens[0] = firstToken.substring(1); @@ -468,15 +277,15 @@ function filterTokens(tokens) { if (restrictionType) { restrictions.push({ index: i, type: restrictionType }); } else { - let looksLikeOrigin = UrlbarTokenizer.looksLikeOrigin(token); + let looksLikeOrigin = lazy.UrlUtils.looksLikeOrigin(token); if ( - looksLikeOrigin == UrlbarTokenizer.LOOKS_LIKE_ORIGIN.OTHER && + looksLikeOrigin == lazy.UrlUtils.LOOKS_LIKE_ORIGIN.OTHER && lazy.UrlbarPrefs.get("allowSearchSuggestionsForSimpleOrigins") ) { tokenObj.type = UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED; - } else if (looksLikeOrigin != UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE) { + } else if (looksLikeOrigin != lazy.UrlUtils.LOOKS_LIKE_ORIGIN.NONE) { tokenObj.type = UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN; - } else if (UrlbarTokenizer.looksLikeUrl(token, { requirePath: true })) { + } else if (lazy.UrlUtils.looksLikeUrl(token, { requirePath: true })) { tokenObj.type = UrlbarTokenizer.TYPE.POSSIBLE_URL; } } diff --git 
a/browser/components/urlbar/UrlbarUtils.sys.mjs b/browser/components/urlbar/UrlbarUtils.sys.mjs @@ -37,6 +37,7 @@ ChromeUtils.defineESModuleGetters(lazy, { UrlbarTokenizer: "moz-src:///browser/components/urlbar/UrlbarTokenizer.sys.mjs", BrowserUIUtils: "resource:///modules/BrowserUIUtils.sys.mjs", + UrlUtils: "resource://gre/modules/UrlUtils.sys.mjs", }); XPCOMUtils.defineLazyServiceGetter( @@ -1021,7 +1022,7 @@ export var UrlbarUtils = { * then [prefix, remainder]. Otherwise, ["", str]. */ stripURLPrefix(str) { - let match = lazy.UrlbarTokenizer.REGEXP_PREFIX.exec(str); + let match = lazy.UrlUtils.REGEXP_PREFIX.exec(str); if (!match) { return ["", str]; } @@ -1189,10 +1190,10 @@ export var UrlbarUtils = { // Create `URL` objects to make the logic below easier. The strings must // include schemes for this to work. - if (!lazy.UrlbarTokenizer.REGEXP_PREFIX.test(urlString)) { + if (!lazy.UrlUtils.REGEXP_PREFIX.test(urlString)) { urlString = "http://" + urlString; } - if (!lazy.UrlbarTokenizer.REGEXP_PREFIX.test(candidateString)) { + if (!lazy.UrlUtils.REGEXP_PREFIX.test(candidateString)) { candidateString = "http://" + candidateString; } diff --git a/browser/components/urlbar/tests/unit/test_tokenizer.js b/browser/components/urlbar/tests/unit/test_tokenizer.js @@ -1,137 +1,6 @@ /* Any copyright is dedicated to the Public Domain. * http://creativecommons.org/publicdomain/zero/1.0/ */ -add_task(async function looksLikeOrigin() { - let tests = [ - { - token: "", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "user@example.com", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "user:pass@example.com", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT, - }, - - { - token: "example.com:1234", - // This should be `USERINFO_OR_PORT`, but it matches - // `REGEXP_LIKE_PROTOCOL`, so it returns `NONE`. 
- expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - { - token: "example.com:1234", - args: { noPort: true }, - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "user@example.com:1234", - // This should be `USERINFO_OR_PORT`, but it matches - // `REGEXP_LIKE_PROTOCOL`, so it returns `NONE`. - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - { - token: "user@example.com:1234", - args: { noPort: true }, - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "user:pass@example.com:1234", - // This should be `USERINFO_OR_PORT`, but it matches - // `REGEXP_LIKE_PROTOCOL`, so it returns `NONE`. - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - { - token: "user:pass@example.com:1234", - args: { noPort: true }, - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "1.2.3.4", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.IP, - }, - { - token: "1.2.3.4", - args: { noIp: true }, - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "[2001:0db8:0000:0000:0000:8a2e:0370:7334]", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.IP, - }, - { - token: "[2001:0db8:0000:0000:0000:8a2e:0370:7334]", - args: { noIp: true }, - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "[2001:db8::8a2e:370:7334]", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.IP, - }, - { - token: "[2001:db8::8a2e:370:7334]", - args: { noIp: true }, - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "a!@#$%^&( z", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "example", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.NONE, - }, - - { - token: "localhost", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.KNOWN_DOMAIN, - }, - { - token: "localhost", - args: { ignoreKnownDomains: true }, - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.OTHER, - }, - - { - token: "example.com", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.OTHER, - }, - { - token: "example.co", - 
expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.OTHER, - }, - { - token: "example.c", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.OTHER, - }, - { - token: "example.", - expected: UrlbarTokenizer.LOOKS_LIKE_ORIGIN.OTHER, - }, - ]; - - for (let { token, args, expected } of tests) { - Assert.strictEqual( - UrlbarTokenizer.looksLikeOrigin(token, args), - expected, - "looksLikeOrigin should return expected value: " + - JSON.stringify({ token, args }) - ); - } -}); - add_task(async function test_tokenizer() { let testContexts = [ { desc: "Empty string", searchString: "", expectedTokens: [] }, diff --git a/toolkit/modules/UrlUtils.sys.mjs b/toolkit/modules/UrlUtils.sys.mjs @@ -0,0 +1,229 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * This module exports useful regular expressions for matching components of a + * url as well as functions for checking if a given string looks like a url or + * a part of one. + * + * Emails are explicitly NOT counted as urls since we want to deal with them + * separately. + */ + +export const UrlUtils = { + // Regex matching on whitespaces. + REGEXP_SPACES: /\s+/, + REGEXP_SPACES_START: /^\s+/, + + // Regex used to guess url-like strings. + // These are not expected to be 100% correct, we accept some user mistypes + // and we're unlikely to be able to cover 100% of the cases. + REGEXP_LIKE_PROTOCOL: /^[A-Z+.-]+:\/*(?!\/)/i, + REGEXP_USERINFO_INVALID_CHARS: /[^\w.~%!$&'()*+,;=:-]/, + REGEXP_HOSTPORT_INVALID_CHARS: /[^\[\]A-Z0-9.:-]/i, + REGEXP_SINGLE_WORD_HOST: /^[^.:]+$/i, + REGEXP_HOSTPORT_IP_LIKE: /^(?=(.*[.:].*){2})[a-f0-9\.\[\]:]+$/i, + // This accepts partial IPv4. + REGEXP_HOSTPORT_INVALID_IP: + /\.{2,}|\d{5,}|\d{4,}(?![:\]])|^\.|^(\d+\.){4,}\d+$|^\d{4,}$/, + // This only accepts complete IPv4. 
+ REGEXP_HOSTPORT_IPV4: /^(\d{1,3}\.){3,}\d{1,3}(:\d+)?$/, + // This accepts partial IPv6. + REGEXP_HOSTPORT_IPV6: /^\[([0-9a-f]{0,4}:){0,7}[0-9a-f]{0,4}\]?$/i, + REGEXP_COMMON_EMAIL: /^[\w!#$%&'*+/=?^`{|}~.-]+@[\[\]A-Z0-9.-]+$/i, + REGEXP_HAS_PORT: /:\d+$/, + // Regex matching a percent encoded char at the beginning of a string. + REGEXP_PERCENT_ENCODED_START: /^(%[0-9a-f]{2}){2,}/i, + // Regex matching scheme and colon, plus, if present, two slashes. + REGEXP_PREFIX: /^[a-z-]+:(?:\/){0,2}/i, + + /** + * Returns whether the passed in token looks like a URL. + * This is based on guessing and heuristics, that means if this function + * returns false, it's surely not a URL, if it returns true, the result must + * still be verified through URIFixup. + * + * @param {string} token + * The string token to verify + * @param {object} [options] + * @param {boolean} [options.requirePath] + * The url must have a path + * @param {ConsoleInstance} [logger] + * Optional logger for debugging + * @returns {boolean} + * Whether the token looks like a URL + */ + looksLikeUrl(token, { requirePath = false } = {}, logger) { + if (token.length < 2) { + return false; + } + // Ignore spaces and require path for the data: protocol. + if (token.startsWith("data:")) { + return token.length > 5; + } + if (this.REGEXP_SPACES.test(token)) { + return false; + } + // If it starts with something that looks like a protocol, it's likely a url. + if (this.REGEXP_LIKE_PROTOCOL.test(token)) { + return true; + } + // Guess path and prePath. At this point we should be analyzing strings not + // having a protocol. + let slashIndex = token.indexOf("/"); + let prePath = slashIndex != -1 ? token.slice(0, slashIndex) : token; + if (!this.looksLikeOrigin(prePath, { ignoreKnownDomains: true })) { + return false; + } + + let path = slashIndex != -1 ? 
token.slice(slashIndex) : ""; + logger?.debug("path", path); + if (requirePath && !path) { + return false; + } + // If there are both path and userinfo, it's likely a url. + let atIndex = prePath.indexOf("@"); + let userinfo = atIndex != -1 ? prePath.slice(0, atIndex) : ""; + if (path.length && userinfo.length) { + return true; + } + + // If the first character after the slash in the path is a letter, then the + // token may be an "abc/def" url. + if (/^\/[a-z]/i.test(path)) { + return true; + } + // If the path contains special chars, it is likely a url. + if (["%", "?", "#"].some(c => path.includes(c))) { + return true; + } + + // The above looksLikeOrigin call told us the prePath looks like an origin, + // now we go into details checking some common origins. + let hostPort = atIndex != -1 ? prePath.slice(atIndex + 1) : prePath; + if (this.REGEXP_HOSTPORT_IPV4.test(hostPort)) { + return true; + } + // ipv6 is very complex to support, just check for a few chars. + if ( + this.REGEXP_HOSTPORT_IPV6.test(hostPort) && + ["[", "]", ":"].some(c => hostPort.includes(c)) + ) { + return true; + } + if (Services.uriFixup.isDomainKnown(hostPort)) { + return true; + } + return false; + }, + + /** + * Returns whether the passed in token looks like an origin. + * This is based on guessing and heuristics, that means if this function + * returns `NONE`, it's surely not an origin, but otherwise the result + * must still be verified through URIFixup. + * + * @param {string} token + * The string token to verify + * @param {object} options + * Options object + * @param {boolean} [options.ignoreKnownDomains] + * If true, the origin doesn't have to be in the known domain list + * @param {boolean} [options.noIp] + * If true, the origin cannot be an IP address + * @param {boolean} [options.noPort] + * If true, the origin cannot have a port number + * @param {ConsoleInstance} [logger] + * Optional logger for debugging + * @returns {number} + * A `UrlUtils.LOOKS_LIKE_ORIGIN` value. 
+ */ + looksLikeOrigin( + token, + { ignoreKnownDomains = false, noIp = false, noPort = false } = {}, + logger + ) { + if (!token.length) { + return this.LOOKS_LIKE_ORIGIN.NONE; + } + let atIndex = token.indexOf("@"); + if (atIndex != -1 && this.REGEXP_COMMON_EMAIL.test(token)) { + // We prefer handling it as an email rather than an origin with userinfo. + return this.LOOKS_LIKE_ORIGIN.NONE; + } + + let userinfo = atIndex != -1 ? token.slice(0, atIndex) : ""; + let hostPort = atIndex != -1 ? token.slice(atIndex + 1) : token; + let hasPort = this.REGEXP_HAS_PORT.test(hostPort); + logger?.debug("userinfo", userinfo); + logger?.debug("hostPort", hostPort); + if (noPort && hasPort) { + return this.LOOKS_LIKE_ORIGIN.NONE; + } + + if ( + this.REGEXP_HOSTPORT_IPV4.test(hostPort) || + this.REGEXP_HOSTPORT_IPV6.test(hostPort) + ) { + return noIp ? this.LOOKS_LIKE_ORIGIN.NONE : this.LOOKS_LIKE_ORIGIN.IP; + } + + // Check for invalid chars. + if ( + this.REGEXP_LIKE_PROTOCOL.test(hostPort) || + this.REGEXP_USERINFO_INVALID_CHARS.test(userinfo) || + this.REGEXP_HOSTPORT_INVALID_CHARS.test(hostPort) || + (!this.REGEXP_SINGLE_WORD_HOST.test(hostPort) && + this.REGEXP_HOSTPORT_IP_LIKE.test(hostPort) && + this.REGEXP_HOSTPORT_INVALID_IP.test(hostPort)) + ) { + return this.LOOKS_LIKE_ORIGIN.NONE; + } + + // If it looks like a single word host, check the known domains. + if ( + !ignoreKnownDomains && + !userinfo && + !hasPort && + this.REGEXP_SINGLE_WORD_HOST.test(hostPort) + ) { + return Services.uriFixup.isDomainKnown(hostPort) + ? this.LOOKS_LIKE_ORIGIN.KNOWN_DOMAIN + : this.LOOKS_LIKE_ORIGIN.NONE; + } + + if (atIndex != -1 || hasPort) { + return this.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT; + } + + return this.LOOKS_LIKE_ORIGIN.OTHER; + }, + + /** + * The result type for `looksLikeOrigin()`. + */ + LOOKS_LIKE_ORIGIN: Object.freeze({ + /** + * The value cannot be an origin. + */ + NONE: 0, + /** + * The value may be an origin but it's not one of the other types. 
+ * Example: "mozilla.org" + */ + OTHER: 1, + /** + * The value is an IP address (that may or may not be reachable). + */ + IP: 2, + /** + * The value is a domain known to URI fixup. + */ + KNOWN_DOMAIN: 3, + /** + * The value appears to be an origin with a userinfo or port. + */ + USERINFO_OR_PORT: 4, + }), +}; diff --git a/toolkit/modules/moz.build b/toolkit/modules/moz.build @@ -211,6 +211,7 @@ EXTRA_JS_MODULES += [ "Timer.sys.mjs", "Troubleshoot.sys.mjs", "UpdateUtils.sys.mjs", + "UrlUtils.sys.mjs", "WebChannel.sys.mjs", ] diff --git a/toolkit/modules/tests/xpcshell/test_UrlUtils.js b/toolkit/modules/tests/xpcshell/test_UrlUtils.js @@ -0,0 +1,390 @@ +/* Any copyright is dedicated to the Public Domain. + * http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const { UrlUtils } = ChromeUtils.importESModule( + "resource://gre/modules/UrlUtils.sys.mjs" +); + +add_task(function test_looksLikeUrl() { + const tests = [ + // ===== START: SECTION_DONT_REQUIRE_PATH ================================== + // Does not look like a url. + { url: "", isUrl: false }, + { url: " ", isUrl: false }, + { url: "\t", isUrl: false }, + { url: "\n\n", isUrl: false }, + { url: "cheese/1", isUrl: false }, + { url: "cheese", isUrl: false }, + { url: "hello@this", isUrl: false }, + { url: "hello/", isUrl: false }, + { url: "google.com", isUrl: false }, + { url: "google.com/", isUrl: false }, + { url: "connect.mozilla.org", isUrl: false }, + { url: "hello@example.com", isUrl: false }, + + // Looks like a url. 
+ { url: "hello:this", isUrl: true }, + { url: "cheese/hello", isUrl: true }, + { url: "hello/%", isUrl: true }, + { url: "hello/?", isUrl: true }, + { url: "hello/#", isUrl: true }, + { url: "google.com/hello", isUrl: true }, + { url: "192.168.0.1", isUrl: true }, + { url: "192.168.0.1/hello", isUrl: true }, + { url: "192.168.0.1:4000", isUrl: true }, + { url: "192.168.0.1:4000/hello", isUrl: true }, + { url: "hello:password@google.com", isUrl: true }, + + // Is a url because of protocol. + { url: "https://google.com", isUrl: true }, + { url: "https://google.com/", isUrl: true }, + { url: "https://google.com/hello", isUrl: true }, + { url: "https://192.168.0.1", isUrl: true }, + { url: "https://192.168.0.1/hello", isUrl: true }, + { url: "https://cheese/hello", isUrl: true }, + // ===== END: SECTION_DONT_REQUIRE_PATH ==================================== + + // ===== START: SECTION_REQUIRE_PATH ======================================= + + // Does not look like a url. + { url: "", isUrl: false, options: { requirePath: true } }, + { url: " ", isUrl: false, options: { requirePath: true } }, + { url: "\t", isUrl: false, options: { requirePath: true } }, + { url: "\n\n", isUrl: false, options: { requirePath: true } }, + { url: "cheese/1", isUrl: false, options: { requirePath: true } }, + { url: "cheese", isUrl: false, options: { requirePath: true } }, + { url: "hello@this", isUrl: false, options: { requirePath: true } }, + { url: "hello/", isUrl: false, options: { requirePath: true } }, + { url: "google.com", isUrl: false, options: { requirePath: true } }, + { url: "google.com/", isUrl: false, options: { requirePath: true } }, + { + url: "connect.mozilla.org", + isUrl: false, + options: { requirePath: true }, + }, + { url: "192.168.0.1", isUrl: false, options: { requirePath: true } }, + { url: "192.168.0.1:4000", isUrl: false, options: { requirePath: true } }, + + // Emails are explicitly NOT counted as urls since we want to deal with + // them separately. 
+ { url: "hello@example.com", isUrl: false, options: { requirePath: true } }, + + // Looks like a url. + { url: "cheese/hello", isUrl: true, options: { requirePath: true } }, + { url: "hello:this", isUrl: true, options: { requirePath: true } }, + { url: "hello/%", isUrl: true, options: { requirePath: true } }, + { url: "hello/?", isUrl: true, options: { requirePath: true } }, + { url: "hello/#", isUrl: true, options: { requirePath: true } }, + { url: "google.com/hello", isUrl: true, options: { requirePath: true } }, + { url: "192.168.0.1/hello", isUrl: true, options: { requirePath: true } }, + { + url: "192.168.0.1:4000/hello", + isUrl: true, + options: { requirePath: true }, + }, + { + url: "hello:password@google.com", + isUrl: true, + options: { requirePath: true }, + }, + + // Is a url because of protocol. + { url: "https://google.com", isUrl: true, options: { requirePath: true } }, + { url: "https://google.com/", isUrl: true, options: { requirePath: true } }, + { url: "https://192.168.0.1", isUrl: true, options: { requirePath: true } }, + { + url: "https://google.com/hello", + isUrl: true, + options: { requirePath: true }, + }, + { + url: "https://192.168.0.1/hello", + isUrl: true, + options: { requirePath: true }, + }, + { + url: "https://cheese/hello", + isUrl: true, + options: { requirePath: true }, + }, + + // ===== END: SECTION_REQUIRE_PATH ========================================= + ]; + + for (let test of tests) { + const { url, options, isUrl } = test; + + info(`Testing '${url}' with options '${JSON.stringify(options)}'`); + + Assert.strictEqual( + UrlUtils.looksLikeUrl(url, options), + isUrl, + `'${url}' with options '${JSON.stringify(options)}' should${ + isUrl ? 
"" : "n't" + } be a url` + ); + } +}); + +add_task(function test_looksLikeOrigin() { + const tests = [ + // Test normal use + { origin: "", expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE }, + { origin: " ", expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE }, + + { + origin: ":pass@host", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT, + }, + { + origin: "user:@host", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT, + }, + { + origin: "user:pass@host", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT, + }, + { origin: ":1000", expected: UrlUtils.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT }, + + { origin: "cheese", expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE }, + { origin: "mozilla.org", expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER }, + { + origin: "connect.mozilla.org", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER, + }, + { origin: "192.168.0.1", expected: UrlUtils.LOOKS_LIKE_ORIGIN.IP }, + + { origin: "cheese:4000", expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE }, + { origin: "mozilla.org:4000", expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE }, + { + origin: "connect.mozilla.org:4000", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { origin: "192.168.0.1:4000", expected: UrlUtils.LOOKS_LIKE_ORIGIN.IP }, + + // Ignore known domains only applies to one word origins. + { + origin: "cheese", + options: { ignoreKnownDomains: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER, + }, + { + origin: "cheese:4000", + options: { ignoreKnownDomains: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "user:pass@cheese", + options: { ignoreKnownDomains: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT, + }, + + // Test origin cannot be an ip. 
+ { + origin: "mozilla.org", + options: { noIp: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER, + }, + { + origin: "connect.mozilla.org", + options: { noIp: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER, + }, + { + origin: "192.168.0.1", + options: { noIp: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "192.168.0.1:4000", + options: { noIp: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "user:pass@192.168.0.1:4000", + options: { noIp: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + // Test ignore port + { + origin: ":pass@host", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT, + }, + { + origin: "user:@host", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT, + }, + { + origin: "user:pass@host", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT, + }, + { + origin: ":1000", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "cheese:4000", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "mozilla.org:4000", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "connect.mozilla.org:4000", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "192.168.0.1:4000", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "user:pass@192.168.0.1:4000", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + // Tests merged in from test_tokenizer.js + { + origin: "", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "user@example.com", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "user:pass@example.com", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.USERINFO_OR_PORT, + }, + + { + origin: "example.com:1234", + // This should be 
`USERINFO_OR_PORT`, but it matches + // `REGEXP_LIKE_PROTOCOL`, so it returns `NONE`. + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "example.com:1234", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "user@example.com:1234", + // This should be `USERINFO_OR_PORT`, but it matches + // `REGEXP_LIKE_PROTOCOL`, so it returns `NONE`. + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "user@example.com:1234", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "user:pass@example.com:1234", + // This should be `USERINFO_OR_PORT`, but it matches + // `REGEXP_LIKE_PROTOCOL`, so it returns `NONE`. + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + { + origin: "user:pass@example.com:1234", + options: { noPort: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "1.2.3.4", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.IP, + }, + { + origin: "1.2.3.4", + options: { noIp: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "[2001:0db8:0000:0000:0000:8a2e:0370:7334]", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.IP, + }, + { + origin: "[2001:0db8:0000:0000:0000:8a2e:0370:7334]", + options: { noIp: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "[2001:db8::8a2e:370:7334]", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.IP, + }, + { + origin: "[2001:db8::8a2e:370:7334]", + options: { noIp: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "a!@#$%^&( z", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "example", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.NONE, + }, + + { + origin: "localhost", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.KNOWN_DOMAIN, + }, + { + origin: "localhost", + options: { ignoreKnownDomains: true }, + expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER, + }, + + { + origin: "example.com", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER, + }, + { + origin: "example.co", + 
expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER, + }, + { + origin: "example.c", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER, + }, + { + origin: "example.", + expected: UrlUtils.LOOKS_LIKE_ORIGIN.OTHER, + }, + ]; + + for (let test of tests) { + const { origin, options, expected } = test; + + info(`Testing '${origin}' with options '${JSON.stringify(options)}'`); + + Assert.strictEqual( + UrlUtils.looksLikeOrigin(origin, options), + expected, + "looksLikeOrigin should return expected value: " + + JSON.stringify({ origin, options }) + ); + } +}); diff --git a/toolkit/modules/tests/xpcshell/xpcshell.toml b/toolkit/modules/tests/xpcshell/xpcshell.toml @@ -113,6 +113,8 @@ skip-if = ["!updater"] reason = "LOCALE is not defined without MOZ_UPDATER" tags = "os_integration" +["test_UrlUtils.js"] + ["test_firstStartup.js"] skip-if = [ "os == 'android'", diff --git a/tools/@types/generated/lib.gecko.modules.d.ts b/tools/@types/generated/lib.gecko.modules.d.ts @@ -323,6 +323,7 @@ export interface Modules { "moz-src:///toolkit/components/uniffi-bindgen-gecko-js/tests/generated/RustUniffiBindingsTests.sys.mjs": typeof import("moz-src:///toolkit/components/uniffi-bindgen-gecko-js/tests/generated/RustUniffiBindingsTests.sys.mjs"), "moz-src:///toolkit/components/uniffi-bindgen-gecko-js/tests/generated/RustUniffiBindingsTestsExternalTypes.sys.mjs": typeof import("moz-src:///toolkit/components/uniffi-bindgen-gecko-js/tests/generated/RustUniffiBindingsTestsExternalTypes.sys.mjs"), "moz-src:///toolkit/profile/ProfilesDatastoreService.sys.mjs": typeof import("moz-src:///toolkit/profile/ProfilesDatastoreService.sys.mjs"), + "moz-src://gre/browser/components/urlbar/UrlUtils.sys.mjs": typeof import("moz-src://gre/browser/components/urlbar/UrlUtils.sys.mjs"), "resource:///actors/AboutLoginsParent.sys.mjs": typeof import("resource:///actors/AboutLoginsParent.sys.mjs"), "resource:///actors/AboutNewTabParent.sys.mjs": typeof import("resource:///actors/AboutNewTabParent.sys.mjs"), 
"resource:///actors/AboutPrivateBrowsingParent.sys.mjs": typeof import("resource:///actors/AboutPrivateBrowsingParent.sys.mjs"), @@ -405,6 +406,14 @@ export interface Modules { "resource:///modules/ThemeVariableMap.sys.mjs": typeof import("resource:///modules/ThemeVariableMap.sys.mjs"), "resource:///modules/TransientPrefs.sys.mjs": typeof import("resource:///modules/TransientPrefs.sys.mjs"), "resource:///modules/URILoadingHelper.sys.mjs": typeof import("resource:///modules/URILoadingHelper.sys.mjs"), + "resource:///modules/UrlbarPrefs.sys.mjs": typeof import("resource:///modules/UrlbarPrefs.sys.mjs"), + "resource:///modules/UrlbarProviderGlobalActions.sys.mjs": typeof import("resource:///modules/UrlbarProviderGlobalActions.sys.mjs"), + "resource:///modules/UrlbarProviderSemanticHistorySearch.sys.mjs": typeof import("resource:///modules/UrlbarProviderSemanticHistorySearch.sys.mjs"), + "resource:///modules/UrlbarProvidersManager.sys.mjs": typeof import("resource:///modules/UrlbarProvidersManager.sys.mjs"), + "resource:///modules/UrlbarResult.sys.mjs": typeof import("resource:///modules/UrlbarResult.sys.mjs"), + "resource:///modules/UrlbarSearchTermsPersistence.sys.mjs": typeof import("resource:///modules/UrlbarSearchTermsPersistence.sys.mjs"), + "resource:///modules/UrlbarSearchUtils.sys.mjs": typeof import("resource:///modules/UrlbarSearchUtils.sys.mjs"), + "resource:///modules/UrlbarTokenizer.sys.mjs": typeof import("resource:///modules/UrlbarTokenizer.sys.mjs"), "resource:///modules/UrlbarUtils.sys.mjs": typeof import("resource:///modules/UrlbarUtils.sys.mjs"), "resource:///modules/WebProtocolHandlerRegistrar.sys.mjs": typeof import("resource:///modules/WebProtocolHandlerRegistrar.sys.mjs"), "resource:///modules/WindowsJumpLists.sys.mjs": typeof import("resource:///modules/WindowsJumpLists.sys.mjs"), @@ -885,6 +894,7 @@ export interface Modules { "resource://gre/modules/UpdateTimerManager.sys.mjs": typeof 
import("resource://gre/modules/UpdateTimerManager.sys.mjs"), "resource://gre/modules/UpdateUtils.sys.mjs": typeof import("resource://gre/modules/UpdateUtils.sys.mjs"), "resource://gre/modules/UrlClassifierRemoteSettingsService.sys.mjs": typeof import("resource://gre/modules/UrlClassifierRemoteSettingsService.sys.mjs"), + "resource://gre/modules/UrlUtils.sys.mjs": typeof import("resource://gre/modules/UrlUtils.sys.mjs"), "resource://gre/modules/UsageReporting.sys.mjs": typeof import("resource://gre/modules/UsageReporting.sys.mjs"), "resource://gre/modules/WPTEventsParent.sys.mjs": typeof import("resource://gre/modules/WPTEventsParent.sys.mjs"), "resource://gre/modules/WebAuthnFeature.sys.mjs": typeof import("resource://gre/modules/WebAuthnFeature.sys.mjs"), diff --git a/tools/@types/generated/tspaths.json b/tools/@types/generated/tspaths.json @@ -1076,6 +1076,30 @@ "resource:///modules/UnitConverterTimezone.sys.mjs": [ "browser/components/urlbar/unitconverters/UnitConverterTimezone.sys.mjs" ], + "resource:///modules/UrlbarPrefs.sys.mjs": [ + "browser/components/urlbar/UrlbarPrefs.sys.mjs" + ], + "resource:///modules/UrlbarProviderGlobalActions.sys.mjs": [ + "browser/components/urlbar/UrlbarProviderGlobalActions.sys.mjs" + ], + "resource:///modules/UrlbarProviderSemanticHistorySearch.sys.mjs": [ + "browser/components/urlbar/UrlbarProviderSemanticHistorySearch.sys.mjs" + ], + "resource:///modules/UrlbarProvidersManager.sys.mjs": [ + "browser/components/urlbar/UrlbarProvidersManager.sys.mjs" + ], + "resource:///modules/UrlbarResult.sys.mjs": [ + "browser/components/urlbar/UrlbarResult.sys.mjs" + ], + "resource:///modules/UrlbarSearchTermsPersistence.sys.mjs": [ + "browser/components/urlbar/UrlbarSearchTermsPersistence.sys.mjs" + ], + "resource:///modules/UrlbarSearchUtils.sys.mjs": [ + "browser/components/urlbar/UrlbarSearchUtils.sys.mjs" + ], + "resource:///modules/UrlbarTokenizer.sys.mjs": [ + "browser/components/urlbar/UrlbarTokenizer.sys.mjs" + ], 
"resource:///modules/UrlbarUtils.sys.mjs": [ "browser/components/urlbar/UrlbarUtils.sys.mjs" ], @@ -6164,6 +6188,9 @@ "resource://gre/modules/UrlClassifierRemoteSettingsService.sys.mjs": [ "toolkit/components/url-classifier/UrlClassifierRemoteSettingsService.sys.mjs" ], + "resource://gre/modules/UrlUtils.sys.mjs": [ + "toolkit/modules/UrlUtils.sys.mjs" + ], "resource://gre/modules/UsageReporting.sys.mjs": [ "toolkit/components/telemetry/app/UsageReporting.sys.mjs" ],