tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 09d005857125bfc9c95c6b26fa2a09e3672e3cdf
parent 9012dc1ded6e7743b79456c55b384145999acca3
Author: Neil Deakin <neil@mozilla.com>
Date:   Mon,  1 Dec 2025 22:22:55 +0000

Bug 1931403, when a form field does not have an associated label, search backwards from the field looking for nearby text, r=dimi,credential-management-reviewers

Differential Revision: https://phabricator.services.mozilla.com/D270959

Diffstat:
Mbrowser/extensions/formautofill/test/browser/heuristics/third_party/browser_Lush.js | 1+
Mbrowser/extensions/formautofill/test/unit/test_findLabelElements.js | 108+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mtoolkit/components/formautofill/shared/FormAutofillHeuristics.sys.mjs | 6++++++
Mtoolkit/components/formautofill/shared/LabelUtils.sys.mjs | 154+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
4 files changed, 228 insertions(+), 41 deletions(-)

diff --git a/browser/extensions/formautofill/test/browser/heuristics/third_party/browser_Lush.js b/browser/extensions/formautofill/test/browser/heuristics/third_party/browser_Lush.js @@ -22,6 +22,7 @@ add_heuristic_tests( { fieldName: "cc-name", reason: "fathom" }, { fieldName: "cc-exp-month", reason: "regex-heuristic" }, { fieldName: "cc-exp-year", reason: "regex-heuristic" }, + { fieldName: "cc-csc", reason: "regex-heuristic" }, ], }, ], diff --git a/browser/extensions/formautofill/test/unit/test_findLabelElements.js b/browser/extensions/formautofill/test/unit/test_findLabelElements.js @@ -13,6 +13,7 @@ const TESTCASES = [ </label> </form>`, expectedLabelIds: [["labelA"]], + expectedText: ["label type A"], }, { description: "Input contains in a label element.", @@ -23,6 +24,7 @@ const TESTCASES = [ </label>`, inputId: "typeB", expectedLabelIds: [["labelB"]], + expectedText: ["label type B inner div"], }, { description: '"for" attribute used to indicate input by one label.', @@ -30,6 +32,7 @@ const TESTCASES = [ <input id="typeC" type="text">`, inputId: "typeC", expectedLabelIds: [["labelC"]], + expectedText: [""], }, { description: '"for" attribute used to indicate input by multiple labels.', @@ -41,6 +44,7 @@ const TESTCASES = [ </form>`, inputId: "typeD", expectedLabelIds: [["labelD1", "labelD2", "labelD3"]], + expectedText: [""], }, { description: @@ -52,6 +56,7 @@ const TESTCASES = [ <input id=" typeE " type="text">`, inputId: " typeE ", expectedLabelIds: [["labelE4"]], + expectedText: [""], }, { description: "Input contains in a label element.", @@ -63,6 +68,7 @@ const TESTCASES = [ </label>`, inputId: "typeF", expectedLabelIds: [["labelF"], [""]], + expectedText: ["inner label", ""], }, { description: @@ -75,6 +81,7 @@ const TESTCASES = [ <label id="labelG3" for="typeG">label type G3</label>`, inputId: "typeG", expectedLabelIds: [["labelG1", "labelG2", "labelG3"]], + expectedText: [""], }, { description: @@ -87,6 +94,7 @@ const TESTCASES = [ </form>`, 
inputId: "labelH1", expectedLabelIds: [["labelH1"], ["labelH2"]], + expectedText: ["", ""], }, { description: @@ -100,6 +108,7 @@ const TESTCASES = [ </form>`, inputId: "labelI1", expectedLabelIds: [["labelI1"], []], + expectedText: ["", ""], }, { description: "three labels with no for attribute or child.", @@ -114,6 +123,7 @@ const TESTCASES = [ </form>`, inputId: "labelJ1", expectedLabelIds: [["labelJ2"], []], + expectedText: ["", ""], }, { description: "four labels with no for attribute or child.", @@ -131,6 +141,7 @@ const TESTCASES = [ </form>`, inputId: "labelK1", expectedLabelIds: [[], ["labelK2"], ["labelK3"], []], + expectedText: ["", "", "", ""], }, { description: @@ -145,6 +156,95 @@ const TESTCASES = [ </form>`, inputId: "labelK1", expectedLabelIds: [[], [], ["labelL2"], []], + expectedText: ["", "", "", ""], + }, + { + description: "input fields with no labels.", + document: `<form> + First Name: <input id="inputL1"> + Additional Name: <input> + Last Name: <input> + <span>Telephone</span>: <input> + <span>Country:</span><select><option>France<option>Germany</select> + <span>Email <b>address</b>:</span><input id="inputL2"> + </form>`, + inputId: "inputL1", + expectedLabelIds: [[], [], [], [], [], []], + expectedText: [ + "First Name:", + "Additional Name:", + "Last Name:", + "Telephone:", + "Country:", + "Email address:", + ], + }, + { + description: "input fields with no labels and mixed labels.", + document: `<form> + First Name: <input id="inputM1"> + Last <output>output</output>Name: <input> + <div><span>Telephone</span></div>: <input> + <input> + <label id="labelL1" for="inputM1">Given Name</label> + </form>`, + inputId: "inputM1", + expectedLabelIds: [["labelL1"], [], [], []], + expectedText: ["First Name:", "Name:", "Telephone:", ""], + }, + { + description: "input fields with no labels with deeply nested text.", + document: `<form> + <p><span><b>First Name</b></span</p>: <input id="inputN1"> + <p><span><i> Last Name </i> </span </p> : 
<p><span><input></span></p> + <div><div><div><div><div>Telephone</div></div> Number:</div></div></div><input> + <p><input>Text</p> + </form>`, + inputId: "inputN1", + expectedLabelIds: [[], [], [], []], + expectedText: ["First Name:", "Last Name :", "Telephone Number:", ""], + }, + { + description: + "input fields with no labels and other elements that shouldn't be labels.", + document: `<form> + Please fill in: + <fieldset>First Name</fieldset><input id="inputO1"> + (Optional) + <button>Last Name</button><input> + Telephone<input> + <p><input>Text</p> + </form>`, + inputId: "inputO1", + expectedLabelIds: [[], [], [], []], + expectedText: ["", "", "Telephone", ""], + }, + { + description: "input fields labels in other languages.", + document: `<form> + 이름 <input id="inputP1"> + മറുപേര് <input> + <span>телефон</span>: <input> + </form>`, + inputId: "inputP1", + expectedLabelIds: [[], [], []], + expectedText: ["이름", "മറുപേര്", "телефон:"], + }, + { + description: "input fields with labels too far away.", + document: `<form> + <span><b>Hello</b> + <span><span><span><span><span> + </span></span></span></span></span> + <input id="inputQ1"> + <span><b>Goodbye</b> + <span><span><span><span><span><span> + </span></span></span></span></span></span> + <input id="inputQ2"> + </form>`, + inputId: "inputQ1", + expectedLabelIds: [[], []], + expectedText: ["Hello", ""], }, ]; @@ -157,14 +257,18 @@ TESTCASES.forEach(testcase => { testcase.document ); - let formElements = doc.querySelectorAll("input", "select"); + let formElements = doc.querySelectorAll("input, select"); let labelsIndex = 0; for (let formElement of formElements) { let labels = LabelUtils.findLabelElements(formElement); Assert.deepEqual( labels.map(l => l.id), - testcase.expectedLabelIds[labelsIndex++] + testcase.expectedLabelIds[labelsIndex] ); + + let text = LabelUtils.findNearbyText(formElement); + Assert.deepEqual(text, testcase.expectedText[labelsIndex]); + labelsIndex++; } LabelUtils.clearLabelMap(); diff 
--git a/toolkit/components/formautofill/shared/FormAutofillHeuristics.sys.mjs b/toolkit/components/formautofill/shared/FormAutofillHeuristics.sys.mjs @@ -1270,6 +1270,12 @@ export const FormAutofillHeuristics = { yield* lazy.LabelUtils.extractLabelStrings(label); } + // If no labels were found, look for nearby text that could + // be used as a label. + if (!labels.length) { + yield lazy.LabelUtils.findNearbyText(element); + } + const ariaLabels = element.getAttribute("aria-label"); if (ariaLabels) { yield* [ariaLabels]; diff --git a/toolkit/components/formautofill/shared/LabelUtils.sys.mjs b/toolkit/components/formautofill/shared/LabelUtils.sys.mjs @@ -23,6 +23,10 @@ export const LabelUtils = { // @type {Map<string, array>} _mappedLabels: null, + // A map of elements that don't have associated <label> elements but there + // is nearby text that can form a label. The values in this map are the text. + _mappedText: null, + // A weak map consisting of label element and extracted strings pairs. // @type {WeakMap<HTMLLabelElement, array>} _labelStrings: null, @@ -116,63 +120,134 @@ export const LabelUtils = { * map of form controls that have already potentially matched */ findNextFormControl(element, reverse, potentialLabels) { - // Ignore elements and stop searching for elements that are already potentially - // labelled or are form elements that cannot be autofilled. - while ((element = this.nextElementInOrder(element, reverse))) { - if (potentialLabels.has(element)) { - break; - } else if ( - lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(element) - ) { - return element; - } else if ( - [ - "button", - "input", - "label", - "meter", - "output", - "progress", - "select", - "textarea", - ].includes(element.localName) - ) { - break; + let filter = e => { + // Ignore elements and stop searching for elements that are already + // potentially labelled or are form elements that cannot be autofilled. 
+ if (e.nodeType == Node.ELEMENT_NODE) { + if (potentialLabels.has(e)) { + return null; + } else if (lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(e)) { + // Return this form element. + return e; + } + } + + return false; + }; + + return this.iterateNodes(element, reverse, filter); + }, + + /** + * Iterate over the nodes in a tree and call the filter on each one. We + * don't use an existing iterator (such as a TreeWalker) because we want + * to traverse the tree by visting the parents along the way first. The + * filter should return exactly false if the node is not accepted and + * iteration should continue. Otherwise, the value returned by the filter + * is returned. The iteration also stops and returns null if + * shouldStopIterating returns true for an element. + */ + iterateNodes(element, reverse, filter) { + while (element) { + let next = reverse ? element.previousSibling : element.nextSibling; + if (!next) { + element = element.parentNode; + if (element && this.shouldStopIterating(element)) { + return null; + } + } else { + let child = next; + while (child) { + if (filter) { + let filterResult = filter(child); + if (filterResult !== false) { + return filterResult; + } + } + + if ( + child.nodeType == Node.ELEMENT_NODE && + this.shouldStopIterating(child) + ) { + return null; + } + + element = child; + child = reverse ? child.lastChild : child.firstChild; + } } } return null; }, - nextElementInOrder(element, reverse) { - let result = reverse ? element.lastElementChild : element.firstElementChild; - if (result) { - return result; + // Return true if this is a form control or other element where iterating + // should stop. 
+ shouldStopIterating(element) { + return [ + "button", + "input", + "label", + "meter", + "output", + "progress", + "select", + "textarea", + "form", + "fieldset", + "script", + "style", + ].includes(element.localName); + }, + + /** + * Given an element that doesn't have an associated label, iterate backwards + * and find inline text nearby that likely serves as the label. + */ + findNearbyText(element) { + if (this._mappedText.has(element)) { + return this._mappedText.get(element); } - while (element) { - result = reverse - ? element.previousElementSibling - : element.nextElementSibling; - if (result) { - return result; - } + let txt = ""; + let current = element; + + // A simple guard to prevent searching too far. + let count = 10; - element = element.parentNode; + let returnTextNode = node => { + // As a shortcut, if text was already found, stop iterating when a + // div element was found. if ( - !element || - element.localName == "form" || - element.localName == "fieldset" + !count-- || + (current.nodeType == Node.ELEMENT_NODE && + current.localName == "div" && + txt.length) ) { - break; + return null; + } + + return node.nodeType == Node.TEXT_NODE ? node : false; + }; + + while ((current = this.iterateNodes(current, true, returnTextNode))) { + let textContent = current.nodeValue; + if (textContent) { + // Prepend the found text. + txt = textContent + txt; } } - return null; + // Always add the element even where there is no text, so that it isn't + // searched for again. + txt = txt.replace(/\s{2,}/g, " ").trim(); // Collapse duplicate whitespaces + this._mappedText.set(element, txt); + return txt; }, generateLabelMap(doc) { this._mappedLabels = new Map(); + this._mappedText = new Map(); this._labelStrings = new WeakMap(); // A map of potential label -> control for labels that don't have an id or @@ -214,6 +289,7 @@ export const LabelUtils = { clearLabelMap() { this._mappedLabels = null; + this._mappedText = null; this._labelStrings = null; },