commit 09d005857125bfc9c95c6b26fa2a09e3672e3cdf
parent 9012dc1ded6e7743b79456c55b384145999acca3
Author: Neil Deakin <neil@mozilla.com>
Date: Mon, 1 Dec 2025 22:22:55 +0000
Bug 1931403, when a form field does not have an associated label, search backwards from the field looking for nearby text, r=dimi,credential-management-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D270959
Diffstat:
4 files changed, 228 insertions(+), 41 deletions(-)
diff --git a/browser/extensions/formautofill/test/browser/heuristics/third_party/browser_Lush.js b/browser/extensions/formautofill/test/browser/heuristics/third_party/browser_Lush.js
@@ -22,6 +22,7 @@ add_heuristic_tests(
{ fieldName: "cc-name", reason: "fathom" },
{ fieldName: "cc-exp-month", reason: "regex-heuristic" },
{ fieldName: "cc-exp-year", reason: "regex-heuristic" },
+ { fieldName: "cc-csc", reason: "regex-heuristic" },
],
},
],
diff --git a/browser/extensions/formautofill/test/unit/test_findLabelElements.js b/browser/extensions/formautofill/test/unit/test_findLabelElements.js
@@ -13,6 +13,7 @@ const TESTCASES = [
</label>
</form>`,
expectedLabelIds: [["labelA"]],
+ expectedText: ["label type A"],
},
{
description: "Input contains in a label element.",
@@ -23,6 +24,7 @@ const TESTCASES = [
</label>`,
inputId: "typeB",
expectedLabelIds: [["labelB"]],
+ expectedText: ["label type B inner div"],
},
{
description: '"for" attribute used to indicate input by one label.',
@@ -30,6 +32,7 @@ const TESTCASES = [
<input id="typeC" type="text">`,
inputId: "typeC",
expectedLabelIds: [["labelC"]],
+ expectedText: [""],
},
{
description: '"for" attribute used to indicate input by multiple labels.',
@@ -41,6 +44,7 @@ const TESTCASES = [
</form>`,
inputId: "typeD",
expectedLabelIds: [["labelD1", "labelD2", "labelD3"]],
+ expectedText: [""],
},
{
description:
@@ -52,6 +56,7 @@ const TESTCASES = [
<input id=" typeE " type="text">`,
inputId: " typeE ",
expectedLabelIds: [["labelE4"]],
+ expectedText: [""],
},
{
description: "Input contains in a label element.",
@@ -63,6 +68,7 @@ const TESTCASES = [
</label>`,
inputId: "typeF",
expectedLabelIds: [["labelF"], [""]],
+ expectedText: ["inner label", ""],
},
{
description:
@@ -75,6 +81,7 @@ const TESTCASES = [
<label id="labelG3" for="typeG">label type G3</label>`,
inputId: "typeG",
expectedLabelIds: [["labelG1", "labelG2", "labelG3"]],
+ expectedText: [""],
},
{
description:
@@ -87,6 +94,7 @@ const TESTCASES = [
</form>`,
inputId: "labelH1",
expectedLabelIds: [["labelH1"], ["labelH2"]],
+ expectedText: ["", ""],
},
{
description:
@@ -100,6 +108,7 @@ const TESTCASES = [
</form>`,
inputId: "labelI1",
expectedLabelIds: [["labelI1"], []],
+ expectedText: ["", ""],
},
{
description: "three labels with no for attribute or child.",
@@ -114,6 +123,7 @@ const TESTCASES = [
</form>`,
inputId: "labelJ1",
expectedLabelIds: [["labelJ2"], []],
+ expectedText: ["", ""],
},
{
description: "four labels with no for attribute or child.",
@@ -131,6 +141,7 @@ const TESTCASES = [
</form>`,
inputId: "labelK1",
expectedLabelIds: [[], ["labelK2"], ["labelK3"], []],
+ expectedText: ["", "", "", ""],
},
{
description:
@@ -145,6 +156,95 @@ const TESTCASES = [
</form>`,
inputId: "labelK1",
expectedLabelIds: [[], [], ["labelL2"], []],
+ expectedText: ["", "", "", ""],
+ },
+ {
+ description: "input fields with no labels.",
+ document: `<form>
+ First Name: <input id="inputL1">
+ Additional Name: <input>
+ Last Name: <input>
+ <span>Telephone</span>: <input>
+ <span>Country:</span><select><option>France<option>Germany</select>
+ <span>Email <b>address</b>:</span><input id="inputL2">
+ </form>`,
+ inputId: "inputL1",
+ expectedLabelIds: [[], [], [], [], [], []],
+ expectedText: [
+ "First Name:",
+ "Additional Name:",
+ "Last Name:",
+ "Telephone:",
+ "Country:",
+ "Email address:",
+ ],
+ },
+ {
+ description: "input fields with no labels and mixed labels.",
+ document: `<form>
+ First Name: <input id="inputM1">
+ Last <output>output</output>Name: <input>
+ <div><span>Telephone</span></div>: <input>
+ <input>
+ <label id="labelL1" for="inputM1">Given Name</label>
+ </form>`,
+ inputId: "inputM1",
+ expectedLabelIds: [["labelL1"], [], [], []],
+ expectedText: ["First Name:", "Name:", "Telephone:", ""],
+ },
+ {
+ description: "input fields with no labels with deeply nested text.",
+ document: `<form>
+ <p><span><b>First Name</b></span</p>: <input id="inputN1">
+ <p><span><i> Last Name </i> </span </p> : <p><span><input></span></p>
+ <div><div><div><div><div>Telephone</div></div> Number:</div></div></div><input>
+ <p><input>Text</p>
+ </form>`,
+ inputId: "inputN1",
+ expectedLabelIds: [[], [], [], []],
+ expectedText: ["First Name:", "Last Name :", "Telephone Number:", ""],
+ },
+ {
+ description:
+ "input fields with no labels and other elements that shouldn't be labels.",
+ document: `<form>
+ Please fill in:
+ <fieldset>First Name</fieldset><input id="inputO1">
+ (Optional)
+ <button>Last Name</button><input>
+ Telephone<input>
+ <p><input>Text</p>
+ </form>`,
+ inputId: "inputO1",
+ expectedLabelIds: [[], [], [], []],
+ expectedText: ["", "", "Telephone", ""],
+ },
+ {
+ description: "input fields labels in other languages.",
+ document: `<form>
+ 이름 <input id="inputP1">
+ മറുപേര് <input>
+ <span>телефон</span>: <input>
+ </form>`,
+ inputId: "inputP1",
+ expectedLabelIds: [[], [], []],
+ expectedText: ["이름", "മറുപേര്", "телефон:"],
+ },
+ {
+ description: "input fields with labels too far away.",
+ document: `<form>
+ <span><b>Hello</b>
+ <span><span><span><span><span>
+ </span></span></span></span></span>
+ <input id="inputQ1">
+ <span><b>Goodbye</b>
+ <span><span><span><span><span><span>
+ </span></span></span></span></span></span>
+ <input id="inputQ2">
+ </form>`,
+ inputId: "inputQ1",
+ expectedLabelIds: [[], []],
+ expectedText: ["Hello", ""],
},
];
@@ -157,14 +257,18 @@ TESTCASES.forEach(testcase => {
testcase.document
);
- let formElements = doc.querySelectorAll("input", "select");
+ let formElements = doc.querySelectorAll("input, select");
let labelsIndex = 0;
for (let formElement of formElements) {
let labels = LabelUtils.findLabelElements(formElement);
Assert.deepEqual(
labels.map(l => l.id),
- testcase.expectedLabelIds[labelsIndex++]
+ testcase.expectedLabelIds[labelsIndex]
);
+
+ let text = LabelUtils.findNearbyText(formElement);
+ Assert.deepEqual(text, testcase.expectedText[labelsIndex]);
+ labelsIndex++;
}
LabelUtils.clearLabelMap();
diff --git a/toolkit/components/formautofill/shared/FormAutofillHeuristics.sys.mjs b/toolkit/components/formautofill/shared/FormAutofillHeuristics.sys.mjs
@@ -1270,6 +1270,12 @@ export const FormAutofillHeuristics = {
yield* lazy.LabelUtils.extractLabelStrings(label);
}
+ // If no labels were found, look for nearby text that could
+ // be used as a label.
+ if (!labels.length) {
+ yield lazy.LabelUtils.findNearbyText(element);
+ }
+
const ariaLabels = element.getAttribute("aria-label");
if (ariaLabels) {
yield* [ariaLabels];
diff --git a/toolkit/components/formautofill/shared/LabelUtils.sys.mjs b/toolkit/components/formautofill/shared/LabelUtils.sys.mjs
@@ -23,6 +23,10 @@ export const LabelUtils = {
// @type {Map<string, array>}
_mappedLabels: null,
+ // A map of elements that don't have associated <label> elements but do have
+ // nearby text that can form a label. The values in this map are the text.
+ _mappedText: null,
+
// A weak map consisting of label element and extracted strings pairs.
// @type {WeakMap<HTMLLabelElement, array>}
_labelStrings: null,
@@ -116,63 +120,134 @@ export const LabelUtils = {
* map of form controls that have already potentially matched
*/
findNextFormControl(element, reverse, potentialLabels) {
- // Ignore elements and stop searching for elements that are already potentially
- // labelled or are form elements that cannot be autofilled.
- while ((element = this.nextElementInOrder(element, reverse))) {
- if (potentialLabels.has(element)) {
- break;
- } else if (
- lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(element)
- ) {
- return element;
- } else if (
- [
- "button",
- "input",
- "label",
- "meter",
- "output",
- "progress",
- "select",
- "textarea",
- ].includes(element.localName)
- ) {
- break;
+ let filter = e => {
+ // Ignore elements and stop searching for elements that are already
+ // potentially labelled or are form elements that cannot be autofilled.
+ if (e.nodeType == Node.ELEMENT_NODE) {
+ if (potentialLabels.has(e)) {
+ return null;
+ } else if (lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(e)) {
+ // Return this form element.
+ return e;
+ }
+ }
+
+ return false;
+ };
+
+ return this.iterateNodes(element, reverse, filter);
+ },
+
+ /**
+ * Iterate over the nodes in a tree and call the filter on each one. We
+ * don't use an existing iterator (such as a TreeWalker) because we want
+ * to traverse the tree by visiting the parents along the way first. The
+ * filter should return exactly false if the node is not accepted and
+ * iteration should continue. Otherwise, the value returned by the filter
+ * is returned. The iteration also stops and returns null if
+ * shouldStopIterating returns true for an element.
+ */
+ iterateNodes(element, reverse, filter) {
+ while (element) {
+ let next = reverse ? element.previousSibling : element.nextSibling;
+ if (!next) {
+ element = element.parentNode;
+ if (element && this.shouldStopIterating(element)) {
+ return null;
+ }
+ } else {
+ let child = next;
+ while (child) {
+ if (filter) {
+ let filterResult = filter(child);
+ if (filterResult !== false) {
+ return filterResult;
+ }
+ }
+
+ if (
+ child.nodeType == Node.ELEMENT_NODE &&
+ this.shouldStopIterating(child)
+ ) {
+ return null;
+ }
+
+ element = child;
+ child = reverse ? child.lastChild : child.firstChild;
+ }
}
}
return null;
},
- nextElementInOrder(element, reverse) {
- let result = reverse ? element.lastElementChild : element.firstElementChild;
- if (result) {
- return result;
+ // Return true if this is a form control or other element where iterating
+ // should stop.
+ shouldStopIterating(element) {
+ return [
+ "button",
+ "input",
+ "label",
+ "meter",
+ "output",
+ "progress",
+ "select",
+ "textarea",
+ "form",
+ "fieldset",
+ "script",
+ "style",
+ ].includes(element.localName);
+ },
+
+ /**
+ * Given an element that doesn't have an associated label, iterate backwards
+ * and find inline text nearby that likely serves as the label.
+ */
+ findNearbyText(element) {
+ if (this._mappedText.has(element)) {
+ return this._mappedText.get(element);
}
- while (element) {
- result = reverse
- ? element.previousElementSibling
- : element.nextElementSibling;
- if (result) {
- return result;
- }
+ let txt = "";
+ let current = element;
+
+ // A simple guard to prevent searching too far.
+ let count = 10;
- element = element.parentNode;
+ let returnTextNode = node => {
+ // As a shortcut, if text was already found, stop iterating when a
+ // div element was found.
if (
- !element ||
- element.localName == "form" ||
- element.localName == "fieldset"
+ !count-- ||
+ (current.nodeType == Node.ELEMENT_NODE &&
+ current.localName == "div" &&
+ txt.length)
) {
- break;
+ return null;
+ }
+
+ return node.nodeType == Node.TEXT_NODE ? node : false;
+ };
+
+ while ((current = this.iterateNodes(current, true, returnTextNode))) {
+ let textContent = current.nodeValue;
+ if (textContent) {
+ // Prepend the found text.
+ txt = textContent + txt;
}
}
- return null;
+ // Always add the element even when there is no text, so that it isn't
+ // searched for again.
+ txt = txt.replace(/\s{2,}/g, " ").trim(); // Collapse duplicate whitespaces
+ this._mappedText.set(element, txt);
+ return txt;
},
generateLabelMap(doc) {
this._mappedLabels = new Map();
+ this._mappedText = new Map();
this._labelStrings = new WeakMap();
// A map of potential label -> control for labels that don't have an id or
@@ -214,6 +289,7 @@ export const LabelUtils = {
clearLabelMap() {
this._mappedLabels = null;
+ this._mappedText = null;
this._labelStrings = null;
},