commit 36b0ce8f9d5806fdfaa4c5d9909a2504677f5700 parent 5f658ee430a79b6058cba0f25d7445ed5161deae Author: Cristina Horotan <chorotan@mozilla.com> Date: Sat, 25 Oct 2025 00:27:45 +0300 Revert "Bug 1861698, Bug 1995634 - Add translations langId telemetry r=translations-reviewers,gregtatum" for causing tr*ns failures on browser_translations_full_page_telemetry_auto_translate.js This reverts commit 483b2f0f07885e4aa12884b596aae96ce896dddf. Revert "Bug 1995634 - Use nsIDocumentEncoder for Translations langId r=translations-reviewers,gregtatum" This reverts commit a07fd9772bbb931e789d55a0af554a7a8850d5b1. Diffstat:
14 files changed, 30 insertions(+), 433 deletions(-)
diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_auto_translate.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_auto_translate.js @@ -123,23 +123,5 @@ add_task(async function test_translations_telemetry_auto_translate() { expectedEventCount: 1, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_basics.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_basics.js @@ -89,23 +89,5 @@ add_task(async function test_translations_telemetry_basics() { expectedEventCount: 0, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_open_panel.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_open_panel.js @@ -89,23 +89,5 @@ add_task(async function test_translations_telemetry_open_panel() { expectedEventCount: 0, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer.js @@ -83,23 +83,5 @@ add_task(async function test_translations_panel_auto_offer() { expectedEventCount: 0, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 3, - assertForAllEvents: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer_settings.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer_settings.js @@ -116,23 +116,5 @@ add_task(async function test_translations_panel_auto_offer_settings() { expectedEventCount: 0, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_retranslate.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_retranslate.js @@ -160,23 +160,5 @@ add_task(async function test_translations_telemetry_retranslate() { expectedEventCount: 2, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_switch_languages.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_switch_languages.js @@ -95,24 +95,6 @@ add_task(async function test_translations_telemetry_switch_from_language() { expectedEventCount: 0, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); @@ -206,23 +188,5 @@ add_task(async function test_translations_telemetry_switch_to_language() { expectedEventCount: 0, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_failure.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_failure.js @@ -25,24 +25,6 @@ add_task( await FullPageTranslationsTestUtils.assertPageIsNotTranslated(runInPage); await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - - await TestTranslationsTelemetry.assertEvent( Glean.translations.translationRequest, { expectedEventCount: 0, @@ -128,24 +110,6 @@ add_task( expectedEventCount: 0, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); } ); @@ -168,24 +132,6 @@ add_task(async function test_translations_telemetry_auto_translation_failure() { await FullPageTranslationsTestUtils.assertPageIsNotTranslated(runInPage); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await TestTranslationsTelemetry.assertEvent(Glean.translationsPanel.open, { expectedEventCount: 1, expectNewFlowId: true, @@ -247,23 +193,5 @@ add_task(async function test_translations_telemetry_auto_translation_failure() { expectedEventCount: 0, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_request.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_request.js @@ -90,24 +90,6 @@ add_task(async function test_translations_telemetry_manual_translation() { expectedEventCount: 1, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); @@ -171,23 +153,5 @@ add_task(async function test_translations_telemetry_auto_translation() { expectedEventCount: 1, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 1, - assertForMostRecentEvent: { - html_lang_attribute: "es", - identified_language: "es", - lang_tags_match: true, - is_lang_attribute_valid: true, - extracted_code_units: 2132, - extraction_time: ms => 0 < ms && ms < 250, - identification_time: ms => 0 < ms && ms < 250, - total_time: ms => 0 < ms && ms < 500, - confident: true, - }, - } - ); - await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_unsupported_lang.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_unsupported_lang.js @@ -208,12 +208,5 @@ add_task(async function test_translations_telemetry_unsupported_lang() { expectedEventCount: 1, }); - await TestTranslationsTelemetry.assertEvent( - Glean.translations.identifyPageLanguage, - { - expectedEventCount: 0, - } - ); - await cleanup(); }); diff --git a/toolkit/components/translations/TranslationsTelemetry.sys.mjs b/toolkit/components/translations/TranslationsTelemetry.sys.mjs @@ -89,49 +89,6 @@ export class TranslationsTelemetry { } /** - * Records a telemetry event when the language of a page is identified for Translations. - * - * @param {object} data - * @param {string | null} [data.htmlLangAttribute] - * @param {string} data.identifiedLanguage - * @param {boolean | null} [data.langTagsMatch] - * @param {boolean | null} [data.isLangAttributeValid] - * @param {number} data.extractedCodeUnits - * @param {number} data.extractionTime - * @param {number} data.identificationTime - * @param {number} data.totalTime - * @param {boolean} data.confident - */ - static onIdentifyPageLanguage(data) { - const { - htmlLangAttribute, - identifiedLanguage, - langTagsMatch, - isLangAttributeValid, - extractedCodeUnits, - extractionTime, - identificationTime, - totalTime, - confident, - } = data; - Glean.translations.identifyPageLanguage.record({ - html_lang_attribute: htmlLangAttribute, - identified_language: identifiedLanguage, - lang_tags_match: langTagsMatch, - is_lang_attribute_valid: isLangAttributeValid, - extracted_code_units: extractedCodeUnits, - extraction_time: extractionTime, - identification_time: identificationTime, - total_time: totalTime, - confident, - }); - TranslationsTelemetry.logEventToConsole( - TranslationsTelemetry.onIdentifyPageLanguage, - data - ); - } - - /** * Records a telemetry event when full page translation fails. * * @param {string} errorMessage diff --git a/toolkit/components/translations/actors/TranslationsChild.sys.mjs b/toolkit/components/translations/actors/TranslationsChild.sys.mjs @@ -78,27 +78,6 @@ export class TranslationsChild extends JSWindowActorChild { this.#translatedDoc?.enterLazyTranslationsMode(); return undefined; } - case "Translations:ExtractPageText": { - const { document } = this; - if (!document) { - return ""; - } - - const { sufficientLength } = data; - - const encoder = Cu.createDocumentEncoder("text/plain"); - encoder.init( - document, - "text/plain", - Ci.nsIDocumentEncoder.OutputBodyOnly | - Ci.nsIDocumentEncoder.SkipInvisibleContent | - Ci.nsIDocumentEncoder.AllowCrossShadowBoundary | - Ci.nsIDocumentEncoder.OutputDropInvisibleBreak | - Ci.nsIDocumentEncoder.OutputDisallowLineBreaking - ); - - return encoder.encodeToStringWithMaxLength(sufficientLength); - } case "Translations:TranslatePage": { if (this.#translatedDoc?.engineStatus === "error") { this.#translatedDoc.destroy(); diff --git a/toolkit/components/translations/actors/TranslationsParent.sys.mjs b/toolkit/components/translations/actors/TranslationsParent.sys.mjs @@ -3554,8 +3554,7 @@ export class TranslationsParent extends JSWindowActorParent { } /** - * Extracts a substring of visible text from the content document and - * runs it through the language detector to determine the page's language. + * Uses the page extractor to identify the current page's language. * * @returns {Promise<DetectionResult>} */ @@ -3564,89 +3563,53 @@ export class TranslationsParent extends JSWindowActorParent { return this.languageState.detectedLanguages.identified; } - lazy.console.log( - "Beginning text extraction:", - this.browsingContext?.currentURI?.spec - ); - - const extractionStartTime = ChromeUtils.now(); - const pageText = await this.sendQuery("Translations:ExtractPageText", { - sufficientLength: 4096, - }); + const actor = + this.browsingContext?.currentWindowGlobal?.getActor("PageExtractor"); - if (this.#isDestroyed) { - return { language: "en", confident: false, languages: [] }; + if (!actor) { + throw new Error("Unable to get the PageExtractor actor."); } - const extractionTime = ChromeUtils.now() - extractionStartTime; + const startTime = ChromeUtils.now(); - lazy.console.debug( - `Extracted Page Text (${pageText.length} code units):\n\n`, - pageText - ); - - const extractionLog = - `Extracted ${pageText.length} code units of text in ` + - `${extractionTime.toFixed(3)} ms.`; - - lazy.console.log(extractionLog); - ChromeUtils.addProfilerMarker( - "TranslationsParent", - { startTime: extractionStartTime, innerWindowId: this.innerWindowId }, - extractionLog - ); + // Manual profiling on 10 page loads of https://es.wikipedia.org/wiki/Felis_catus: + // ------------------------------------------------------------------------------- + // + // No limit: 2064 samples, 224/237/294 [min/med/max]ms (~85k code units) + // 8192 limit: 681 samples, 75/ 87/128 [min/med/max]ms + // 4096 limit: 457 samples, 51/ 55/ 97 [min/med/max]ms + // 2048 limit: 240 samples, 29/ 39/ 64 [min/med/max]ms + // 1024 limit: 142 samples, 19/ 28/ 58 [min/med/max]ms + // + // 2048 Code units feels like a decent length for performance and sample size. + const pageText = await actor.getText({ sufficientLength: 2048 }); + if (this.#isDestroyed) { + return { language: "", confident: false, languages: [] }; + } - const identificationStartTime = ChromeUtils.now(); const result = await lazy.LanguageDetector.detectLanguage(pageText); - if (this.#isDestroyed) { - return { language: "en", confident: false, languages: [] }; + return { language: "", confident: false, languages: [] }; } - const identificationTime = ChromeUtils.now() - identificationStartTime; - const identificationLog = - `Identified ${pageText.length} code units of text as "${result.language}" ` + - `in ${identificationTime.toFixed(3)} ms.`; + const message = + `Identified page language as "${result.language}" ` + + `in ${((ChromeUtils.now() - startTime) / 1000).toFixed(3)} seconds: ` + + this.browsingContext?.currentURI?.spec; - lazy.console.log(identificationLog); ChromeUtils.addProfilerMarker( "TranslationsParent", - { startTime: identificationStartTime, innerWindowId: this.innerWindowId }, - identificationLog - ); - ChromeUtils.addProfilerMarker( - "TranslationsParent", - { startTime: extractionStartTime, innerWindowId: this.innerWindowId }, - "Total time to identify page language." + { startTime, innerWindowId: this.innerWindowId }, + message ); + lazy.console.debug("\nExtracted Page Text:\n\n", pageText); + lazy.console.log(message); + if (pageText.length < TranslationsParent.#DOC_CONFIDENCE_THRESHOLD) { result.confident = false; } - const htmlLangAttribute = - this.languageState?.detectedLanguages?.htmlLangAttribute ?? null; - const identifiedLanguage = result.language; - - TranslationsParent.telemetry().onIdentifyPageLanguage({ - htmlLangAttribute, - identifiedLanguage, - langTagsMatch: htmlLangAttribute - ? lazy.TranslationsUtils.langTagsMatch( - htmlLangAttribute, - identifiedLanguage - ) - : null, - isLangAttributeValid: htmlLangAttribute - ? lazy.TranslationsUtils.isLangTagValid(htmlLangAttribute) - : null, - extractedCodeUnits: pageText.length, - extractionTime, - identificationTime, - totalTime: extractionTime + identificationTime, - confident: result.confident, - }); - return result; } diff --git a/toolkit/components/translations/metrics.yaml b/toolkit/components/translations/metrics.yaml @@ -55,49 +55,6 @@ translations: - translations-telemetry-alerts@mozilla.com expires: never - identify_page_language: - type: event - description: > - Triggers when the language of a page is identified for Translations. - extra_keys: - html_lang_attribute: - type: string - description: The language tag of the page specified within the HTML. - identified_language: - type: string - description: The language tag that was identified via detection. - lang_tags_match: - type: boolean - description: Whether the identified language matches the HTML lang attribute. - is_lang_attribute_valid: - type: boolean - description: Whether the language tag of the page is a valid language tag. - extracted_code_units: - type: quantity - description: The length of the page text sample that was used for detection. - extraction_time: - type: quantity - description: The time it took to extract a sample of page text, in ms. - identification_time: - type: quantity - description: The time it took to identify a language from the extracted text, in ms. - total_time: - type: quantity - description: The total time it took to extract page text and identify it, in ms. - confident: - type: boolean - description: Whether the language detection was confident in the result. - bugs: - - https://bugzilla.mozilla.org/show_bug.cgi?id=1861698 - data_reviews: - - https://bugzilla.mozilla.org/show_bug.cgi?id=1861698 - data_sensitivity: - - interaction - notification_emails: - - translations-telemetry-alerts@mozilla.com - expires: never - - translation_request: type: event description: >