tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

telemetry.sys.mjs (42859B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
      3 * You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 // Support for Sync-and-FxA-related telemetry, which is submitted in a special-purpose
      6 // telemetry ping called the "sync ping", documented here:
      7 //
      8 //  ../../../toolkit/components/telemetry/docs/data/sync-ping.rst
      9 //
     10 // The sync ping contains identifiers that are linked to the user's Firefox Account
     11 // and are separate from the main telemetry client_id, so this file is also responsible
     12 // for ensuring that we can delete those pings upon user request, by plumbing its
     13 // identifiers into the "deletion-request" ping.
     14 
     15 import { Log } from "resource://gre/modules/Log.sys.mjs";
     16 
     17 const lazy = {};
     18 
     19 ChromeUtils.defineESModuleGetters(lazy, {
     20  Async: "resource://services-common/async.sys.mjs",
     21  AuthenticationError: "resource://services-sync/sync_auth.sys.mjs",
     22  FxAccounts: "resource://gre/modules/FxAccounts.sys.mjs",
     23  ObjectUtils: "resource://gre/modules/ObjectUtils.sys.mjs",
     24  Observers: "resource://services-common/observers.sys.mjs",
     25  Resource: "resource://services-sync/resource.sys.mjs",
     26  Status: "resource://services-sync/status.sys.mjs",
     27  Svc: "resource://services-sync/util.sys.mjs",
     28  TelemetryController: "resource://gre/modules/TelemetryController.sys.mjs",
     29  TelemetryEnvironment: "resource://gre/modules/TelemetryEnvironment.sys.mjs",
     30  TelemetryUtils: "resource://gre/modules/TelemetryUtils.sys.mjs",
     31  Weave: "resource://services-sync/main.sys.mjs",
     32 });
     33 
     34 ChromeUtils.defineLazyGetter(lazy, "fxAccounts", () => {
     35  return ChromeUtils.importESModule(
     36    "resource://gre/modules/FxAccounts.sys.mjs"
     37  ).getFxAccountsSingleton();
     38 });
     39 
     40 import * as constants from "resource://services-sync/constants.sys.mjs";
     41 
     42 ChromeUtils.defineLazyGetter(
     43  lazy,
     44  "WeaveService",
     45  () => Cc["@mozilla.org/weave/service;1"].getService().wrappedJSObject
     46 );
     47 const log = Log.repository.getLogger("Sync.Telemetry");
     48 
     49 const TOPICS = [
     50  // For tracking change to account/device identifiers.
     51  "fxaccounts:new_device_id",
     52  "fxaccounts:onlogout",
     53  "weave:service:ready",
     54  "weave:service:login:got-hashed-id",
     55 
     56  // For whole-of-sync metrics.
     57  "weave:service:sync:start",
     58  "weave:service:sync:finish",
     59  "weave:service:sync:error",
     60 
     61  // For individual engine metrics.
     62  "weave:engine:sync:start",
     63  "weave:engine:sync:finish",
     64  "weave:engine:sync:error",
     65  "weave:engine:sync:applied",
     66  "weave:engine:sync:step",
     67  "weave:engine:sync:uploaded",
     68  "weave:engine:validate:finish",
     69  "weave:engine:validate:error",
     70 
     71  // For ad-hoc telemetry events.
     72  "weave:telemetry:event",
     73  "weave:telemetry:histogram",
     74  "fxa:telemetry:event",
     75 
     76  "weave:telemetry:migration",
     77 ];
     78 
     79 const PING_FORMAT_VERSION = 1;
     80 
     81 const EMPTY_UID = "0".repeat(32);
     82 
     83 // The set of engines we record telemetry for - any other engines are ignored.
     84 const ENGINES = new Set([
     85  "addons",
     86  "bookmarks",
     87  "clients",
     88  "forms",
     89  "history",
     90  "passwords",
     91  "prefs",
     92  "tabs",
     93  "extension-storage",
     94  "addresses",
     95  "creditcards",
     96 ]);
     97 
     98 function tryGetMonotonicTimestamp() {
     99  try {
    100    return Services.telemetry.msSinceProcessStart();
    101  } catch (e) {
    102    log.warn("Unable to get a monotonic timestamp!");
    103    return -1;
    104  }
    105 }
    106 
    107 function timeDeltaFrom(monotonicStartTime) {
    108  let now = tryGetMonotonicTimestamp();
    109  if (monotonicStartTime !== -1 && now !== -1) {
    110    return Math.round(now - monotonicStartTime);
    111  }
    112  return -1;
    113 }
    114 
    115 const NS_ERROR_MODULE_BASE_OFFSET = 0x45;
    116 const NS_ERROR_MODULE_NETWORK = 6;
    117 
    118 // A reimplementation of NS_ERROR_GET_MODULE, which surprisingly doesn't seem
    119 // to exist anywhere in .js code in a way that can be reused.
    120 // This is taken from DownloadCore.sys.mjs.
    121 function NS_ERROR_GET_MODULE(code) {
    122  return ((code & 0x7fff0000) >> 16) - NS_ERROR_MODULE_BASE_OFFSET;
    123 }
    124 
    125 // Converts extra integer fields to strings, rounds floats to three
    126 // decimal places (nanosecond precision for timings), and removes profile
    127 // directory paths and URLs from potential error messages.
    128 function normalizeExtraTelemetryFields(extra) {
    129  let result = {};
    130  for (let key in extra) {
    131    let value = extra[key];
    132    let type = typeof value;
    133    if (type == "string") {
    134      result[key] = ErrorSanitizer.cleanErrorMessage(value);
    135    } else if (type == "number") {
    136      result[key] = Number.isInteger(value)
    137        ? value.toString(10)
    138        : value.toFixed(3);
    139    } else if (type != "undefined") {
    140      throw new TypeError(
    141        `Invalid type ${type} for extra telemetry field ${key}`
    142      );
    143    }
    144  }
    145  return lazy.ObjectUtils.isEmpty(result) ? undefined : result;
    146 }
    147 
    148 // Keps track of the counts of individual records fate during a sync cycle
    149 // The main reason this is a class is to keep track of reasons individual records
    150 // failure reasons without huge memory overhead.
    151 export class SyncedRecordsTelemetry {
    152  // applied    => number of items that should be applied.
    153  // failed     => number of items that failed in this sync.
    154  // newFailed  => number of items that failed for the first time in this sync.
    155  // reconciled => number of items that were reconciled.
    156  // failedReasons => {name, count} of reasons a record failed
    157  incomingCounts = {
    158    applied: 0,
    159    failed: 0,
    160    newFailed: 0,
    161    reconciled: 0,
    162    failedReasons: null,
    163  };
    164  outgoingCounts = { failed: 0, sent: 0, failedReasons: null };
    165 
    166  addIncomingFailedReason(reason) {
    167    if (!this.incomingCounts.failedReasons) {
    168      this.incomingCounts.failedReasons = [];
    169    }
    170    let transformedReason = SyncTelemetry.transformError(reason);
    171    // Some errors like http/nss errors don't have an error object
    172    // those will be caught by the higher level telemetry
    173    if (!transformedReason.error) {
    174      return;
    175    }
    176 
    177    let index = this.incomingCounts.failedReasons.findIndex(
    178      reasons => reasons.name === transformedReason.error
    179    );
    180 
    181    if (index >= 0) {
    182      this.incomingCounts.failedReasons[index].count += 1;
    183    } else {
    184      this.incomingCounts.failedReasons.push({
    185        name: transformedReason.error,
    186        count: 1,
    187      });
    188    }
    189  }
    190 
    191  addOutgoingFailedReason(reason) {
    192    if (!this.outgoingCounts.failedReasons) {
    193      this.outgoingCounts.failedReasons = [];
    194    }
    195    let transformedReason = SyncTelemetry.transformError(reason);
    196    // Some errors like http/nss errors don't have an error object
    197    // those will be caught by the higher level telemetry
    198    if (!transformedReason.error) {
    199      return;
    200    }
    201    let index = this.outgoingCounts.failedReasons.findIndex(
    202      reasons => reasons.name === transformedReason.error
    203    );
    204    if (index >= 0) {
    205      this.outgoingCounts.failedReasons[index].count += 1;
    206    } else {
    207      this.outgoingCounts.failedReasons.push({
    208        name: transformedReason.error,
    209        count: 1,
    210      });
    211    }
    212  }
    213 }
    214 
    215 // The `ErrorSanitizer` has 2 main jobs:
    216 // * Remove PII from errors, such as the profile dir or URLs the user might
    217 //   have visited.
    218 // * Normalize errors so different locales or operating systems etc don't
    219 //   generate different messages for the same underlying error.
    220 // * [TODO] Normalize errors so environmental factors don't influence message.
    221 //   For example, timestamps or GUIDs should be replaced with something static.
    222 export class ErrorSanitizer {
    223  // Things we normalize - this isn't exhaustive, but covers the common error messages we see.
    224  // Eg:
    225  // > Win error 112 during operation write on file [profileDir]\weave\addonsreconciler.json (Espacio en disco insuficiente. )
    226  // > Win error 112 during operation write on file [profileDir]\weave\addonsreconciler.json (Diskte yeterli yer yok. )
    227  // > <snip many other translations of the error>
    228  // > Unix error 28 during operation write on file [profileDir]/weave/addonsreconciler.json (No space left on device)
    229  // These tend to crowd out other errors we might care about (eg, top 5 errors for some engines are
    230  // variations of the "no space left on device")
    231 
    232  // Note that only errors that have same-but-different errors on Windows and Unix are here - we
    233  // still sanitize ones that aren't in these maps to remove the translations etc - eg,
    234  // `ERROR_SHARING_VIOLATION` doesn't really have a unix equivalent, so doesn't appear here, but
    235  // we still strip the translations to avoid the variants.
    236  static E_PERMISSION_DENIED = "OS error [Permission denied]";
    237  static E_NO_FILE_OR_DIR = "OS error [File/Path not found]";
    238 
    239  static DOMErrorSubstitutions = {
    240    NotFoundError: this.E_NO_FILE_OR_DIR,
    241    NotAllowedError: this.E_PERMISSION_DENIED,
    242  };
    243 
    244  // IOUtils error messages include the specific nsresult error code that caused them.
    245  static NS_ERROR_RE = new RegExp(/ \(NS_ERROR_.*\)$/);
    246 
    247  static #cleanOSErrorMessage(message, error = undefined) {
    248    if (DOMException.isInstance(error)) {
    249      const sub = this.DOMErrorSubstitutions[error.name];
    250      message = message.replaceAll("\\", "/");
    251      message = message.replace(this.NS_ERROR_RE, "");
    252      if (sub) {
    253        return `${sub} ${message}`;
    254      }
    255    }
    256 
    257    return message;
    258  }
    259 
    260  // A regex we can use to replace the profile dir in error messages. We use a
    261  // regexp so we can simply replace all case-insensitive occurences.
    262  // This escaping function is from:
    263  // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
    264  static reProfileDir = new RegExp(
    265    PathUtils.profileDir.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"),
    266    "gi"
    267  );
    268 
    269  /**
    270   * Clean an error message, removing PII and normalizing OS-specific messages.
    271   *
    272   * @param {string} message The error message
    273   * @param {Error?} error The error class instance, if any.
    274   */
    275  static cleanErrorMessage(message, error = undefined) {
    276    // There's a chance the profiledir is in the error string which is PII we
    277    // want to avoid including in the ping.
    278    message = message.replace(this.reProfileDir, "[profileDir]");
    279    // MSG_INVALID_URL from /dom/bindings/Errors.msg -- no way to access this
    280    // directly from JS.
    281    if (message.endsWith("is not a valid URL.")) {
    282      message = "<URL> is not a valid URL.";
    283    }
    284    // Try to filter things that look somewhat like a URL (in that they contain a
    285    // colon in the middle of non-whitespace), in case anything else is including
    286    // these in error messages. Note that JSON.stringified stuff comes through
    287    // here, so we explicitly ignore double-quotes as well.
    288    message = message.replace(/[^\s"]+:[^\s"]+/g, "<URL>");
    289 
    290    // Anywhere that's normalized the guid in errors we can easily filter
    291    // to make it easier to aggregate these types of errors
    292    message = message.replace(/<guid: ([^>]+)>/g, "<GUID>");
    293 
    294    return this.#cleanOSErrorMessage(message, error);
    295  }
    296 }
    297 
    298 // This function validates the payload of a telemetry "event" - this can be
    299 // removed once there are APIs available for the telemetry modules to collect
    300 // these events (bug 1329530) - but for now we simulate that planned API as
    301 // best we can.
    302 function validateTelemetryEvent(eventDetails) {
    303  let { object, method, value, extra } = eventDetails;
    304  // Do do basic validation of the params - everything except "extra" must
    305  // be a string. method and object are required.
    306  if (
    307    typeof method != "string" ||
    308    typeof object != "string" ||
    309    (value && typeof value != "string") ||
    310    (extra && typeof extra != "object")
    311  ) {
    312    log.warn("Invalid event parameters - wrong types", eventDetails);
    313    return false;
    314  }
    315  // length checks.
    316  if (
    317    method.length > 20 ||
    318    object.length > 20 ||
    319    (value && value.length > 80)
    320  ) {
    321    log.warn("Invalid event parameters - wrong lengths", eventDetails);
    322    return false;
    323  }
    324 
    325  // extra can be falsey, or an object with string names and values.
    326  if (extra) {
    327    if (Object.keys(extra).length > 10) {
    328      log.warn("Invalid event parameters - too many extra keys", eventDetails);
    329      return false;
    330    }
    331    for (let [ename, evalue] of Object.entries(extra)) {
    332      if (
    333        typeof ename != "string" ||
    334        ename.length > 15 ||
    335        typeof evalue != "string" ||
    336        evalue.length > 85
    337      ) {
    338        log.warn(
    339          `Invalid event parameters: extra item "${ename} is invalid`,
    340          eventDetails
    341        );
    342        return false;
    343      }
    344    }
    345  }
    346  return true;
    347 }
    348 
    349 class EngineRecord {
    350  constructor(name) {
    351    // startTime is in ms from process start, but is monotonic (unlike Date.now())
    352    // so we need to keep both it and when.
    353    this.startTime = tryGetMonotonicTimestamp();
    354    this.name = name;
    355 
    356    // This allows cases like bookmarks-buffered to have a separate name from
    357    // the bookmarks engine.
    358    let engineImpl = lazy.Weave.Service.engineManager.get(name);
    359    if (engineImpl && engineImpl.overrideTelemetryName) {
    360      this.overrideTelemetryName = engineImpl.overrideTelemetryName;
    361    }
    362  }
    363 
    364  toJSON() {
    365    let result = { name: this.overrideTelemetryName || this.name };
    366    let properties = [
    367      "took",
    368      "status",
    369      "failureReason",
    370      "incoming",
    371      "outgoing",
    372      "validation",
    373      "steps",
    374    ];
    375    for (let property of properties) {
    376      result[property] = this[property];
    377    }
    378    return result;
    379  }
    380 
    381  finished(error) {
    382    let took = timeDeltaFrom(this.startTime);
    383    if (took >= 0) {
    384      this.took = took;
    385    }
    386    if (error) {
    387      this.failureReason = SyncTelemetry.transformError(error);
    388    }
    389  }
    390 
    391  recordApplied(counts) {
    392    if (this.incoming) {
    393      log.error(
    394        `Incoming records applied multiple times for engine ${this.name}!`
    395      );
    396      return;
    397    }
    398    if (this.name === "clients" && !counts.failed) {
    399      // ignore successful application of client records
    400      // since otherwise they show up every time and are meaningless.
    401      return;
    402    }
    403 
    404    let incomingData = {};
    405 
    406    if (counts.failedReasons) {
    407      // sort the failed reasons in desc by count, then take top 10
    408      counts.failedReasons = counts.failedReasons
    409        .sort((a, b) => b.count - a.count)
    410        .slice(0, 10);
    411    }
    412    // Counts has extra stuff used for logging, but we only care about a few
    413    let properties = ["applied", "failed", "failedReasons"];
    414    // Only record non-zero properties and only record incoming at all if
    415    // there's at least one property we care about.
    416    for (let property of properties) {
    417      if (counts[property]) {
    418        incomingData[property] = counts[property];
    419        this.incoming = incomingData;
    420      }
    421    }
    422  }
    423 
    424  recordStep(stepResult) {
    425    let step = {
    426      name: stepResult.name,
    427    };
    428    if (stepResult.took >= 0) {
    429      step.took = Math.round(stepResult.took);
    430    }
    431    if (stepResult.counts) {
    432      let counts = stepResult.counts.filter(({ count }) => count > 0);
    433      if (counts.length) {
    434        step.counts = counts;
    435      }
    436    }
    437    if (this.steps) {
    438      this.steps.push(step);
    439    } else {
    440      this.steps = [step];
    441    }
    442  }
    443 
    444  recordValidation(validationResult) {
    445    if (this.validation) {
    446      log.error(`Multiple validations occurred for engine ${this.name}!`);
    447      return;
    448    }
    449    let { problems, version, took, checked } = validationResult;
    450    let validation = {
    451      version: version || 0,
    452      checked: checked || 0,
    453    };
    454    if (took >= 0) {
    455      validation.took = Math.round(took);
    456    }
    457    let summarized = problems.filter(({ count }) => count > 0);
    458    if (summarized.length) {
    459      validation.problems = summarized;
    460    }
    461    this.validation = validation;
    462  }
    463 
    464  recordValidationError(e) {
    465    if (this.validation) {
    466      log.error(`Multiple validations occurred for engine ${this.name}!`);
    467      return;
    468    }
    469 
    470    this.validation = {
    471      failureReason: SyncTelemetry.transformError(e),
    472    };
    473  }
    474 
    475  recordUploaded(counts) {
    476    if (counts.sent || counts.failed) {
    477      if (!this.outgoing) {
    478        this.outgoing = [];
    479      }
    480      if (counts.failedReasons) {
    481        // sort the failed reasons in desc by count, then take top 10
    482        counts.failedReasons = counts.failedReasons
    483          .sort((a, b) => b.count - a.count)
    484          .slice(0, 10);
    485      }
    486      this.outgoing.push({
    487        sent: counts.sent || undefined,
    488        failed: counts.failed || undefined,
    489        failedReasons: counts.failedReasons || undefined,
    490      });
    491    }
    492  }
    493 }
    494 
    495 // The record of a single "sync" - typically many of these are submitted in
    496 // a single ping (ie, as a 'syncs' array)
    497 export class SyncRecord {
    498  constructor(allowedEngines, why) {
    499    this.allowedEngines = allowedEngines;
    500    // Our failure reason. This property only exists in the generated ping if an
    501    // error actually occurred.
    502    this.failureReason = undefined;
    503    this.syncNodeType = null;
    504    this.when = Date.now();
    505    this.startTime = tryGetMonotonicTimestamp();
    506    this.took = 0; // will be set later.
    507    this.why = why;
    508 
    509    // All engines that have finished (ie, does not include the "current" one)
    510    // We omit this from the ping if it's empty.
    511    this.engines = [];
    512    // The engine that has started but not yet stopped.
    513    this.currentEngine = null;
    514  }
    515 
    516  toJSON() {
    517    let result = {
    518      when: this.when,
    519      took: this.took,
    520      failureReason: this.failureReason,
    521      status: this.status,
    522    };
    523    if (this.why) {
    524      result.why = this.why;
    525    }
    526    let engines = [];
    527    for (let engine of this.engines) {
    528      engines.push(engine.toJSON());
    529    }
    530    if (engines.length) {
    531      result.engines = engines;
    532    }
    533    return result;
    534  }
    535 
    536  finished(error) {
    537    this.took = timeDeltaFrom(this.startTime);
    538    if (this.currentEngine != null) {
    539      log.error(
    540        "Finished called for the sync before the current engine finished"
    541      );
    542      this.currentEngine.finished(null);
    543      this.onEngineStop(this.currentEngine.name);
    544    }
    545    if (error) {
    546      this.failureReason = SyncTelemetry.transformError(error);
    547    }
    548 
    549    this.syncNodeType = lazy.Weave.Service.identity.telemetryNodeType;
    550 
    551    // Check for engine statuses. -- We do this now, and not in engine.finished
    552    // to make sure any statuses that get set "late" are recorded
    553    for (let engine of this.engines) {
    554      let status = lazy.Status.engines[engine.name];
    555      if (status && status !== constants.ENGINE_SUCCEEDED) {
    556        engine.status = status;
    557      }
    558    }
    559 
    560    let statusObject = {};
    561 
    562    let serviceStatus = lazy.Status.service;
    563    if (serviceStatus && serviceStatus !== constants.STATUS_OK) {
    564      statusObject.service = serviceStatus;
    565      this.status = statusObject;
    566    }
    567    let syncStatus = lazy.Status.sync;
    568    if (syncStatus && syncStatus !== constants.SYNC_SUCCEEDED) {
    569      statusObject.sync = syncStatus;
    570      this.status = statusObject;
    571    }
    572  }
    573 
    574  onEngineStart(engineName) {
    575    if (this._shouldIgnoreEngine(engineName, false)) {
    576      return;
    577    }
    578 
    579    if (this.currentEngine) {
    580      log.error(
    581        `Being told that engine ${engineName} has started, but current engine ${this.currentEngine.name} hasn't stopped`
    582      );
    583      // Just discard the current engine rather than making up data for it.
    584    }
    585    this.currentEngine = new EngineRecord(engineName);
    586  }
    587 
    588  onEngineStop(engineName, error) {
    589    // We only care if it's the current engine if we have a current engine.
    590    if (this._shouldIgnoreEngine(engineName, !!this.currentEngine)) {
    591      return;
    592    }
    593    if (!this.currentEngine) {
    594      // It's possible for us to get an error before the start message of an engine
    595      // (somehow), in which case we still want to record that error.
    596      if (!error) {
    597        return;
    598      }
    599      log.error(
    600        `Error triggered on ${engineName} when no current engine exists: ${error}`
    601      );
    602      this.currentEngine = new EngineRecord(engineName);
    603    }
    604    this.currentEngine.finished(error);
    605    this.engines.push(this.currentEngine);
    606    this.currentEngine = null;
    607  }
    608 
    609  onEngineApplied(engineName, counts) {
    610    if (this._shouldIgnoreEngine(engineName)) {
    611      return;
    612    }
    613    this.currentEngine.recordApplied(counts);
    614  }
    615 
    616  onEngineStep(engineName, step) {
    617    if (this._shouldIgnoreEngine(engineName)) {
    618      return;
    619    }
    620    this.currentEngine.recordStep(step);
    621  }
    622 
    623  onEngineValidated(engineName, validationData) {
    624    if (this._shouldIgnoreEngine(engineName, false)) {
    625      return;
    626    }
    627    let engine = this.engines.find(e => e.name === engineName);
    628    if (
    629      !engine &&
    630      this.currentEngine &&
    631      engineName === this.currentEngine.name
    632    ) {
    633      engine = this.currentEngine;
    634    }
    635    if (engine) {
    636      engine.recordValidation(validationData);
    637    } else {
    638      log.warn(
    639        `Validation event triggered for engine ${engineName}, which hasn't been synced!`
    640      );
    641    }
    642  }
    643 
    644  onEngineValidateError(engineName, error) {
    645    if (this._shouldIgnoreEngine(engineName, false)) {
    646      return;
    647    }
    648    let engine = this.engines.find(e => e.name === engineName);
    649    if (
    650      !engine &&
    651      this.currentEngine &&
    652      engineName === this.currentEngine.name
    653    ) {
    654      engine = this.currentEngine;
    655    }
    656    if (engine) {
    657      engine.recordValidationError(error);
    658    } else {
    659      log.warn(
    660        `Validation failure event triggered for engine ${engineName}, which hasn't been synced!`
    661      );
    662    }
    663  }
    664 
    665  onEngineUploaded(engineName, counts) {
    666    if (this._shouldIgnoreEngine(engineName)) {
    667      return;
    668    }
    669    this.currentEngine.recordUploaded(counts);
    670  }
    671 
    672  _shouldIgnoreEngine(engineName, shouldBeCurrent = true) {
    673    if (!this.allowedEngines.has(engineName)) {
    674      log.info(
    675        `Notification for engine ${engineName}, but we aren't recording telemetry for it`
    676      );
    677      return true;
    678    }
    679    if (shouldBeCurrent) {
    680      if (!this.currentEngine || engineName != this.currentEngine.name) {
    681        log.info(`Notification for engine ${engineName} but it isn't current`);
    682        return true;
    683      }
    684    }
    685    return false;
    686  }
    687 }
    688 
    689 // The entire "sync ping" - it includes all the syncs, events etc recorded in
    690 // the ping.
    691 class SyncTelemetryImpl {
    692  constructor(allowedEngines) {
    693    log.manageLevelFromPref("services.sync.log.logger.telemetry");
    694    // This is accessible so we can enable custom engines during tests.
    695    this.allowedEngines = allowedEngines;
    696    this.current = null;
    697    this.setupObservers();
    698 
    699    this.payloads = [];
    700    this.discarded = 0;
    701    this.events = [];
    702    this.histograms = {};
    703    this.migrations = [];
    704    this.maxEventsCount = lazy.Svc.PrefBranch.getIntPref(
    705      "telemetry.maxEventsCount",
    706      1000
    707    );
    708    this.maxPayloadCount = lazy.Svc.PrefBranch.getIntPref(
    709      "telemetry.maxPayloadCount"
    710    );
    711    this.submissionInterval =
    712      lazy.Svc.PrefBranch.getIntPref("telemetry.submissionInterval") * 1000;
    713    this.lastSubmissionTime = Services.telemetry.msSinceProcessStart();
    714    this.lastUID = EMPTY_UID;
    715    this.lastSyncNodeType = null;
    716    this.currentSyncNodeType = null;
    717    // Note that the sessionStartDate is somewhat arbitrary - the telemetry
    718    // modules themselves just use `new Date()`. This means that our startDate
    719    // isn't going to be the same as the sessionStartDate in the main pings,
    720    // but that's OK for now - if it's a problem we'd need to change the
    721    // telemetry modules to expose what it thinks the sessionStartDate is.
    722    let sessionStartDate = new Date();
    723    this.sessionStartDate = lazy.TelemetryUtils.toLocalTimeISOString(
    724      lazy.TelemetryUtils.truncateToHours(sessionStartDate)
    725    );
    726    lazy.TelemetryController.registerSyncPingShutdown(() => this.shutdown());
    727  }
    728 
    729  sanitizeFxaDeviceId(deviceId) {
    730    return lazy.fxAccounts.telemetry.sanitizeDeviceId(deviceId);
    731  }
    732 
    733  prepareFxaDevices(devices) {
    734    // For non-sync users, the data per device is limited -- just an id and a
    735    // type (and not even the id yet). For sync users, if we can correctly map
    736    // the fxaDevice to a sync device, then we can get os and version info,
    737    // which would be quite unfortunate to lose.
    738    let extraInfoMap = new Map();
    739    if (this.syncIsEnabled()) {
    740      for (let client of this.getClientsEngineRecords()) {
    741        if (client.fxaDeviceId) {
    742          extraInfoMap.set(client.fxaDeviceId, {
    743            os: client.os,
    744            version: client.version,
    745            syncID: this.sanitizeFxaDeviceId(client.id),
    746          });
    747        }
    748      }
    749    }
    750    // Finally, sanitize and convert to the proper format.
    751    return devices.map(d => {
    752      let { os, version, syncID } = extraInfoMap.get(d.id) || {
    753        os: undefined,
    754        version: undefined,
    755        syncID: undefined,
    756      };
    757      return {
    758        id: this.sanitizeFxaDeviceId(d.id) || EMPTY_UID,
    759        type: d.type,
    760        os,
    761        version,
    762        syncID,
    763      };
    764    });
    765  }
    766 
    767  syncIsEnabled() {
    768    return lazy.WeaveService.enabled && lazy.WeaveService.ready;
    769  }
    770 
    771  // Separate for testing.
    772  getClientsEngineRecords() {
    773    if (!this.syncIsEnabled()) {
    774      throw new Error("Bug: syncIsEnabled() must be true, check it first");
    775    }
    776    return lazy.Weave.Service.clientsEngine.remoteClients;
    777  }
    778 
    779  updateFxaDevices(devices) {
    780    if (!devices) {
    781      return {};
    782    }
    783    let me = devices.find(d => d.isCurrentDevice);
    784    let id = me ? this.sanitizeFxaDeviceId(me.id) : undefined;
    785    let cleanDevices = this.prepareFxaDevices(devices);
    786    return { deviceID: id, devices: cleanDevices };
    787  }
    788 
    789  getFxaDevices() {
    790    return lazy.fxAccounts.device.recentDeviceList;
    791  }
    792 
    793  getPingJSON(reason) {
    794    let { devices, deviceID } = this.updateFxaDevices(this.getFxaDevices());
    795    return {
    796      os: lazy.TelemetryEnvironment.currentEnvironment.system.os,
    797      why: reason,
    798      devices,
    799      discarded: this.discarded || undefined,
    800      version: PING_FORMAT_VERSION,
    801      syncs: this.payloads.slice(),
    802      uid: this.lastUID,
    803      syncNodeType: this.lastSyncNodeType || undefined,
    804      deviceID,
    805      sessionStartDate: this.sessionStartDate,
    806      events: !this.events.length ? undefined : this.events,
    807      migrations: !this.migrations.length ? undefined : this.migrations,
    808      histograms: !Object.keys(this.histograms).length
    809        ? undefined
    810        : this.histograms,
    811    };
    812  }
    813 
    814  _addMigrationRecord(type, info) {
    815    log.debug("Saw telemetry migration info", type, info);
    816    // Updates to this need to be documented in `sync-ping.rst`
    817    // and considered in the metric definition of `syncs.migrations`.
    818    switch (type) {
    819      case "webext-storage":
    820        this.migrations.push({
    821          type: "webext-storage",
    822          entries: +info.entries,
    823          entriesSuccessful: +info.entries_successful,
    824          extensions: +info.extensions,
    825          extensionsSuccessful: +info.extensions_successful,
    826          openFailure: !!info.open_failure,
    827        });
    828        break;
    829      default:
    830        throw new Error("Bug: Unknown migration record type " + type);
    831    }
    832  }
    833 
    834  finish(reason) {
    835    // Note that we might be in the middle of a sync right now, and so we don't
    836    // want to touch this.current.
    837    let result = this.getPingJSON(reason);
    838    this.payloads = [];
    839    this.discarded = 0;
    840    this.events = [];
    841    this.migrations = [];
    842    this.histograms = {};
    843    this.submit(result);
    844  }
    845 
    846  setupObservers() {
    847    for (let topic of TOPICS) {
    848      lazy.Observers.add(topic, this, this);
    849    }
    850  }
    851 
    852  shutdown() {
    853    this.finish("shutdown");
    854    for (let topic of TOPICS) {
    855      lazy.Observers.remove(topic, this, this);
    856    }
    857  }
    858 
    859  submit(record) {
    860    if (!this.isProductionSyncUser()) {
    861      return false;
    862    }
    863    // We still call submit() with possibly illegal payloads so that tests can
    864    // know that the ping was built. We don't end up submitting them, however.
    865    let numEvents = record.events ? record.events.length : 0;
    866    let numMigrations = record.migrations ? record.migrations.length : 0;
    867    if (record.syncs.length || numEvents || numMigrations) {
    868      log.trace(
    869        `submitting ${record.syncs.length} sync record(s) and ` +
    870          `${numEvents} event(s) to telemetry`
    871      );
    872      lazy.TelemetryController.submitExternalPing("sync", record, {
    873        usePingSender: true,
    874      }).catch(err => {
    875        log.error("failed to submit ping", err);
    876      });
    877 
    878      Glean.syncs.discarded.set(record.discarded);
    879      Glean.syncs.hashedFxaUid.set(record.uid);
    880      Glean.syncs.hashedDeviceId.set(record.deviceID);
    881      Glean.syncs.sessionStartDate.set(record.sessionStartDate * 1000);
    882      Glean.syncs.syncNodeType.set(record.syncNodeType);
    883      // Glean object metrics forbid the use of e.g. 'type' (bug 1945220),
    884      // so we can't use record.syncs directly.
    885      let syncs = structuredClone(record.syncs);
    886      for (let sync of syncs) {
    887        if (!("devices" in sync)) {
    888          continue;
    889        }
    890        for (let device of sync.devices) {
    891          device.device_type = device.type;
    892          delete device.type;
    893        }
    894      }
    895      Glean.syncs.syncs.set(syncs);
    896      if (numMigrations) {
    897        // Glean object metrics forbid the use of e.g. 'type' (bug 1945220),
    898        // so we can't use record.migrations directly.
    899        let migrations = structuredClone(record.migrations);
    900        for (let migration of migrations) {
    901          migration.migration_type = migration.type;
    902          delete migration.type;
    903        }
    904        Glean.syncs.migrations.set(migrations);
    905      }
    906      GleanPings.sync.submit(record.why);
    907      return true;
    908    }
    909    return false;
    910  }
    911 
    912  isProductionSyncUser() {
    913    // If FxA isn't production then we treat sync as not being production.
    914    // Further, there's the deprecated "services.sync.tokenServerURI" pref we
    915    // need to consider - fxa doesn't consider that as if that's the only
    916    // pref set, they *are* running a production fxa, just not production sync.
    917    if (
    918      !lazy.FxAccounts.config.isProductionConfig() ||
    919      Services.prefs.prefHasUserValue("services.sync.tokenServerURI")
    920    ) {
    921      log.trace(`Not sending telemetry ping for self-hosted Sync user`);
    922      return false;
    923    }
    924    return true;
    925  }
    926 
    927  onSyncStarted(data) {
    928    const why = data && JSON.parse(data).why;
    929    if (this.current) {
    930      log.warn(
    931        "Observed weave:service:sync:start, but we're already recording a sync!"
    932      );
    933      // Just discard the old record, consistent with our handling of engines, above.
    934      this.current = null;
    935    }
    936    this.current = new SyncRecord(this.allowedEngines, why);
    937  }
    938 
    939  // We need to ensure that the telemetry `deletion-request` ping always contains the user's
    940  // current sync device ID, because if the user opts out of telemetry then the deletion ping
    941  // will be immediately triggered for sending, and we won't have a chance to fill it in later.
    942  // This keeps the `deletion-ping` up-to-date when the user's account state changes.
    943  onAccountInitOrChange() {
    944    // We don't submit sync pings for self-hosters, so don't need to collect their device ids either.
    945    if (!this.isProductionSyncUser()) {
    946      return;
    947    }
    948    // Awkwardly async, but no need to await. If the user's account state changes while
    949    // this promise is in flight, it will reject and we won't record any data in the ping.
    950    // (And a new notification will trigger us to try again with the new state).
    951    lazy.fxAccounts.device
    952      .getLocalId()
    953      .then(deviceId => {
    954        let sanitizedDeviceId =
    955          lazy.fxAccounts.telemetry.sanitizeDeviceId(deviceId);
    956        // In the past we did not persist the FxA metrics identifiers to disk,
    957        // so this might be missing until we can fetch it from the server for the
    958        // first time. There will be a fresh notification tirggered when it's available.
    959        if (sanitizedDeviceId) {
    960          // Sanitized device ids are 64 characters long, but telemetry limits scalar strings to 50.
    961          // The first 32 chars are sufficient to uniquely identify the device, so just send those.
    962          // It's hard to change the sync ping itself to only send 32 chars, to b/w compat reasons.
    963          sanitizedDeviceId = sanitizedDeviceId.substr(0, 32);
    964          Glean.deletionRequest.syncDeviceId.set(sanitizedDeviceId);
    965        }
    966      })
    967      .catch(err => {
    968        log.warn(
    969          `Failed to set sync identifiers in the deletion-request ping: ${err}`
    970        );
    971      });
    972  }
    973 
    974  // This keeps the `deletion-request` ping up-to-date when the user signs out,
    975  // clearing the now-nonexistent sync device id.
    976  onAccountLogout() {
    977    Glean.deletionRequest.syncDeviceId.set("");
    978  }
    979 
    980  _checkCurrent(topic) {
    981    if (!this.current) {
    982      // This is only `info` because it happens when we do a tabs "quick-write"
    983      log.info(
    984        `Observed notification ${topic} but no current sync is being recorded.`
    985      );
    986      return false;
    987    }
    988    return true;
    989  }
    990 
    991  _shouldSubmitForDataChange() {
    992    let newID = lazy.fxAccounts.telemetry.getSanitizedUID() || EMPTY_UID;
    993    let oldID = this.lastUID;
    994    if (
    995      newID != EMPTY_UID &&
    996      oldID != EMPTY_UID &&
    997      // Both are "real" uids, so we care if they've changed.
    998      newID != oldID
    999    ) {
   1000      log.trace(
   1001        `shouldSubmitForDataChange - uid from '${oldID}' -> '${newID}'`
   1002      );
   1003      return true;
   1004    }
   1005    // We've gone from knowing one of the ids to not knowing it (which we
   1006    // ignore) or we've gone from not knowing it to knowing it (which is fine),
   1007    // Now check the node type because a change there also means we should
   1008    // submit.
   1009    if (
   1010      this.lastSyncNodeType &&
   1011      this.currentSyncNodeType != this.lastSyncNodeType
   1012    ) {
   1013      log.trace(
   1014        `shouldSubmitForDataChange - nodeType from '${this.lastSyncNodeType}' -> '${this.currentSyncNodeType}'`
   1015      );
   1016      return true;
   1017    }
   1018    log.trace("shouldSubmitForDataChange - no need to submit");
   1019    return false;
   1020  }
   1021 
   1022  maybeSubmitForDataChange() {
   1023    if (this._shouldSubmitForDataChange()) {
   1024      log.info(
   1025        "Early submission of sync telemetry due to changed IDs/NodeType"
   1026      );
   1027      this.finish("idchange"); // this actually submits.
   1028      this.lastSubmissionTime = Services.telemetry.msSinceProcessStart();
   1029    }
   1030 
   1031    // Only update the last UIDs if we actually know them.
   1032    let current_uid = lazy.fxAccounts.telemetry.getSanitizedUID();
   1033    if (current_uid) {
   1034      this.lastUID = current_uid;
   1035    }
   1036    if (this.currentSyncNodeType) {
   1037      this.lastSyncNodeType = this.currentSyncNodeType;
   1038    }
   1039  }
   1040 
   1041  maybeSubmitForInterval() {
   1042    // We want to submit the ping every `this.submissionInterval` but only when
   1043    // there's no current sync in progress, otherwise we may end up submitting
   1044    // the sync and the events caused by it in different pings.
   1045    if (
   1046      this.current == null &&
   1047      Services.telemetry.msSinceProcessStart() - this.lastSubmissionTime >
   1048        this.submissionInterval
   1049    ) {
   1050      this.finish("schedule");
   1051      this.lastSubmissionTime = Services.telemetry.msSinceProcessStart();
   1052    }
   1053  }
   1054 
   1055  onSyncFinished(error) {
   1056    if (!this.current) {
   1057      log.warn("onSyncFinished but we aren't recording");
   1058      return;
   1059    }
   1060    this.current.finished(error);
   1061    this.currentSyncNodeType = this.current.syncNodeType;
   1062    let current = this.current;
   1063    this.current = null;
   1064    this.takeTelemetryRecord(current);
   1065  }
   1066 
   1067  takeTelemetryRecord(record) {
   1068    // We check for "data change" before appending the current sync to payloads,
   1069    // as it is the current sync which has the data with the new data, and thus
   1070    // must go in the *next* submission.
   1071    this.maybeSubmitForDataChange();
   1072    if (this.payloads.length < this.maxPayloadCount) {
   1073      this.payloads.push(record.toJSON());
   1074    } else {
   1075      ++this.discarded;
   1076    }
   1077    // If we are submitting due to timing, it's desirable that the most recent
   1078    // sync is included, so we check after appending the record.
   1079    this.maybeSubmitForInterval();
   1080  }
   1081 
   1082  _addHistogram(hist) {
   1083    let histogram = Services.telemetry.getHistogramById(hist);
   1084    let s = histogram.snapshot();
   1085    this.histograms[hist] = s;
   1086  }
   1087 
   1088  _recordEvent(eventDetails) {
   1089    this.maybeSubmitForDataChange();
   1090 
   1091    if (this.events.length >= this.maxEventsCount) {
   1092      log.warn("discarding event - already queued our maximum", eventDetails);
   1093      return;
   1094    }
   1095 
   1096    let { object, method, value, extra } = eventDetails;
   1097    if (extra) {
   1098      extra = normalizeExtraTelemetryFields(extra);
   1099      eventDetails = { object, method, value, extra };
   1100    }
   1101 
   1102    if (!validateTelemetryEvent(eventDetails)) {
   1103      // we've already logged what the problem is...
   1104      return;
   1105    }
   1106    log.debug("recording event", eventDetails);
   1107 
   1108    if (extra && lazy.Resource.serverTime && !extra.serverTime) {
   1109      extra.serverTime = String(lazy.Resource.serverTime);
   1110    }
   1111    let category = "sync";
   1112    let ts = Math.floor(tryGetMonotonicTimestamp());
   1113 
   1114    // An event record is a simple array with at least 4 items.
   1115    let event = [ts, category, method, object];
   1116    // It may have up to 6 elements if |extra| is defined
   1117    if (value) {
   1118      event.push(value);
   1119      if (extra) {
   1120        event.push(extra);
   1121      }
   1122    } else if (extra) {
   1123      event.push(null); // a null for the empty value.
   1124      event.push(extra);
   1125    }
   1126    this.events.push(event);
   1127    this.maybeSubmitForInterval();
   1128  }
   1129 
   1130  observe(subject, topic, data) {
   1131    log.trace(`observed ${topic} ${data}`);
   1132 
   1133    switch (topic) {
   1134      case "weave:service:ready":
   1135      case "weave:service:login:got-hashed-id":
   1136      case "fxaccounts:new_device_id":
   1137        this.onAccountInitOrChange();
   1138        break;
   1139 
   1140      case "fxaccounts:onlogout":
   1141        this.onAccountLogout();
   1142        break;
   1143 
   1144      /* sync itself state changes */
   1145      case "weave:service:sync:start":
   1146        this.onSyncStarted(data);
   1147        break;
   1148 
   1149      case "weave:service:sync:finish":
   1150        if (this._checkCurrent(topic)) {
   1151          this.onSyncFinished(null);
   1152        }
   1153        break;
   1154 
   1155      case "weave:service:sync:error":
   1156        // argument needs to be truthy (this should always be the case)
   1157        this.onSyncFinished(subject || "Unknown");
   1158        break;
   1159 
   1160      /* engine sync state changes */
   1161      case "weave:engine:sync:start":
   1162        if (this._checkCurrent(topic)) {
   1163          this.current.onEngineStart(data);
   1164        }
   1165        break;
   1166      case "weave:engine:sync:finish":
   1167        if (this._checkCurrent(topic)) {
   1168          this.current.onEngineStop(data, null);
   1169        }
   1170        break;
   1171 
   1172      case "weave:engine:sync:error":
   1173        if (this._checkCurrent(topic)) {
   1174          // argument needs to be truthy (this should always be the case)
   1175          this.current.onEngineStop(data, subject || "Unknown");
   1176        }
   1177        break;
   1178 
   1179      /* engine counts */
   1180      case "weave:engine:sync:applied":
   1181        if (this._checkCurrent(topic)) {
   1182          this.current.onEngineApplied(data, subject);
   1183        }
   1184        break;
   1185 
   1186      case "weave:engine:sync:step":
   1187        if (this._checkCurrent(topic)) {
   1188          this.current.onEngineStep(data, subject);
   1189        }
   1190        break;
   1191 
   1192      case "weave:engine:sync:uploaded":
   1193        if (this._checkCurrent(topic)) {
   1194          this.current.onEngineUploaded(data, subject);
   1195        }
   1196        break;
   1197 
   1198      case "weave:engine:validate:finish":
   1199        if (this._checkCurrent(topic)) {
   1200          this.current.onEngineValidated(data, subject);
   1201        }
   1202        break;
   1203 
   1204      case "weave:engine:validate:error":
   1205        if (this._checkCurrent(topic)) {
   1206          this.current.onEngineValidateError(data, subject || "Unknown");
   1207        }
   1208        break;
   1209 
   1210      case "weave:telemetry:event":
   1211      case "fxa:telemetry:event":
   1212        this._recordEvent(subject);
   1213        break;
   1214 
   1215      case "weave:telemetry:histogram":
   1216        this._addHistogram(data);
   1217        break;
   1218 
   1219      case "weave:telemetry:migration":
   1220        this._addMigrationRecord(data, subject);
   1221        break;
   1222 
   1223      default:
   1224        log.warn(`unexpected observer topic ${topic}`);
   1225        break;
   1226    }
   1227  }
   1228 
   1229  // Transform an exception into a standard description. Exposed here for when
   1230  // this module isn't directly responsible for knowing the transform should
   1231  // happen (for example, when including an error in the |extra| field of
   1232  // event telemetry)
   1233  transformError(error) {
   1234    // Certain parts of sync will use this pattern as a way to communicate to
   1235    // processIncoming to abort the processing. However, there's no guarantee
   1236    // this can only happen then.
   1237    if (typeof error == "object" && error.code && error.cause) {
   1238      error = error.cause;
   1239    }
   1240    if (lazy.Async.isShutdownException(error)) {
   1241      return { name: "shutdownerror" };
   1242    }
   1243 
   1244    if (typeof error === "string") {
   1245      if (error.startsWith("error.")) {
   1246        // This is hacky, but I can't imagine that it's not also accurate.
   1247        return { name: "othererror", error };
   1248      }
   1249      error = ErrorSanitizer.cleanErrorMessage(error);
   1250      return { name: "unexpectederror", error };
   1251    }
   1252 
   1253    if (error instanceof lazy.AuthenticationError) {
   1254      return { name: "autherror", from: error.source };
   1255    }
   1256 
   1257    if (DOMException.isInstance(error)) {
   1258      return {
   1259        name: "unexpectederror",
   1260        error: ErrorSanitizer.cleanErrorMessage(error.message, error),
   1261      };
   1262    }
   1263 
   1264    let httpCode =
   1265      error.status || (error.response && error.response.status) || error.code;
   1266 
   1267    if (httpCode) {
   1268      return { name: "httperror", code: httpCode };
   1269    }
   1270 
   1271    if (error.failureCode) {
   1272      return { name: "othererror", error: error.failureCode };
   1273    }
   1274 
   1275    if (error.result) {
   1276      // many "nsresult" errors are actually network errors - if they are
   1277      // associated with the "network" module we assume that's true.
   1278      // We also assume NS_ERROR_ABORT is such an error - for almost everything
   1279      // we care about, it acually is (eg, if the connection fails early enough
   1280      // or if we have a captive portal etc) - we don't lose anything by this
   1281      // assumption, it's just that the error will no longer be in the "nserror"
   1282      // category, so our analysis can still find them.
   1283      if (
   1284        error.result == Cr.NS_ERROR_ABORT ||
   1285        NS_ERROR_GET_MODULE(error.result) == NS_ERROR_MODULE_NETWORK
   1286      ) {
   1287        return { name: "httperror", code: error.result };
   1288      }
   1289      return { name: "nserror", code: error.result };
   1290    }
   1291    // It's probably an Error object, but it also could be some
   1292    // other object that may or may not override toString to do
   1293    // something useful.
   1294    let msg = String(error);
   1295    if (msg.startsWith("[object")) {
   1296      // Nothing useful in the default, check for a string "message" property.
   1297      if (typeof error.message == "string") {
   1298        msg = String(error.message);
   1299      } else {
   1300        // Hopefully it won't come to this...
   1301        msg = JSON.stringify(error);
   1302      }
   1303    }
   1304    return {
   1305      name: "unexpectederror",
   1306      error: ErrorSanitizer.cleanErrorMessage(msg),
   1307    };
   1308  }
   1309 }
   1310 
   1311 export var SyncTelemetry = new SyncTelemetryImpl(ENGINES);