tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

fetch-xpcshell-data.js (55373B)


      1 #!/usr/bin/env node
      2 
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 const fs = require("fs");
      8 const path = require("path");
      9 const { Worker } = require("worker_threads");
     10 const os = require("os");
     11 
// Cap the worker pool at the machine's logical core count, never more than 32.
const MAX_WORKERS = Math.min(32, os.cpus().length);

// Taskcluster endpoint: prefer the in-CI proxy, then the configured root URL,
// falling back to the public firefox-ci instance.
const TASKCLUSTER_BASE_URL =
  process.env.TASKCLUSTER_PROXY_URL ||
  process.env.TASKCLUSTER_ROOT_URL ||
  "https://firefox-ci-tc.services.mozilla.com";

// Check for --output-dir parameter; defaults to ./xpcshell-data.
const OUTPUT_DIR = (() => {
  const outputDirIndex = process.argv.findIndex(arg => arg === "--output-dir");
  if (outputDirIndex !== -1 && outputDirIndex + 1 < process.argv.length) {
    return process.argv[outputDirIndex + 1];
  }
  return "./xpcshell-data";
})();

// On-disk cache for downloaded profiles; handed to profile-worker.js workers.
const PROFILE_CACHE_DIR = "./profile-cache";

// Module-level lazy caches:
//   previousRunData - presumably data from an earlier run; not referenced in
//     this chunk — TODO confirm against the rest of the file.
//   allJobsCache - xpcshell job rows from the treeherder database query.
//   componentsData - Bugzilla path -> component mapping.
let previousRunData = null;
let allJobsCache = null;
let componentsData = null;

// Ensure both directories exist before any fetching starts.
if (!fs.existsSync(OUTPUT_DIR)) {
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });
}
if (!fs.existsSync(PROFILE_CACHE_DIR)) {
  fs.mkdirSync(PROFILE_CACHE_DIR, { recursive: true });
}
     40 
     41 // Get date in YYYY-MM-DD format
     42 function getDateString(daysAgo = 0) {
     43  const date = new Date();
     44  date.setDate(date.getDate() - daysAgo);
     45  return date.toISOString().split("T")[0];
     46 }
     47 
     48 async function fetchJson(url) {
     49  const response = await fetch(url);
     50  if (!response.ok) {
     51    return null;
     52  }
     53  return response.json();
     54 }
     55 
     56 // Fetch commit push data from Treeherder API
     57 async function fetchCommitData(project, revision) {
     58  console.log(`Fetching commit data for ${project}:${revision}...`);
     59 
     60  const result = await fetchJson(
     61    `https://treeherder.mozilla.org/api/project/${project}/push/?full=true&count=10&revision=${revision}`
     62  );
     63 
     64  if (!result || !result.results || result.results.length === 0) {
     65    throw new Error(
     66      `No push found for revision ${revision} on project ${project}`
     67    );
     68  }
     69 
     70  const pushId = result.results[0].id;
     71  console.log(`Found push ID: ${pushId}`);
     72  return pushId;
     73 }
     74 
     75 // Fetch jobs from push
     76 async function fetchPushJobs(project, pushId) {
     77  console.log(`Fetching jobs for push ID ${pushId}...`);
     78 
     79  let allJobs = [];
     80  let propertyNames = [];
     81  let url = `https://treeherder.mozilla.org/api/jobs/?push_id=${pushId}`;
     82 
     83  // The /jobs/ API is paginated, keep fetching until next is null
     84  while (url) {
     85    const result = await fetchJson(url);
     86    if (!result) {
     87      throw new Error(`Failed to fetch jobs for push ID ${pushId}`);
     88    }
     89 
     90    allJobs = allJobs.concat(result.results || []);
     91    if (!propertyNames.length) {
     92      propertyNames = result.job_property_names || [];
     93    }
     94 
     95    url = result.next;
     96  }
     97 
     98  // Get field indices dynamically
     99  const jobTypeNameIndex = propertyNames.indexOf("job_type_name");
    100  const taskIdIndex = propertyNames.indexOf("task_id");
    101  const retryIdIndex = propertyNames.indexOf("retry_id");
    102  const lastModifiedIndex = propertyNames.indexOf("last_modified");
    103 
    104  const xpcshellJobs = allJobs
    105    .filter(
    106      job => job[jobTypeNameIndex] && job[jobTypeNameIndex].includes("xpcshell")
    107    )
    108    .map(job => ({
    109      name: job[jobTypeNameIndex],
    110      task_id: job[taskIdIndex],
    111      retry_id: job[retryIdIndex] || 0,
    112      start_time: job[lastModifiedIndex],
    113      repository: project,
    114    }));
    115 
    116  console.log(
    117    `Found ${xpcshellJobs.length} xpcshell jobs out of ${allJobs.length} total jobs`
    118  );
    119  return xpcshellJobs;
    120 }
    121 
    122 // Fetch xpcshell test data from treeherder database for a specific date
    123 async function fetchXpcshellData(targetDate) {
    124  console.log(`Fetching xpcshell test data for ${targetDate}...`);
    125 
    126  // Fetch data from the treeherder database if not already cached
    127  if (!allJobsCache) {
    128    console.log(`Querying treeherder database...`);
    129    const result = await fetchJson(
    130      "https://sql.telemetry.mozilla.org/api/queries/110630/results.json?api_key=Pyybfsna2r5KQkwYgSk9zqbYfc6Dv0rhxL99DFi1"
    131    );
    132 
    133    if (!result) {
    134      throw new Error("Failed to fetch data from treeherder database");
    135    }
    136 
    137    const allJobs = result.query_result.data.rows;
    138 
    139    // Cache only xpcshell jobs
    140    allJobsCache = allJobs.filter(job => job.name.includes("xpcshell"));
    141    console.log(
    142      `Cached ${allJobsCache.length} xpcshell jobs from treeherder database (out of ${allJobs.length} total jobs)`
    143    );
    144  }
    145 
    146  // Filter cached jobs for the target date
    147  return allJobsCache.filter(job => job.start_time.startsWith(targetDate));
    148 }
    149 
/**
 * Process jobs in parallel with a pool of worker threads (profile-worker.js)
 * using dynamic distribution: each worker pulls the next job from a shared
 * queue as soon as it reports the previous one complete.
 *
 * @param {Array} jobs - job descriptors, posted verbatim to workers.
 * @param {?string} targetDate - only used to decorate the progress log line.
 * @returns {Promise<Array>} the `result` payloads reported by workers; jobs
 *   that produced no result are omitted.
 */
async function processJobsWithWorkers(jobs, targetDate = null) {
  if (jobs.length === 0) {
    return [];
  }

  const dateStr = targetDate ? ` for ${targetDate}` : "";
  console.log(
    `Processing ${jobs.length} jobs${dateStr} using ${MAX_WORKERS} workers...`
  );

  const jobQueue = [...jobs];
  const results = [];
  const workers = [];
  let completedJobs = 0;
  let lastProgressTime = 0;

  return new Promise((resolve, reject) => {
    // Track worker states
    const workerStates = new Map();

    // Create workers
    for (let i = 0; i < MAX_WORKERS; i++) {
      const worker = new Worker(path.join(__dirname, "profile-worker.js"), {
        workerData: {
          profileCacheDir: PROFILE_CACHE_DIR,
          taskclusterBaseUrl: TASKCLUSTER_BASE_URL,
        },
      });

      workers.push(worker);
      workerStates.set(worker, { id: i + 1, ready: false, jobsProcessed: 0 });

      worker.on("message", message => {
        const workerState = workerStates.get(worker);

        if (message.type === "ready") {
          // Worker signalled it is initialized; give it its first job.
          workerState.ready = true;
          assignNextJob(worker);
        } else if (message.type === "jobComplete") {
          workerState.jobsProcessed++;
          completedJobs++;

          if (message.result) {
            results.push(message.result);
          }

          // Show progress at most once per second, or on first/last job
          const now = Date.now();
          if (
            completedJobs === 1 ||
            completedJobs === jobs.length ||
            now - lastProgressTime >= 1000
          ) {
            const percentage = Math.round((completedJobs / jobs.length) * 100);
            const paddedCompleted = completedJobs
              .toString()
              .padStart(jobs.length.toString().length);
            const paddedPercentage = percentage.toString().padStart(3); // Pad to 3 chars for alignment (0-100%)
            console.log(
              ` ${paddedPercentage}% ${paddedCompleted}/${jobs.length}`
            );
            lastProgressTime = now;
          }

          // Assign next job or finish
          assignNextJob(worker);
        } else if (message.type === "finished") {
          checkAllComplete();
        } else if (message.type === "error") {
          // NOTE(review): rejecting here leaves the remaining workers
          // running; they are only terminated on the success path — confirm
          // this is intended.
          reject(new Error(`Worker ${workerState.id} error: ${message.error}`));
        }
      });

      worker.on("error", error => {
        reject(
          new Error(
            `Worker ${workerStates.get(worker).id} thread error: ${error.message}`
          )
        );
      });

      worker.on("exit", code => {
        if (code !== 0) {
          reject(
            new Error(
              `Worker ${workerStates.get(worker).id} stopped with exit code ${code}`
            )
          );
        }
      });
    }

    // Hand `worker` the next queued job, or ask it to shut down once the
    // queue is drained (the worker then reports "finished", counted above).
    function assignNextJob(worker) {
      if (jobQueue.length) {
        const job = jobQueue.shift();
        worker.postMessage({ type: "job", job });
      } else {
        // No more jobs, tell worker to finish
        worker.postMessage({ type: "shutdown" });
      }
    }

    let resolved = false;
    let workersFinished = 0;

    // Resolve exactly once, after every worker has acknowledged shutdown.
    function checkAllComplete() {
      if (resolved) {
        return;
      }

      workersFinished++;

      if (workersFinished >= MAX_WORKERS) {
        resolved = true;

        // Terminate all workers to ensure clean exit
        workers.forEach(worker => worker.terminate());

        resolve(results);
      }
    }
  });
}
    274 
    275 // Fetch Bugzilla component mapping data
    276 async function fetchComponentsData() {
    277  if (componentsData) {
    278    return componentsData;
    279  }
    280 
    281  console.log("Fetching Bugzilla component mapping...");
    282  const url = `${TASKCLUSTER_BASE_URL}/api/index/v1/task/gecko.v2.mozilla-central.latest.source.source-bugzilla-info/artifacts/public/components-normalized.json`;
    283 
    284  try {
    285    componentsData = await fetchJson(url);
    286    console.log("Component mapping loaded successfully");
    287    return componentsData;
    288  } catch (error) {
    289    console.error("Failed to fetch component mapping:", error);
    290    return null;
    291  }
    292 }
    293 
    294 // Look up component for a test path
    295 function findComponentForPath(testPath) {
    296  if (!componentsData || !componentsData.paths) {
    297    return null;
    298  }
    299 
    300  const parts = testPath.split("/");
    301  let current = componentsData.paths;
    302 
    303  for (const part of parts) {
    304    if (typeof current === "number") {
    305      return current;
    306    }
    307    if (typeof current === "object" && current !== null && part in current) {
    308      current = current[part];
    309    } else {
    310      return null;
    311    }
    312  }
    313 
    314  return typeof current === "number" ? current : null;
    315 }
    316 
    317 // Get component string from component ID
    318 function getComponentString(componentId) {
    319  if (!componentsData || !componentsData.components || componentId == null) {
    320    return null;
    321  }
    322 
    323  const component = componentsData.components[String(componentId)];
    324  if (!component || !Array.isArray(component) || component.length !== 2) {
    325    return null;
    326  }
    327 
    328  return `${component[0]} :: ${component[1]}`;
    329 }
    330 
    331 // Helper function to determine if a status should include message data
    332 function shouldIncludeMessage(status) {
    333  return status === "SKIP" || status.startsWith("FAIL");
    334 }
    335 
/**
 * Create string tables and store raw data efficiently.
 *
 * Every repeated string (job name, test path, status, task ID, ...) is
 * interned into a table once and referenced by integer index everywhere
 * else, which deduplicates the serialized output.
 *
 * @param {Array} jobResults - per-job results. Each entry needs `jobName`,
 *   `repository`, `taskId`, `retryId`, optional `commitId`, and a `timings`
 *   array of per-test records { path, status, duration, timestamp } with
 *   optional message / crashSignature / minidump fields. Entries without
 *   `timings` are skipped.
 * @returns {{tables, taskInfo, testInfo, testRuns}} parallel index-based
 *   structures; see the inline comments on each.
 */
function createDataTables(jobResults) {
  const tables = {
    jobNames: [],
    testPaths: [],
    testNames: [],
    repositories: [],
    statuses: [],
    taskIds: [],
    messages: [],
    crashSignatures: [],
    components: [],
    commitIds: [],
  };

  // Maps for O(1) string lookups
  const stringMaps = {
    jobNames: new Map(),
    testPaths: new Map(),
    testNames: new Map(),
    repositories: new Map(),
    statuses: new Map(),
    taskIds: new Map(),
    messages: new Map(),
    crashSignatures: new Map(),
    components: new Map(),
    commitIds: new Map(),
  };

  // Task info maps task ID index to repository and job name indexes
  const taskInfo = {
    repositoryIds: [],
    jobNameIds: [],
    commitIds: [],
  };

  // Test info maps test ID index to test path and name indexes
  const testInfo = {
    testPathIds: [],
    testNameIds: [],
    componentIds: [],
  };

  // Map for fast testId lookup: fullPath -> testId
  const testIdMap = new Map();

  // Test runs grouped by test ID, then by status ID
  // testRuns[testId] = array of status groups for that test
  const testRuns = [];

  // Intern `string` into the named table, returning its stable index.
  function findStringIndex(tableName, string) {
    const table = tables[tableName];
    const map = stringMaps[tableName];

    let index = map.get(string);
    if (index === undefined) {
      index = table.length;
      table.push(string);
      map.set(string, index);
    }
    return index;
  }

  for (const result of jobResults) {
    if (!result || !result.timings) {
      continue;
    }

    const jobNameId = findStringIndex("jobNames", result.jobName);
    const repositoryId = findStringIndex("repositories", result.repository);
    const commitId = result.commitId
      ? findStringIndex("commitIds", result.commitId)
      : null;

    for (const timing of result.timings) {
      const fullPath = timing.path;

      // Check if we already have this test
      let testId = testIdMap.get(fullPath);
      if (testId === undefined) {
        // New test - need to process path/name split and create entry
        const lastSlashIndex = fullPath.lastIndexOf("/");

        let testPath, testName;
        if (lastSlashIndex === -1) {
          // No directory, just the filename
          testPath = "";
          testName = fullPath;
        } else {
          testPath = fullPath.substring(0, lastSlashIndex);
          testName = fullPath.substring(lastSlashIndex + 1);
        }

        const testPathId = findStringIndex("testPaths", testPath);
        const testNameId = findStringIndex("testNames", testName);

        // Look up the component for this test
        const componentIdRaw = findComponentForPath(fullPath);
        const componentString = getComponentString(componentIdRaw);
        const componentId = componentString
          ? findStringIndex("components", componentString)
          : null;

        // testId is the next free slot in the parallel testInfo arrays.
        testId = testInfo.testPathIds.length;
        testInfo.testPathIds.push(testPathId);
        testInfo.testNameIds.push(testNameId);
        testInfo.componentIds.push(componentId);
        testIdMap.set(fullPath, testId);
      }

      const statusId = findStringIndex("statuses", timing.status || "UNKNOWN");
      // Task key combines task ID and retry so retriggers stay distinct.
      const taskIdString = `${result.taskId}.${result.retryId}`;
      const taskIdId = findStringIndex("taskIds", taskIdString);

      // Store task info only once per unique task ID
      if (taskInfo.repositoryIds[taskIdId] === undefined) {
        taskInfo.repositoryIds[taskIdId] = repositoryId;
        taskInfo.jobNameIds[taskIdId] = jobNameId;
        taskInfo.commitIds[taskIdId] = commitId;
      }

      // Initialize test group if it doesn't exist
      if (!testRuns[testId]) {
        testRuns[testId] = [];
      }

      // Initialize status group within test if it doesn't exist
      let statusGroup = testRuns[testId][statusId];
      if (!statusGroup) {
        statusGroup = {
          taskIdIds: [],
          durations: [],
          timestamps: [],
        };
        // Include messageIds array for statuses that should have messages
        if (shouldIncludeMessage(timing.status)) {
          statusGroup.messageIds = [];
        }
        // Only include crash data arrays for CRASH status
        if (timing.status === "CRASH") {
          statusGroup.crashSignatureIds = [];
          statusGroup.minidumps = [];
        }
        testRuns[testId][statusId] = statusGroup;
      }

      // Add test run to the appropriate test/status group
      statusGroup.taskIdIds.push(taskIdId);
      statusGroup.durations.push(Math.round(timing.duration));
      statusGroup.timestamps.push(timing.timestamp);

      // Store message ID for statuses that should include messages (or null if no message)
      if (shouldIncludeMessage(timing.status)) {
        const messageId = timing.message
          ? findStringIndex("messages", timing.message)
          : null;
        statusGroup.messageIds.push(messageId);
      }

      // Store crash data for CRASH status (or null if not available)
      if (timing.status === "CRASH") {
        const crashSignatureId = timing.crashSignature
          ? findStringIndex("crashSignatures", timing.crashSignature)
          : null;
        statusGroup.crashSignatureIds.push(crashSignatureId);
        statusGroup.minidumps.push(timing.minidump || null);
      }
    }
  }

  return {
    tables,
    taskInfo,
    testInfo,
    testRuns,
  };
}
    513 
/**
 * Sort string tables by frequency and remap all indices, for deterministic
 * output and better compression (frequent strings get small indices).
 *
 * Handles both the "daily" layout (flat `taskIdIds` arrays with durations
 * and timestamps) and the "aggregated" layout (`taskIdIds` as array of
 * arrays with `hours`, or `counts`/`hours` for aggregated passing tests).
 *
 * @param {{tables, taskInfo, testInfo, testRuns}} dataStructure - as
 *   produced by createDataTables (or the aggregated equivalent).
 * @returns the same shape with every table sorted and every index remapped.
 */
function sortStringTablesByFrequency(dataStructure) {
  const { tables, taskInfo, testInfo, testRuns } = dataStructure;

  // Count frequency of each index for each table
  const frequencyCounts = {
    jobNames: new Array(tables.jobNames.length).fill(0),
    testPaths: new Array(tables.testPaths.length).fill(0),
    testNames: new Array(tables.testNames.length).fill(0),
    repositories: new Array(tables.repositories.length).fill(0),
    statuses: new Array(tables.statuses.length).fill(0),
    taskIds: new Array(tables.taskIds.length).fill(0),
    messages: new Array(tables.messages.length).fill(0),
    crashSignatures: new Array(tables.crashSignatures.length).fill(0),
    components: new Array(tables.components.length).fill(0),
    commitIds: new Array(tables.commitIds.length).fill(0),
  };

  // Count taskInfo references
  for (const jobNameId of taskInfo.jobNameIds) {
    if (jobNameId !== undefined) {
      frequencyCounts.jobNames[jobNameId]++;
    }
  }
  for (const repositoryId of taskInfo.repositoryIds) {
    if (repositoryId !== undefined) {
      frequencyCounts.repositories[repositoryId]++;
    }
  }
  for (const commitId of taskInfo.commitIds) {
    if (commitId !== null) {
      frequencyCounts.commitIds[commitId]++;
    }
  }

  // Count testInfo references
  for (const testPathId of testInfo.testPathIds) {
    frequencyCounts.testPaths[testPathId]++;
  }
  for (const testNameId of testInfo.testNameIds) {
    frequencyCounts.testNames[testNameId]++;
  }
  for (const componentId of testInfo.componentIds) {
    if (componentId !== null) {
      frequencyCounts.components[componentId]++;
    }
  }

  // Count testRuns references
  for (const testGroup of testRuns) {
    if (!testGroup) {
      continue;
    }

    // testGroup is a sparse array indexed by statusId; forEach skips holes.
    testGroup.forEach((statusGroup, statusId) => {
      if (!statusGroup) {
        return;
      }

      // Handle both aggregated format (counts/hours) and detailed format (taskIdIds)
      if (statusGroup.taskIdIds) {
        // Check if taskIdIds is array of arrays (aggregated) or flat array (daily)
        const isArrayOfArrays =
          !!statusGroup.taskIdIds.length &&
          Array.isArray(statusGroup.taskIdIds[0]);

        if (isArrayOfArrays) {
          // Aggregated format: array of arrays
          const totalRuns = statusGroup.taskIdIds.reduce(
            (sum, arr) => sum + arr.length,
            0
          );
          frequencyCounts.statuses[statusId] += totalRuns;

          for (const taskIdIdsArray of statusGroup.taskIdIds) {
            for (const taskIdId of taskIdIdsArray) {
              frequencyCounts.taskIds[taskIdId]++;
            }
          }
        } else {
          // Daily format: flat array
          frequencyCounts.statuses[statusId] += statusGroup.taskIdIds.length;

          for (const taskIdId of statusGroup.taskIdIds) {
            frequencyCounts.taskIds[taskIdId]++;
          }
        }
      } else if (statusGroup.counts) {
        // Aggregated passing tests - count total runs
        const totalRuns = statusGroup.counts.reduce((a, b) => a + b, 0);
        frequencyCounts.statuses[statusId] += totalRuns;
      }

      if (statusGroup.messageIds) {
        for (const messageId of statusGroup.messageIds) {
          if (messageId !== null) {
            frequencyCounts.messages[messageId]++;
          }
        }
      }

      if (statusGroup.crashSignatureIds) {
        for (const crashSigId of statusGroup.crashSignatureIds) {
          if (crashSigId !== null) {
            frequencyCounts.crashSignatures[crashSigId]++;
          }
        }
      }
    });
  }

  // Create sorted tables and index mappings (sorted by frequency descending)
  const sortedTables = {};
  const indexMaps = {};

  for (const [tableName, table] of Object.entries(tables)) {
    const counts = frequencyCounts[tableName];

    // Create array with value, oldIndex, and count
    const indexed = table.map((value, oldIndex) => ({
      value,
      oldIndex,
      count: counts[oldIndex],
    }));

    // Sort by count descending, then by value for deterministic order when counts are equal
    indexed.sort((a, b) => {
      if (b.count !== a.count) {
        return b.count - a.count;
      }
      return a.value.localeCompare(b.value);
    });

    // Extract sorted values and create mapping
    sortedTables[tableName] = indexed.map(item => item.value);
    indexMaps[tableName] = new Map(
      indexed.map((item, newIndex) => [item.oldIndex, newIndex])
    );
  }

  // Remap taskInfo indices
  // taskInfo arrays are indexed by taskIdId, and when taskIds get remapped,
  // we need to rebuild the arrays at the new indices
  const sortedTaskInfo = {
    repositoryIds: [],
    jobNameIds: [],
    commitIds: [],
  };

  for (
    let oldTaskIdId = 0;
    oldTaskIdId < taskInfo.repositoryIds.length;
    oldTaskIdId++
  ) {
    const newTaskIdId = indexMaps.taskIds.get(oldTaskIdId);
    sortedTaskInfo.repositoryIds[newTaskIdId] = indexMaps.repositories.get(
      taskInfo.repositoryIds[oldTaskIdId]
    );
    sortedTaskInfo.jobNameIds[newTaskIdId] = indexMaps.jobNames.get(
      taskInfo.jobNameIds[oldTaskIdId]
    );
    sortedTaskInfo.commitIds[newTaskIdId] =
      taskInfo.commitIds[oldTaskIdId] === null
        ? null
        : indexMaps.commitIds.get(taskInfo.commitIds[oldTaskIdId]);
  }

  // Remap testInfo indices
  const sortedTestInfo = {
    testPathIds: testInfo.testPathIds.map(oldId =>
      indexMaps.testPaths.get(oldId)
    ),
    testNameIds: testInfo.testNameIds.map(oldId =>
      indexMaps.testNames.get(oldId)
    ),
    componentIds: testInfo.componentIds.map(oldId =>
      oldId === null ? null : indexMaps.components.get(oldId)
    ),
  };

  // Remap testRuns indices
  const sortedTestRuns = testRuns.map(testGroup => {
    if (!testGroup) {
      return testGroup;
    }

    return testGroup.map(statusGroup => {
      if (!statusGroup) {
        return statusGroup;
      }

      // Handle aggregated format (counts/hours) differently from detailed format
      if (statusGroup.counts) {
        // Aggregated passing tests - no remapping needed
        return {
          counts: statusGroup.counts,
          hours: statusGroup.hours,
        };
      }

      // Check if this is aggregated format (array of arrays) or daily format (flat array)
      const isArrayOfArrays =
        !!statusGroup.taskIdIds.length &&
        Array.isArray(statusGroup.taskIdIds[0]);

      const remapped = {};

      if (isArrayOfArrays) {
        // Aggregated format: array of arrays with hours
        remapped.taskIdIds = statusGroup.taskIdIds.map(taskIdIdsArray =>
          taskIdIdsArray.map(oldId => indexMaps.taskIds.get(oldId))
        );
        remapped.hours = statusGroup.hours;
      } else {
        // Daily format: flat array with durations and timestamps
        remapped.taskIdIds = statusGroup.taskIdIds.map(oldId =>
          indexMaps.taskIds.get(oldId)
        );
        remapped.durations = statusGroup.durations;
        remapped.timestamps = statusGroup.timestamps;
      }

      // Remap message IDs for status groups that have messages
      if (statusGroup.messageIds) {
        remapped.messageIds = statusGroup.messageIds.map(oldId =>
          oldId === null ? null : indexMaps.messages.get(oldId)
        );
      }

      // Remap crash data for CRASH status
      if (statusGroup.crashSignatureIds) {
        remapped.crashSignatureIds = statusGroup.crashSignatureIds.map(oldId =>
          oldId === null ? null : indexMaps.crashSignatures.get(oldId)
        );
      }
      if (statusGroup.minidumps) {
        remapped.minidumps = statusGroup.minidumps;
      }

      return remapped;
    });
  });

  // Remap statusId positions in testRuns (move status groups to their new positions)
  const finalTestRuns = sortedTestRuns.map(testGroup => {
    if (!testGroup) {
      return testGroup;
    }

    const remappedGroup = [];
    testGroup.forEach((statusGroup, oldStatusId) => {
      if (!statusGroup) {
        return;
      }
      const newStatusId = indexMaps.statuses.get(oldStatusId);
      remappedGroup[newStatusId] = statusGroup;
    });

    return remappedGroup;
  });

  return {
    tables: sortedTables,
    taskInfo: sortedTaskInfo,
    testInfo: sortedTestInfo,
    testRuns: finalTestRuns,
  };
}
    782 
    783 // Create resource usage data structure
    784 function createResourceUsageData(jobResults) {
    785  const jobNames = [];
    786  const jobNameMap = new Map();
    787  const repositories = [];
    788  const repositoryMap = new Map();
    789  const machineInfos = [];
    790  const machineInfoMap = new Map();
    791 
    792  // Collect all job data first
    793  const jobDataList = [];
    794 
    795  for (const result of jobResults) {
    796    if (!result || !result.resourceUsage) {
    797      continue;
    798    }
    799 
    800    // Extract chunk number from job name (e.g., "test-linux1804-64/opt-xpcshell-1" -> "test-linux1804-64/opt-xpcshell", chunk: 1)
    801    let jobNameBase = result.jobName;
    802    let chunkNumber = null;
    803    const match = result.jobName.match(/^(.+)-(\d+)$/);
    804    if (match) {
    805      jobNameBase = match[1];
    806      chunkNumber = parseInt(match[2], 10);
    807    }
    808 
    809    // Get or create job name index
    810    let jobNameId = jobNameMap.get(jobNameBase);
    811    if (jobNameId === undefined) {
    812      jobNameId = jobNames.length;
    813      jobNames.push(jobNameBase);
    814      jobNameMap.set(jobNameBase, jobNameId);
    815    }
    816 
    817    // Get or create repository index
    818    let repositoryId = repositoryMap.get(result.repository);
    819    if (repositoryId === undefined) {
    820      repositoryId = repositories.length;
    821      repositories.push(result.repository);
    822      repositoryMap.set(result.repository, repositoryId);
    823    }
    824 
    825    // Get or create machine info index
    826    const machineInfo = result.resourceUsage.machineInfo;
    827    const machineInfoKey = JSON.stringify(machineInfo);
    828    let machineInfoId = machineInfoMap.get(machineInfoKey);
    829    if (machineInfoId === undefined) {
    830      machineInfoId = machineInfos.length;
    831      machineInfos.push(machineInfo);
    832      machineInfoMap.set(machineInfoKey, machineInfoId);
    833    }
    834 
    835    // Combine taskId and retryId (omit .0 for retry 0)
    836    const taskIdString =
    837      result.retryId === 0
    838        ? result.taskId
    839        : `${result.taskId}.${result.retryId}`;
    840 
    841    jobDataList.push({
    842      jobNameId,
    843      chunk: chunkNumber,
    844      taskId: taskIdString,
    845      repositoryId,
    846      startTime: result.startTime,
    847      machineInfoId,
    848      maxMemory: result.resourceUsage.maxMemory,
    849      idleTime: result.resourceUsage.idleTime,
    850      singleCoreTime: result.resourceUsage.singleCoreTime,
    851      cpuBuckets: result.resourceUsage.cpuBuckets,
    852    });
    853  }
    854 
    855  // Sort by start time
    856  jobDataList.sort((a, b) => a.startTime - b.startTime);
    857 
    858  // Apply differential compression to start times and build parallel arrays
    859  const jobs = {
    860    jobNameIds: [],
    861    chunks: [],
    862    taskIds: [],
    863    repositoryIds: [],
    864    startTimes: [],
    865    machineInfoIds: [],
    866    maxMemories: [],
    867    idleTimes: [],
    868    singleCoreTimes: [],
    869    cpuBuckets: [],
    870  };
    871 
    872  let previousStartTime = 0;
    873  for (const jobData of jobDataList) {
    874    jobs.jobNameIds.push(jobData.jobNameId);
    875    jobs.chunks.push(jobData.chunk);
    876    jobs.taskIds.push(jobData.taskId);
    877    jobs.repositoryIds.push(jobData.repositoryId);
    878 
    879    // Differential compression: store difference from previous
    880    const timeDiff = jobData.startTime - previousStartTime;
    881    jobs.startTimes.push(timeDiff);
    882    previousStartTime = jobData.startTime;
    883 
    884    jobs.machineInfoIds.push(jobData.machineInfoId);
    885    jobs.maxMemories.push(jobData.maxMemory);
    886    jobs.idleTimes.push(jobData.idleTime);
    887    jobs.singleCoreTimes.push(jobData.singleCoreTime);
    888    jobs.cpuBuckets.push(jobData.cpuBuckets);
    889  }
    890 
    891  return {
    892    jobNames,
    893    repositories,
    894    machineInfos,
    895    jobs,
    896  };
    897 }
    898 
    899 // Helper to save a JSON file and log its size
    900 function saveJsonFile(data, filePath) {
    901  fs.writeFileSync(filePath, JSON.stringify(data));
    902 
    903  const stats = fs.statSync(filePath);
    904  const fileSizeBytes = stats.size;
    905 
    906  // Use MB for files >= 1MB, otherwise KB
    907  if (fileSizeBytes >= 1024 * 1024) {
    908    const fileSizeMB = Math.round(fileSizeBytes / (1024 * 1024));
    909    const formattedBytes = fileSizeBytes.toLocaleString();
    910    console.log(
    911      `Saved ${filePath} - ${fileSizeMB}MB (${formattedBytes} bytes)`
    912    );
    913  } else {
    914    const fileSizeKB = Math.round(fileSizeBytes / 1024);
    915    console.log(`Saved ${filePath} - ${fileSizeKB}KB`);
    916  }
    917 }
    918 
// Common function to process jobs and create data structure.
//
// Pipeline: run the jobs through the worker pool to extract per-test
// timings, build deduplicated string/ID tables, sort those tables by
// frequency, then compact each status group's run list (relative,
// delta-encoded timestamps sorted ascending). Returns
// { testData, resourceData }, or null when there is nothing to process.
//
// `startTime` is a Unix timestamp in seconds used as the zero point for
// all run timestamps; `metadata` is spread into the output's metadata.
async function processJobsAndCreateData(
  jobs,
  targetLabel,
  startTime,
  metadata
) {
  if (jobs.length === 0) {
    console.log(`No jobs found for ${targetLabel}.`);
    return null;
  }

  // Process jobs to extract test timings (parallelized via worker threads;
  // jobs that fail to process are dropped, so jobResults may be shorter).
  const jobProcessingStart = Date.now();
  const jobResults = await processJobsWithWorkers(jobs, targetLabel);
  const jobProcessingTime = Date.now() - jobProcessingStart;
  console.log(
    `Successfully processed ${jobResults.length} jobs in ${jobProcessingTime}ms`
  );

  // Create efficient data tables (interned strings + parallel ID arrays)
  const dataTablesStart = Date.now();
  let dataStructure = createDataTables(jobResults);
  const dataTablesTime = Date.now() - dataTablesStart;
  console.log(`Created data tables in ${dataTablesTime}ms:`);

  // Check if any test runs were extracted
  const hasTestRuns = !!dataStructure.testRuns.length;
  if (!hasTestRuns) {
    console.log(`No test run data extracted for ${targetLabel}`);
    return null;
  }

  // Count total runs across all tests/statuses (sparse arrays: entries may
  // be undefined, hence the guards).
  const totalRuns = dataStructure.testRuns.reduce((sum, testGroup) => {
    if (!testGroup) {
      return sum;
    }
    return (
      sum +
      testGroup.reduce(
        (testSum, statusGroup) =>
          testSum + (statusGroup ? statusGroup.taskIdIds.length : 0),
        0
      )
    );
  }, 0);
  console.log(
    `  ${dataStructure.testInfo.testPathIds.length} tests, ${totalRuns} runs, ${dataStructure.tables.taskIds.length} tasks, ${dataStructure.tables.jobNames.length} job names, ${dataStructure.tables.statuses.length} statuses`
  );

  // Sort string tables by frequency for deterministic output and better compression
  const sortingStart = Date.now();
  dataStructure = sortStringTablesByFrequency(dataStructure);
  const sortingTime = Date.now() - sortingStart;
  console.log(`Sorted string tables by frequency in ${sortingTime}ms`);

  // Convert absolute timestamps to relative and apply differential compression (in place)
  for (const testGroup of dataStructure.testRuns) {
    if (!testGroup) {
      continue;
    }

    for (const statusGroup of testGroup) {
      if (!statusGroup) {
        continue;
      }

      // Convert timestamps to relative in place (ms -> seconds since startTime)
      for (let i = 0; i < statusGroup.timestamps.length; i++) {
        statusGroup.timestamps[i] =
          Math.floor(statusGroup.timestamps[i] / 1000) - startTime;
      }

      // Map to array of objects including crash data if present, so a
      // single sort keeps all parallel arrays aligned.
      const runs = statusGroup.timestamps.map((ts, i) => {
        const run = {
          timestamp: ts,
          taskIdId: statusGroup.taskIdIds[i],
          duration: statusGroup.durations[i],
        };
        // Include crash data if this is a CRASH status group
        if (statusGroup.crashSignatureIds) {
          run.crashSignatureId = statusGroup.crashSignatureIds[i];
        }
        if (statusGroup.minidumps) {
          run.minidump = statusGroup.minidumps[i];
        }
        // Include message data if this status group has messages
        if (statusGroup.messageIds) {
          run.messageId = statusGroup.messageIds[i];
        }
        return run;
      });

      // Sort by timestamp (required before delta-encoding below)
      runs.sort((a, b) => a.timestamp - b.timestamp);

      // Apply differential compression in place for timestamps
      // (each entry becomes the delta from the previous run).
      let previousTimestamp = 0;
      for (const run of runs) {
        const currentTimestamp = run.timestamp;
        run.timestamp = currentTimestamp - previousTimestamp;
        previousTimestamp = currentTimestamp;
      }

      // Update in place: write the sorted/encoded values back into the
      // parallel arrays of the status group.
      statusGroup.taskIdIds = runs.map(run => run.taskIdId);
      statusGroup.durations = runs.map(run => run.duration);
      statusGroup.timestamps = runs.map(run => run.timestamp);
      // Update crash data arrays if present
      if (statusGroup.crashSignatureIds) {
        statusGroup.crashSignatureIds = runs.map(run => run.crashSignatureId);
      }
      if (statusGroup.minidumps) {
        statusGroup.minidumps = runs.map(run => run.minidump);
      }
      // Update message data arrays if present
      if (statusGroup.messageIds) {
        statusGroup.messageIds = runs.map(run => run.messageId);
      }
    }
  }

  // Build output with metadata. jobCount is the number of jobs requested;
  // processedJobCount is how many were actually processed successfully.
  return {
    testData: {
      metadata: {
        ...metadata,
        startTime,
        generatedAt: new Date().toISOString(),
        jobCount: jobs.length,
        processedJobCount: jobResults.length,
      },
      tables: dataStructure.tables,
      taskInfo: dataStructure.taskInfo,
      testInfo: dataStructure.testInfo,
      testRuns: dataStructure.testRuns,
    },
    resourceData: createResourceUsageData(jobResults),
  };
}
   1060 
   1061 async function processRevisionData(project, revision, forceRefetch = false) {
   1062  console.log(`Fetching xpcshell test data for ${project}:${revision}`);
   1063  console.log(`=== Processing ${project}:${revision} ===`);
   1064 
   1065  const cacheFile = path.join(
   1066    OUTPUT_DIR,
   1067    `xpcshell-${project}-${revision}.json`
   1068  );
   1069 
   1070  // Check if we already have data for this revision
   1071  if (fs.existsSync(cacheFile) && !forceRefetch) {
   1072    console.log(`Data for ${project}:${revision} already exists. Skipping.`);
   1073    return null;
   1074  }
   1075 
   1076  if (forceRefetch) {
   1077    console.log(
   1078      `Force flag detected, re-fetching data for ${project}:${revision}...`
   1079    );
   1080  }
   1081 
   1082  try {
   1083    // Fetch push ID from revision
   1084    const pushId = await fetchCommitData(project, revision);
   1085 
   1086    // Fetch jobs for the push
   1087    const jobs = await fetchPushJobs(project, pushId);
   1088 
   1089    if (jobs.length === 0) {
   1090      console.log(`No xpcshell jobs found for ${project}:${revision}.`);
   1091      return null;
   1092    }
   1093 
   1094    // Use the last_modified time of the first job as start time
   1095    const startTime = jobs.length
   1096      ? Math.floor(new Date(jobs[0].start_time).getTime() / 1000)
   1097      : Math.floor(Date.now() / 1000);
   1098 
   1099    const output = await processJobsAndCreateData(
   1100      jobs,
   1101      `${project}-${revision}`,
   1102      startTime,
   1103      {
   1104        project,
   1105        revision,
   1106        pushId,
   1107      }
   1108    );
   1109 
   1110    if (!output) {
   1111      return null;
   1112    }
   1113 
   1114    saveJsonFile(output.testData, cacheFile);
   1115    const resourceCacheFile = path.join(
   1116      OUTPUT_DIR,
   1117      `xpcshell-${project}-${revision}-resources.json`
   1118    );
   1119    saveJsonFile(output.resourceData, resourceCacheFile);
   1120 
   1121    return output;
   1122  } catch (error) {
   1123    console.error(`Error processing ${project}:${revision}:`, error);
   1124    return null;
   1125  }
   1126 }
   1127 
   1128 // Fetch previous run metadata from Taskcluster
   1129 async function fetchPreviousRunData() {
   1130  try {
   1131    // Fetch task info for the current task to get the index name from the routes.
   1132    const taskUrl = `${TASKCLUSTER_BASE_URL}/api/queue/v1/task/${process.env.TASK_ID}`;
   1133    const taskData = await fetchJson(taskUrl);
   1134    if (!taskData) {
   1135      console.log(`Failed to fetch task info from ${taskUrl}`);
   1136      return;
   1137    }
   1138 
   1139    const routes = taskData.routes || [];
   1140    // Find a route that starts with "index." and contains ".latest."
   1141    const latestRoute = routes.find(
   1142      route => route.startsWith("index.") && route.includes(".latest.")
   1143    );
   1144    if (!latestRoute) {
   1145      console.log(
   1146        `No route found with 'index.' prefix and '.latest.' in name. Available routes: ${JSON.stringify(routes)}`
   1147      );
   1148      return;
   1149    }
   1150 
   1151    // Remove "index." prefix from route to get index name
   1152    const indexName = latestRoute.replace(/^index\./, "");
   1153    console.log(`Using index: ${indexName}`);
   1154 
   1155    // Store artifacts URL for later use by processDateData
   1156    const artifactsUrl = `${TASKCLUSTER_BASE_URL}/api/index/v1/task/${indexName}/artifacts/public`;
   1157 
   1158    // Fetch the index.json from the previous run
   1159    const indexUrl = `${artifactsUrl}/index.json`;
   1160    console.log(`Fetching previous run data from ${indexUrl}`);
   1161    const indexData = await fetchJson(indexUrl);
   1162    if (!indexData) {
   1163      console.log(`Failed to fetch index.json from ${indexUrl}`);
   1164      return;
   1165    }
   1166 
   1167    const dates = indexData.dates || [];
   1168 
   1169    console.log(`Found ${dates.length} dates in previous run`);
   1170 
   1171    previousRunData = {
   1172      dates: new Set(dates),
   1173      artifactsUrl,
   1174    };
   1175 
   1176    console.log("Previous run metadata loaded\n");
   1177  } catch (error) {
   1178    console.log(`Error fetching previous run metadata: ${error.message}`);
   1179  }
   1180 }
   1181 
   1182 // Process data for a single date
   1183 async function processDateData(targetDate, forceRefetch = false) {
   1184  const timingsFilename = `xpcshell-${targetDate}.json`;
   1185  const resourcesFilename = `xpcshell-${targetDate}-resources.json`;
   1186  const timingsPath = path.join(OUTPUT_DIR, timingsFilename);
   1187  const resourcesPath = path.join(OUTPUT_DIR, resourcesFilename);
   1188 
   1189  // Check if we already have data for this date
   1190  if (fs.existsSync(timingsPath) && !forceRefetch) {
   1191    console.log(`Data for ${targetDate} already exists. Skipping.`);
   1192    return;
   1193  }
   1194 
   1195  // Fetch jobs list first (needed for verification)
   1196  let jobs;
   1197  try {
   1198    jobs = await fetchXpcshellData(targetDate);
   1199    if (jobs.length === 0) {
   1200      console.log(`No jobs found for ${targetDate}.`);
   1201      return;
   1202    }
   1203  } catch (error) {
   1204    console.error(`Error fetching jobs for ${targetDate}:`, error);
   1205    return;
   1206  }
   1207 
   1208  // Try to fetch from previous run if available and not forcing refetch
   1209  if (
   1210    !forceRefetch &&
   1211    previousRunData &&
   1212    previousRunData.dates.has(targetDate)
   1213  ) {
   1214    try {
   1215      const [timings, resources] = await Promise.all([
   1216        fetchJson(`${previousRunData.artifactsUrl}/${timingsFilename}`),
   1217        fetchJson(`${previousRunData.artifactsUrl}/${resourcesFilename}`),
   1218      ]);
   1219 
   1220      if (timings && resources) {
   1221        const expectedJobCount = jobs.length;
   1222        const actualProcessedCount = timings.metadata?.processedJobCount;
   1223 
   1224        if (actualProcessedCount < expectedJobCount) {
   1225          const missingJobs = expectedJobCount - actualProcessedCount;
   1226          console.log(
   1227            `Ignoring artifact from previous run: missing ${missingJobs} jobs (expected ${expectedJobCount}, got ${actualProcessedCount})`
   1228          );
   1229        } else {
   1230          console.log(`Fetched valid artifact from previous run.`);
   1231          saveJsonFile(timings, timingsPath);
   1232          saveJsonFile(resources, resourcesPath);
   1233          return;
   1234        }
   1235      } else {
   1236        console.log(
   1237          `Error fetching artifact from previous run: artifact not found`
   1238        );
   1239      }
   1240    } catch (error) {
   1241      console.log(
   1242        `Error fetching artifact from previous run: ${error.message}`
   1243      );
   1244    }
   1245  }
   1246 
   1247  if (forceRefetch) {
   1248    console.log(`Force flag detected, re-fetching data for ${targetDate}...`);
   1249  }
   1250 
   1251  try {
   1252    // Calculate start of day timestamp for relative time calculation
   1253    const startOfDay = new Date(targetDate + "T00:00:00.000Z");
   1254    const startTime = Math.floor(startOfDay.getTime() / 1000); // Convert to seconds
   1255 
   1256    const output = await processJobsAndCreateData(jobs, targetDate, startTime, {
   1257      date: targetDate,
   1258    });
   1259    if (!output) {
   1260      return;
   1261    }
   1262 
   1263    saveJsonFile(output.testData, timingsPath);
   1264    saveJsonFile(output.resourceData, resourcesPath);
   1265  } catch (error) {
   1266    console.error(`Error processing ${targetDate}:`, error);
   1267  }
   1268 }
   1269 
// Merge the per-date xpcshell JSON files into two aggregated outputs:
//   - xpcshell-issues-with-taskids.json: per-test, per-status runs bucketed
//     by hour, keeping task IDs / messages / crash signatures for failures.
//   - xpcshell-issues.json: a smaller variant with only counts per bucket.
// All string values are re-interned into fresh merged tables so IDs are
// consistent across days.
// eslint-disable-next-line complexity
async function createAggregatedFailuresFile(dates) {
  console.log(
    `\n=== Creating aggregated failures file from ${dates.length} days ===`
  );

  // Collect only the per-date files that actually exist on disk.
  const dailyFiles = [];
  for (const date of dates) {
    const filePath = path.join(OUTPUT_DIR, `xpcshell-${date}.json`);
    if (fs.existsSync(filePath)) {
      dailyFiles.push({ date, filePath });
    }
  }

  if (dailyFiles.length === 0) {
    console.log("No daily files found to aggregate");
    return;
  }

  console.log(`Found ${dailyFiles.length} daily files to aggregate`);

  // NOTE(review): assumes `dates` is ordered newest-first (endDate first,
  // startDate last) — TODO confirm against the caller.
  const startDate = dates[dates.length - 1];
  const endDate = dates[0];
  // Zero point (Unix seconds) for all merged timestamps: midnight UTC of
  // the oldest date.
  const startTime = Math.floor(
    new Date(startDate + "T00:00:00.000Z").getTime() / 1000
  );

  // Merged string tables; entries are interned via addToMergedTable below.
  const mergedTables = {
    jobNames: [],
    testPaths: [],
    testNames: [],
    repositories: [],
    statuses: [],
    taskIds: [],
    messages: [],
    crashSignatures: [],
    components: [],
    commitIds: [],
  };

  // value -> index lookups, one Map per merged table.
  const stringMaps = {
    jobNames: new Map(),
    testPaths: new Map(),
    testNames: new Map(),
    repositories: new Map(),
    statuses: new Map(),
    taskIds: new Map(),
    messages: new Map(),
    crashSignatures: new Map(),
    components: new Map(),
    commitIds: new Map(),
  };

  // Intern `value` into the named merged table, returning its stable
  // index; null/undefined values pass through as null.
  function addToMergedTable(tableName, value) {
    if (value === null || value === undefined) {
      return null;
    }
    const map = stringMaps[tableName];
    let index = map.get(value);
    if (index === undefined) {
      index = mergedTables[tableName].length;
      mergedTables[tableName].push(value);
      map.set(value, index);
    }
    return index;
  }

  // Per-task attributes, indexed by merged taskId index (failures only).
  const mergedTaskInfo = {
    repositoryIds: [],
    jobNameIds: [],
    commitIds: [],
  };

  // Per-test attributes, indexed by merged test ID.
  const mergedTestInfo = {
    testPathIds: [],
    testNameIds: [],
    componentIds: [],
  };

  // full test path ("path/name") -> merged test ID; tests are deduplicated
  // across days by this key.
  const testPathMap = new Map();
  // mergedTestRuns[testId][statusId] = flat array of run objects.
  const mergedTestRuns = [];

  for (let fileIdx = 0; fileIdx < dailyFiles.length; fileIdx++) {
    const { date, filePath } = dailyFiles[fileIdx];
    console.log(`Processing ${fileIdx + 1}/${dailyFiles.length}: ${date}...`);

    const data = JSON.parse(fs.readFileSync(filePath, "utf-8"));

    // Offset (seconds) from the aggregate zero point to this day's zero
    // point; added to every run timestamp from this file.
    const dayStartTime = data.metadata.startTime;
    const timeOffset = dayStartTime - startTime;

    for (let testId = 0; testId < data.testRuns.length; testId++) {
      const testGroup = data.testRuns[testId];
      if (!testGroup) {
        continue;
      }

      const testPathId = data.testInfo.testPathIds[testId];
      const testNameId = data.testInfo.testNameIds[testId];
      const componentId = data.testInfo.componentIds[testId];

      const testPath = data.tables.testPaths[testPathId];
      const testName = data.tables.testNames[testNameId];
      const fullPath = testPath ? `${testPath}/${testName}` : testName;

      // First time we see this test across all days: register it in the
      // merged tables and allocate its (initially empty) run list.
      let mergedTestId = testPathMap.get(fullPath);
      if (mergedTestId === undefined) {
        mergedTestId = mergedTestInfo.testPathIds.length;

        const mergedTestPathId = addToMergedTable("testPaths", testPath);
        const mergedTestNameId = addToMergedTable("testNames", testName);
        const component =
          componentId !== null ? data.tables.components[componentId] : null;
        const mergedComponentId = addToMergedTable("components", component);

        mergedTestInfo.testPathIds.push(mergedTestPathId);
        mergedTestInfo.testNameIds.push(mergedTestNameId);
        mergedTestInfo.componentIds.push(mergedComponentId);

        testPathMap.set(fullPath, mergedTestId);
        mergedTestRuns[mergedTestId] = [];
      }

      for (let statusId = 0; statusId < testGroup.length; statusId++) {
        const statusGroup = testGroup[statusId];
        if (!statusGroup) {
          continue;
        }

        const status = data.tables.statuses[statusId];
        const mergedStatusId = addToMergedTable("statuses", status);

        if (!mergedTestRuns[mergedTestId][mergedStatusId]) {
          mergedTestRuns[mergedTestId][mergedStatusId] = [];
        }

        const mergedStatusGroup = mergedTestRuns[mergedTestId][mergedStatusId];
        const isPass = status.startsWith("PASS");

        // Timestamps in the daily files are delta-encoded; accumulate them
        // back to absolute (day-relative) seconds as we iterate. The
        // accumulation must happen even for runs we skip below.
        let absoluteTimestamp = 0;
        for (let i = 0; i < statusGroup.taskIdIds.length; i++) {
          absoluteTimestamp += statusGroup.timestamps[i];

          // Skip platform-irrelevant tests (SKIP with run-if messages)
          if (
            status === "SKIP" &&
            data.tables.messages[statusGroup.messageIds?.[i]]?.startsWith(
              "run-if"
            )
          ) {
            continue;
          }

          // Resolve this run's task and its per-task attributes from the
          // daily file's own tables.
          const taskIdId = statusGroup.taskIdIds[i];
          const taskIdString = data.tables.taskIds[taskIdId];
          const repositoryId = data.taskInfo.repositoryIds[taskIdId];
          const jobNameId = data.taskInfo.jobNameIds[taskIdId];
          const commitId = data.taskInfo.commitIds[taskIdId];

          const repository = data.tables.repositories[repositoryId];
          const jobName = data.tables.jobNames[jobNameId];
          const commitIdString =
            commitId !== null ? data.tables.commitIds[commitId] : null;

          const mergedRepositoryId = addToMergedTable(
            "repositories",
            repository
          );
          const mergedJobNameId = addToMergedTable("jobNames", jobName);
          const mergedCommitId = addToMergedTable("commitIds", commitIdString);

          const run = {
            repositoryId: mergedRepositoryId,
            jobNameId: mergedJobNameId,
            timestamp: absoluteTimestamp + timeOffset,
            duration: statusGroup.durations[i],
          };

          mergedStatusGroup.push(run);

          // Passing runs only contribute counts; task/message/crash detail
          // is kept for failures alone to bound output size.
          if (isPass) {
            continue;
          }

          const mergedTaskIdId = addToMergedTable("taskIds", taskIdString);

          // First failure seen for this task: record its attributes once.
          if (mergedTaskInfo.repositoryIds[mergedTaskIdId] === undefined) {
            mergedTaskInfo.repositoryIds[mergedTaskIdId] = mergedRepositoryId;
            mergedTaskInfo.jobNameIds[mergedTaskIdId] = mergedJobNameId;
            mergedTaskInfo.commitIds[mergedTaskIdId] = mergedCommitId;
          }

          run.taskIdId = mergedTaskIdId;

          if (statusGroup.messageIds && statusGroup.messageIds[i] !== null) {
            const message = data.tables.messages[statusGroup.messageIds[i]];
            run.messageId = addToMergedTable("messages", message);
          } else if (statusGroup.messageIds) {
            run.messageId = null;
          }

          if (
            statusGroup.crashSignatureIds &&
            statusGroup.crashSignatureIds[i] !== null
          ) {
            const crashSig =
              data.tables.crashSignatures[statusGroup.crashSignatureIds[i]];
            run.crashSignatureId = addToMergedTable(
              "crashSignatures",
              crashSig
            );
          } else if (statusGroup.crashSignatureIds) {
            run.crashSignatureId = null;
          }

          if (statusGroup.minidumps) {
            run.minidump = statusGroup.minidumps[i];
          }
        }
      }
    }
  }

  // Bucket a flat status group's runs by hour. When includeMessages is
  // true, buckets are additionally keyed by messageId (or, failing that,
  // crashSignatureId) so distinct failure reasons stay separate. Returns
  // { hours (delta-encoded), counts | taskIdIds, [messageIds],
  // [crashSignatureIds], [minidumps] }.
  function aggregateRunsByHour(
    statusGroup,
    includeMessages = false,
    returnTaskIds = false
  ) {
    const buckets = new Map();
    for (const run of statusGroup) {
      const hourBucket = Math.floor(run.timestamp / 3600);
      let key = hourBucket;

      if (includeMessages && "messageId" in run) {
        key = `${hourBucket}:m${run.messageId}`;
      } else if (includeMessages && "crashSignatureId" in run) {
        key = `${hourBucket}:c${run.crashSignatureId}`;
      }

      if (!buckets.has(key)) {
        buckets.set(key, {
          hour: hourBucket,
          count: 0,
          taskIdIds: [],
          minidumps: [],
          messageId: run.messageId,
          crashSignatureId: run.crashSignatureId,
        });
      }
      const bucket = buckets.get(key);
      bucket.count++;
      if (returnTaskIds && run.taskIdId !== undefined) {
        bucket.taskIdIds.push(run.taskIdId);
      }
      if (returnTaskIds && "minidump" in run) {
        bucket.minidumps.push(run.minidump ?? null);
      }
    }

    // Deterministic ordering: by hour, then messageId, then
    // crashSignatureId, with null/undefined IDs sorted last.
    const aggregated = Array.from(buckets.values()).sort((a, b) => {
      if (a.hour !== b.hour) {
        return a.hour - b.hour;
      }
      if (a.messageId !== b.messageId) {
        if (a.messageId === null || a.messageId === undefined) {
          return 1;
        }
        if (b.messageId === null || b.messageId === undefined) {
          return -1;
        }
        return a.messageId - b.messageId;
      }
      if (a.crashSignatureId !== b.crashSignatureId) {
        if (a.crashSignatureId === null || a.crashSignatureId === undefined) {
          return 1;
        }
        if (b.crashSignatureId === null || b.crashSignatureId === undefined) {
          return -1;
        }
        return a.crashSignatureId - b.crashSignatureId;
      }
      return 0;
    });

    // Delta-encode the hour values to shrink the JSON output.
    const hours = [];
    let previousBucket = 0;
    for (const item of aggregated) {
      hours.push(item.hour - previousBucket);
      previousBucket = item.hour;
    }

    const result = {
      hours,
    };

    if (returnTaskIds) {
      result.taskIdIds = aggregated.map(a => a.taskIdIds);
    } else {
      result.counts = aggregated.map(a => a.count);
    }

    if (includeMessages) {
      if (aggregated.some(a => "messageId" in a && a.messageId !== undefined)) {
        result.messageIds = aggregated.map(a => a.messageId ?? null);
      }
      if (
        aggregated.some(
          a => "crashSignatureId" in a && a.crashSignatureId !== undefined
        )
      ) {
        result.crashSignatureIds = aggregated.map(
          a => a.crashSignatureId ?? null
        );
      }
      if (returnTaskIds && aggregated.some(a => a.minidumps?.length)) {
        result.minidumps = aggregated.map(a => a.minidumps);
      }
    }

    return result;
  }

  console.log("Aggregating passing test runs by hour...");

  const finalTestRuns = [];

  for (let testId = 0; testId < mergedTestRuns.length; testId++) {
    const testGroup = mergedTestRuns[testId];
    if (!testGroup) {
      continue;
    }

    finalTestRuns[testId] = [];

    for (let statusId = 0; statusId < testGroup.length; statusId++) {
      const statusGroup = testGroup[statusId];
      if (!statusGroup || statusGroup.length === 0) {
        continue;
      }

      const status = mergedTables.statuses[statusId];
      const isPass = status.startsWith("PASS");

      // Passes: counts only. Failures: keep message/crash grouping and
      // task IDs so individual runs remain traceable.
      if (isPass) {
        finalTestRuns[testId][statusId] = aggregateRunsByHour(statusGroup);
      } else {
        finalTestRuns[testId][statusId] = aggregateRunsByHour(
          statusGroup,
          true,
          true
        );
      }
    }
  }

  // A test counts as "with failures" if it has any non-PASS status group.
  const testsWithFailures = finalTestRuns.filter(testGroup =>
    testGroup?.some(
      (sg, idx) => sg && !mergedTables.statuses[idx].startsWith("PASS")
    )
  ).length;

  console.log("Sorting string tables by frequency...");

  // Sort string tables by frequency for better compression
  const dataStructure = {
    tables: mergedTables,
    taskInfo: mergedTaskInfo,
    testInfo: mergedTestInfo,
    testRuns: finalTestRuns,
  };

  const sortedData = sortStringTablesByFrequency(dataStructure);

  const outputData = {
    metadata: {
      startDate,
      endDate,
      days: dates.length,
      startTime,
      generatedAt: new Date().toISOString(),
      totalTestCount: mergedTestInfo.testPathIds.length,
      testsWithFailures,
      aggregatedFrom: dailyFiles.map(f => path.basename(f.filePath)),
    },
    tables: sortedData.tables,
    taskInfo: sortedData.taskInfo,
    testInfo: sortedData.testInfo,
    testRuns: sortedData.testRuns,
  };

  const outputFileWithDetails = path.join(
    OUTPUT_DIR,
    "xpcshell-issues-with-taskids.json"
  );
  saveJsonFile(outputData, outputFileWithDetails);

  // Create small file with all statuses aggregated (task IDs collapsed to
  // counts; task/repository tables omitted entirely).
  console.log("Creating small aggregated version...");

  const smallTestRuns = sortedData.testRuns.map(testGroup => {
    if (!testGroup) {
      return testGroup;
    }
    return testGroup.map(statusGroup => {
      if (!statusGroup) {
        return statusGroup;
      }
      // Already count-based (a PASS group) — reuse as-is.
      if (statusGroup.counts) {
        return statusGroup;
      }

      const result = {
        counts: statusGroup.taskIdIds.map(arr => arr.length),
        hours: statusGroup.hours,
      };

      if (statusGroup.messageIds) {
        result.messageIds = statusGroup.messageIds;
      }

      if (statusGroup.crashSignatureIds) {
        result.crashSignatureIds = statusGroup.crashSignatureIds;
      }

      return result;
    });
  });

  const smallOutput = {
    metadata: outputData.metadata,
    tables: {
      testPaths: sortedData.tables.testPaths,
      testNames: sortedData.tables.testNames,
      statuses: sortedData.tables.statuses,
      messages: sortedData.tables.messages,
      crashSignatures: sortedData.tables.crashSignatures,
      components: sortedData.tables.components,
    },
    testInfo: sortedData.testInfo,
    testRuns: smallTestRuns,
  };

  const outputFileSmall = path.join(OUTPUT_DIR, "xpcshell-issues.json");
  saveJsonFile(smallOutput, outputFileSmall);

  console.log(
    `Successfully created aggregated files with ${outputData.metadata.totalTestCount} tests`
  );
  console.log(`  Tests with failures: ${testsWithFailures}`);
}
   1720 
   1721 async function main() {
   1722  const forceRefetch = process.argv.includes("--force");
   1723 
   1724  // Check for --days parameter
   1725  let numDays = 3;
   1726  const daysIndex = process.argv.findIndex(arg => arg === "--days");
   1727  if (daysIndex !== -1 && daysIndex + 1 < process.argv.length) {
   1728    const daysValue = parseInt(process.argv[daysIndex + 1]);
   1729    if (!isNaN(daysValue) && daysValue > 0 && daysValue <= 30) {
   1730      numDays = daysValue;
   1731    } else {
   1732      console.error("Error: --days must be a number between 1 and 30");
   1733      process.exit(1);
   1734    }
   1735  }
   1736 
   1737  if (process.env.TASK_ID) {
   1738    await fetchPreviousRunData();
   1739  }
   1740 
   1741  // Fetch component mapping data
   1742  await fetchComponentsData();
   1743 
   1744  // Check for --revision parameter (format: project:revision)
   1745  const revisionIndex = process.argv.findIndex(arg => arg === "--revision");
   1746  if (revisionIndex !== -1 && revisionIndex + 1 < process.argv.length) {
   1747    const revisionArg = process.argv[revisionIndex + 1];
   1748    const parts = revisionArg.split(":");
   1749 
   1750    if (parts.length !== 2) {
   1751      console.error(
   1752        "Error: --revision must be in format project:revision (e.g., try:abc123 or autoland:def456)"
   1753      );
   1754      process.exit(1);
   1755    }
   1756 
   1757    const [project, revision] = parts;
   1758    const output = await processRevisionData(project, revision, forceRefetch);
   1759 
   1760    if (output) {
   1761      console.log("Successfully processed revision data.");
   1762    } else {
   1763      console.log("\nNo data was successfully processed.");
   1764    }
   1765    return;
   1766  }
   1767 
   1768  // Check for --try option (shortcut for --revision try:...)
   1769  const tryIndex = process.argv.findIndex(arg => arg === "--try");
   1770  if (tryIndex !== -1 && tryIndex + 1 < process.argv.length) {
   1771    const revision = process.argv[tryIndex + 1];
   1772    const output = await processRevisionData("try", revision, forceRefetch);
   1773 
   1774    if (output) {
   1775      console.log("Successfully processed try commit data.");
   1776    } else {
   1777      console.log("\nNo data was successfully processed.");
   1778    }
   1779    return;
   1780  }
   1781 
   1782  // Fetch data for the specified number of days
   1783  const dates = [];
   1784  for (let i = 1; i <= numDays; i++) {
   1785    dates.push(getDateString(i));
   1786  }
   1787 
   1788  console.log(
   1789    `Fetching xpcshell test data for the last ${numDays} day${numDays > 1 ? "s" : ""}: ${dates.join(", ")}`
   1790  );
   1791 
   1792  for (const date of dates) {
   1793    console.log(`\n=== Processing ${date} ===`);
   1794    await processDateData(date, forceRefetch);
   1795  }
   1796 
   1797  // Create aggregated failures file if processing multiple days
   1798  if (numDays > 1) {
   1799    await createAggregatedFailuresFile(dates);
   1800  }
   1801 
   1802  // Create index file with available dates
   1803  const indexFile = path.join(OUTPUT_DIR, "index.json");
   1804  const availableDates = [];
   1805 
   1806  // Scan for all xpcshell-*.json files in the output directory
   1807  const files = fs.readdirSync(OUTPUT_DIR);
   1808  files.forEach(file => {
   1809    const match = file.match(/^xpcshell-(\d{4}-\d{2}-\d{2})\.json$/);
   1810    if (match) {
   1811      availableDates.push(match[1]);
   1812    }
   1813  });
   1814 
   1815  // Sort dates in descending order (newest first)
   1816  availableDates.sort((a, b) => b.localeCompare(a));
   1817 
   1818  fs.writeFileSync(
   1819    indexFile,
   1820    JSON.stringify({ dates: availableDates }, null, 2)
   1821  );
   1822  console.log(
   1823    `\nIndex file saved as ${indexFile} with ${availableDates.length} dates`
   1824  );
   1825 }
   1826 
   1827 main().catch(console.error);