tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SearchBrowsingHistory.sys.mjs (13785B)


      1 /**
      2 * This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
      5 */
      6 
      7 const lazy = {};
      8 ChromeUtils.defineESModuleGetters(lazy, {
      9  PageThumbs: "resource://gre/modules/PageThumbs.sys.mjs",
     10  PageThumbsStorage: "resource://gre/modules/PageThumbs.sys.mjs",
     11  PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs",
     12  getPlacesSemanticHistoryManager:
     13    "resource://gre/modules/PlacesSemanticHistoryManager.sys.mjs",
     14  // Domain fallback / workaround for general-category queries (games, movies, etc.)
     15  SearchBrowsingHistoryDomainBoost:
     16    "moz-src:///browser/components/aiwindow/models/SearchBrowsingHistoryDomainBoost.sys.mjs",
     17 });
     18 
     19 /**
     20 * Convert ISO timestamp string to microseconds (moz_places format).
     21 *
     22 * @param {string|null} iso
     23 * @returns {number|null}
     24 */
     25 function isoToMicroseconds(iso) {
     26  if (!iso) {
     27    return null;
     28  }
     29  const ms = new Date(iso).getTime();
     30  return Number.isFinite(ms) ? ms * 1000 : null;
     31 }
     32 
     33 /**
     34 * Normalize a history row from either:
     35 * - semantic SQL result (mozIStorageRow), or
     36 * - Places history node (plain object from nsINavHistoryResultNode).
     37 *
     38 * @param {object} row
     39 * @param {boolean} [fromNode=false]  // true if row came from Places node
     40 * @returns {Promise<object>}         // normalized history entry
     41 */
     42 async function buildHistoryRow(row, fromNode = false) {
     43  let title, url, visitDateIso, visitCount, distance, frecency, previewImageURL;
     44 
     45  if (!fromNode) {
     46    // from semantic / SQL result (mozIStorageRow)
     47    title = row.getResultByName("title");
     48    url = row.getResultByName("url");
     49    visitCount = row.getResultByName("visit_count");
     50    distance = row.getResultByName("distance");
     51    frecency = row.getResultByName("frecency");
     52    previewImageURL = row.getResultByName("preview_image_url");
     53 
     54    // convert last_visit_date to ISO format
     55    const lastVisitRaw = row.getResultByName("last_visit_date");
     56    // last_visit_date is in microseconds from moz_places
     57    if (typeof lastVisitRaw === "number") {
     58      visitDateIso = new Date(Math.round(lastVisitRaw / 1000)).toISOString();
     59    } else if (lastVisitRaw instanceof Date) {
     60      visitDateIso = lastVisitRaw.toISOString();
     61    } else {
     62      visitDateIso = null;
     63    }
     64  } else {
     65    // from basic / Places history node (nsINavHistoryResultNode)
     66    title = row.title;
     67    url = row.uri;
     68    visitCount = row.accessCount;
     69    frecency = row.frecency;
     70 
     71    // convert time to ISO format
     72    const lastVisitDate = lazy.PlacesUtils.toDate(row.time);
     73    visitDateIso = lastVisitDate ? lastVisitDate.toISOString() : null;
     74  }
     75 
     76  let relevanceScore;
     77  if (typeof distance === "number") {
     78    relevanceScore = 1 - distance;
     79  } else {
     80    relevanceScore = frecency;
     81  }
     82 
     83  // Get thumbnail URL for the page if preview_image_url does not exist
     84  try {
     85    if (!previewImageURL) {
     86      if (await lazy.PageThumbsStorage.fileExistsForURL(url)) {
     87        previewImageURL = lazy.PageThumbs.getThumbnailURL(url);
     88      }
     89    }
     90  } catch (e) {
     91    // If thumbnail lookup fails, skip it
     92  }
     93 
     94  // Get favicon URL for the page
     95  let faviconUrl = null;
     96  try {
     97    const faviconURI = Services.io.newURI(url);
     98    faviconUrl = `page-icon:${faviconURI.spec}`;
     99  } catch (e) {
    100    // If favicon lookup fails, skip it
    101  }
    102 
    103  return {
    104    title: title || url,
    105    url,
    106    visitDate: visitDateIso, // ISO timestamp format
    107    visitCount: visitCount || 0,
    108    relevanceScore: relevanceScore || 0, // Use embedding's distance as relevance score when available
    109    ...(faviconUrl && { favicon: faviconUrl }), // Only include favicon if available
    110    ...(previewImageURL && { thumbnail: previewImageURL }), // Only include thumbnail if available
    111  };
    112 }
    113 
    114 /**
    115 * Plain time-range browsing history search without search term (no semantic search).
    116 *
    117 * @param {object} params
    118 * @param {number|null} params.startTs
    119 * @param {number|null} params.endTs
    120 * @param {number} params.historyLimit
    121 * @returns {Promise<object[]>}
    122 */
    123 async function searchBrowsingHistoryTimeRange({
    124  startTs,
    125  endTs,
    126  historyLimit,
    127 }) {
    128  const semanticManager = lazy.getPlacesSemanticHistoryManager();
    129  const conn = await semanticManager.getConnection();
    130 
    131  const results = await conn.executeCached(
    132    `
    133      SELECT id,
    134             title,
    135             url,
    136             NULL AS distance,
    137             visit_count,
    138             frecency,
    139             last_visit_date,
    140             preview_image_url
    141      FROM moz_places
    142      WHERE frecency <> 0
    143      AND (:startTs IS NULL OR last_visit_date >= :startTs)
    144      AND (:endTs IS NULL OR last_visit_date <= :endTs)
    145      ORDER BY last_visit_date DESC, frecency DESC
    146      LIMIT :limit
    147    `,
    148    {
    149      startTs,
    150      endTs,
    151      limit: historyLimit,
    152    }
    153  );
    154 
    155  const rows = [];
    156  for (let row of results) {
    157    rows.push(await buildHistoryRow(row));
    158  }
    159  return rows;
    160 }
    161 
    162 /**
    163 * Normalize tensor/output format from the embedder into a single vector.
    164 *
    165 * @param {Array|object} tensor
    166 * @returns {Array|Float32Array}
    167 */
    168 function extractVectorFromTensor(tensor) {
    169  if (!tensor) {
    170    throw new Error("Unexpected empty tensor");
    171  }
    172 
    173  // Case 1: { output: ... } or { metrics, output }
    174  if (tensor.output) {
    175    if (
    176      Array.isArray(tensor.output) &&
    177      (Array.isArray(tensor.output[0]) || ArrayBuffer.isView(tensor.output[0]))
    178    ) {
    179      // output is an array of vectors, return the first
    180      return tensor.output[0];
    181    }
    182    // output is already a single vector
    183    return tensor.output;
    184  }
    185 
    186  // Case 2: tensor is nested like [[...]]
    187  if (
    188    Array.isArray(tensor) &&
    189    tensor.length === 1 &&
    190    Array.isArray(tensor[0])
    191  ) {
    192    tensor = tensor[0];
    193  }
    194 
    195  // Then we check if it's an array of arrays or just a single value.
    196  if (
    197    Array.isArray(tensor) &&
    198    (Array.isArray(tensor[0]) || ArrayBuffer.isView(tensor[0]))
    199  ) {
    200    return tensor[0];
    201  }
    202 
    203  return tensor;
    204 }
    205 
    206 /**
    207 * Semantic browsing history search using embeddings.
    208 *
    209 * This performs a two-stage retrieval for performance:
    210 * 1. Coarse search: over the quantized embeddings (`embedding_coarse`) to
    211 *    quickly select up to 100 candidate rows. This hard limit keeps the
    212 *    expensive cosine-distance computation bounded.
    213 * 2. Refined search: computes the exact cosine distance for those candidates
    214 *    and applies the caller-provided `historyLimit` and `distanceThreshold`
    215 *    filters.
    216 *
    217 * @param {object} params
    218 * @param {string} params.searchTerm
    219 * @param {number|null} params.startTs
    220 * @param {number|null} params.endTs
    221 * @param {number} params.historyLimit
    222 * @param {number} params.distanceThreshold
    223 * @returns {Promise<object[]>}
    224 */
    225 async function searchBrowsingHistorySemantic({
    226  searchTerm,
    227  startTs,
    228  endTs,
    229  historyLimit,
    230  distanceThreshold,
    231 }) {
    232  const semanticManager = lazy.getPlacesSemanticHistoryManager();
    233  await semanticManager.embedder.ensureEngine();
    234 
    235  // Embed search term
    236  let tensor = await semanticManager.embedder.embed(searchTerm);
    237  const vec = extractVectorFromTensor(tensor);
    238  const vector = lazy.PlacesUtils.tensorToSQLBindable(vec);
    239 
    240  let conn = await semanticManager.getConnection();
    241  const results = await conn.executeCached(
    242    `
    243    WITH coarse_matches AS (
    244      SELECT rowid,
    245             embedding
    246      FROM vec_history
    247      WHERE embedding_coarse match vec_quantize_binary(:vector)
    248      ORDER BY distance
    249      LIMIT 100
    250    ),
    251    matches AS (
    252      SELECT url_hash, vec_distance_cosine(embedding, :vector) AS distance
    253      FROM vec_history_mapping
    254      JOIN coarse_matches USING (rowid)
    255      WHERE distance <= :distanceThreshold
    256      ORDER BY distance
    257      LIMIT :limit
    258    )
    259    SELECT id,
    260           title,
    261           url,
    262           distance,
    263           visit_count,
    264           frecency,
    265           last_visit_date,
    266           preview_image_url
    267    FROM moz_places
    268    JOIN matches USING (url_hash)
    269    WHERE frecency <> 0
    270    AND (:startTs IS NULL OR last_visit_date >= :startTs)
    271    AND (:endTs IS NULL OR last_visit_date <= :endTs)
    272    ORDER BY distance
    273    `,
    274    {
    275      vector,
    276      distanceThreshold,
    277      limit: historyLimit,
    278      startTs,
    279      endTs,
    280    }
    281  );
    282 
    283  const rows = [];
    284  for (let row of results) {
    285    rows.push(await buildHistoryRow(row));
    286  }
    287 
    288  // Domain fallback for general-category queries (games, movies, news, etc.)
    289  // Keep semantic ranking primary, only top-up if we have room.
    290  if (rows.length < historyLimit) {
    291    const domains =
    292      lazy.SearchBrowsingHistoryDomainBoost.matchDomains(searchTerm);
    293    if (domains?.length) {
    294      const domainRows =
    295        await lazy.SearchBrowsingHistoryDomainBoost.searchByDomains({
    296          conn,
    297          domains,
    298          startTs,
    299          endTs,
    300          historyLimit: Math.max(historyLimit * 2, 200), // extra for dedupe
    301          buildHistoryRow,
    302        });
    303 
    304      return lazy.SearchBrowsingHistoryDomainBoost.mergeDedupe(
    305        rows,
    306        domainRows,
    307        historyLimit
    308      );
    309    }
    310  }
    311 
    312  return rows;
    313 }
    314 
    315 /**
    316 * Browsing history search using the default history search.
    317 *
    318 * @param {object} params
    319 * @param {string} params.searchTerm
    320 * @param {number} params.historyLimit
    321 * @returns {Promise<object[]>}
    322 */
    323 async function searchBrowsingHistoryBasic({ searchTerm, historyLimit }) {
    324  let root;
    325  let openedRoot = false;
    326 
    327  try {
    328    const currentHistory = lazy.PlacesUtils.history;
    329    const query = currentHistory.getNewQuery();
    330    const opts = currentHistory.getNewQueryOptions();
    331 
    332    // Use Places' built-in text filtering
    333    query.searchTerms = searchTerm;
    334 
    335    // Simple URI results, ranked by frecency
    336    opts.resultType = Ci.nsINavHistoryQueryOptions.RESULTS_AS_URI;
    337    opts.sortingMode = Ci.nsINavHistoryQueryOptions.SORT_BY_FRECENCY_DESCENDING;
    338    opts.maxResults = historyLimit;
    339    opts.excludeQueries = false;
    340    opts.queryType = Ci.nsINavHistoryQueryOptions.QUERY_TYPE_HISTORY;
    341 
    342    const result = currentHistory.executeQuery(query, opts);
    343    root = result.root;
    344 
    345    if (!root.containerOpen) {
    346      root.containerOpen = true;
    347      openedRoot = true;
    348    }
    349 
    350    const rows = [];
    351    for (let i = 0; i < root.childCount && rows.length < historyLimit; i++) {
    352      const node = root.getChild(i);
    353      rows.push(await buildHistoryRow(node, true));
    354    }
    355    return rows;
    356  } catch (error) {
    357    console.error("Error searching browser history:", error);
    358    return [];
    359  } finally {
    360    if (root && openedRoot) {
    361      root.containerOpen = false;
    362    }
    363  }
    364 }
    365 
    366 /**
    367 * Searches browser history using semantic search when possible, otherwise basic
    368 * text search or time-range filtering.
    369 *
    370 * Rules:
    371 *   - Empty searchTerm: time-range search (if start/end given) or recent history.
    372 *   - Non-empty searchTerm: semantic search when available, otherwise basic text
    373 *     search (ignore time filtering).
    374 *
    375 * @param {object} params
    376 *  The search parameters.
    377 * @param {string} params.searchTerm
    378 *  The search string. If null or empty, semantic search is skipped and
    379 *  results are filtered by time range and sorted by last_visit_date and frecency.
    380 * @param {string|null} params.startTs
    381 *  Optional local ISO-8601 start timestamp (e.g. "2025-11-07T09:00:00").
    382 * @param {string|null} params.endTs
    383 *  Optional local ISO-8601 end timestamp (e.g. "2025-11-07T09:00:00").
    384 * @param {number} params.historyLimit
    385 *  Maximum number of history results to return.
    386 * @returns {Promise<object>}
    387 *  A promise resolving to an object with the search term and history results.
    388 *  Includes `count` when matches exist, a `message` when none are found, or an
    389 *  `error` string on failure.
    390 */
    391 export async function searchBrowsingHistory({
    392  searchTerm = "",
    393  startTs = null,
    394  endTs = null,
    395  historyLimit = 15,
    396 }) {
    397  let rows = [];
    398 
    399  try {
    400    // Convert ISO timestamp strings to microseconds to match the format used in moz_places
    401    const startUs = isoToMicroseconds(startTs);
    402    const endUs = isoToMicroseconds(endTs);
    403 
    404    const distanceThreshold = Services.prefs.getFloatPref(
    405      "places.semanticHistory.distanceThreshold",
    406      0.6
    407    );
    408 
    409    const semanticManager = lazy.getPlacesSemanticHistoryManager();
    410 
    411    // If semantic search cannot be used or we don't have enough entries, always
    412    // fall back to plain time-range search.
    413    const canUseSemantic =
    414      semanticManager.canUseSemanticSearch &&
    415      (await semanticManager.hasSufficientEntriesForSearching());
    416 
    417    if (!searchTerm?.trim()) {
    418      // Plain time-range search (no searchTerm)
    419      rows = await searchBrowsingHistoryTimeRange({
    420        startTs: startUs,
    421        endTs: endUs,
    422        historyLimit,
    423      });
    424    } else if (canUseSemantic) {
    425      // Semantic search
    426      rows = await searchBrowsingHistorySemantic({
    427        searchTerm,
    428        startTs: startUs,
    429        endTs: endUs,
    430        historyLimit,
    431        distanceThreshold,
    432      });
    433    } else {
    434      // Fallback to basic search without time window if semantic search not enable or insufficient records.
    435      rows = await searchBrowsingHistoryBasic({
    436        searchTerm,
    437        historyLimit,
    438      });
    439    }
    440 
    441    if (rows.length === 0) {
    442      return JSON.stringify({
    443        searchTerm,
    444        results: [],
    445        message: searchTerm
    446          ? `No browser history found for "${searchTerm}".`
    447          : "No browser history found in the requested time range.",
    448      });
    449    }
    450 
    451    // Return as JSON string with metadata
    452    return JSON.stringify({
    453      searchTerm,
    454      count: rows.length,
    455      results: rows,
    456    });
    457  } catch (error) {
    458    console.error("Error searching browser history:", error);
    459    return JSON.stringify({
    460      searchTerm,
    461      error: `Error searching browser history: ${error.message}`,
    462      results: [],
    463    });
    464  }
    465 }