SearchBrowsingHistory.sys.mjs (13785B)
/**
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

const lazy = {};
ChromeUtils.defineESModuleGetters(lazy, {
  PageThumbs: "resource://gre/modules/PageThumbs.sys.mjs",
  PageThumbsStorage: "resource://gre/modules/PageThumbs.sys.mjs",
  PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs",
  getPlacesSemanticHistoryManager:
    "resource://gre/modules/PlacesSemanticHistoryManager.sys.mjs",
  // Domain fallback / workaround for general-category queries (games, movies, etc.)
  SearchBrowsingHistoryDomainBoost:
    "moz-src:///browser/components/aiwindow/models/SearchBrowsingHistoryDomainBoost.sys.mjs",
});

// Number of results returned when the caller does not supply a usable limit.
const DEFAULT_HISTORY_LIMIT = 15;

/**
 * Coerce a caller-supplied result limit into a non-negative integer.
 *
 * The limit is bound directly to SQL `LIMIT` clauses, where a negative value
 * means "no upper bound", so anything that is missing, non-numeric,
 * non-finite or negative is replaced by the default instead.
 *
 * @param {*} limit
 * @returns {number} a non-negative integer
 */
function normalizeHistoryLimit(limit) {
  if (limit === null || limit === undefined) {
    return DEFAULT_HISTORY_LIMIT;
  }
  const numeric = Number(limit);
  if (!Number.isFinite(numeric) || numeric < 0) {
    return DEFAULT_HISTORY_LIMIT;
  }
  return Math.trunc(numeric);
}

/**
 * Convert ISO timestamp string to microseconds (moz_places format).
 *
 * @param {string|null} iso
 * @returns {number|null}
 *   Microseconds since the epoch, or null when `iso` is empty or unparsable.
 */
function isoToMicroseconds(iso) {
  if (!iso) {
    return null;
  }
  const ms = new Date(iso).getTime();
  return Number.isFinite(ms) ? ms * 1000 : null;
}

/**
 * Format a Date as an ISO string without throwing on invalid dates.
 *
 * `Date.prototype.toISOString` throws a RangeError for an invalid date; a
 * single bad timestamp should not abort the whole search.
 *
 * @param {Date|null|undefined} date
 * @returns {string|null}
 */
function dateToIso(date) {
  return date && Number.isFinite(date.getTime()) ? date.toISOString() : null;
}

/**
 * Read the fields of a semantic / SQL result row (mozIStorageRow).
 *
 * @param {object} row
 * @returns {object} raw field values, with `visitDate` as ISO string or null
 */
function fieldsFromStorageRow(row) {
  const lastVisitRaw = row.getResultByName("last_visit_date");

  let visitDate = null;
  if (typeof lastVisitRaw === "number") {
    // last_visit_date is in microseconds from moz_places
    visitDate = dateToIso(new Date(Math.round(lastVisitRaw / 1000)));
  } else if (lastVisitRaw instanceof Date) {
    visitDate = dateToIso(lastVisitRaw);
  }

  return {
    title: row.getResultByName("title"),
    url: row.getResultByName("url"),
    visitCount: row.getResultByName("visit_count"),
    distance: row.getResultByName("distance"),
    frecency: row.getResultByName("frecency"),
    previewImageURL: row.getResultByName("preview_image_url"),
    visitDate,
  };
}

/**
 * Read the fields of a basic / Places history node (nsINavHistoryResultNode).
 * Nodes carry neither an embedding distance nor a preview image URL.
 *
 * @param {object} node
 * @returns {object} raw field values, with `visitDate` as ISO string or null
 */
function fieldsFromPlacesNode(node) {
  return {
    title: node.title,
    url: node.uri,
    visitCount: node.accessCount,
    distance: undefined,
    frecency: node.frecency,
    previewImageURL: undefined,
    visitDate: dateToIso(lazy.PlacesUtils.toDate(node.time)),
  };
}

/**
 * Normalize a history row from either:
 * - semantic SQL result (mozIStorageRow), or
 * - Places history node (plain object from nsINavHistoryResultNode).
 *
 * All fields are read from `row` synchronously, before the first `await`.
 *
 * @param {object} row
 * @param {boolean} [fromNode=false] // true if row came from Places node
 * @returns {Promise<object>} // normalized history entry
 */
async function buildHistoryRow(row, fromNode = false) {
  const {
    title,
    url,
    visitCount,
    distance,
    frecency,
    previewImageURL,
    visitDate,
  } = fromNode ? fieldsFromPlacesNode(row) : fieldsFromStorageRow(row);

  // Prefer the embedding distance (turned into a similarity) when the row has
  // one; otherwise fall back to frecency.
  const relevanceScore = typeof distance === "number" ? 1 - distance : frecency;

  // Get thumbnail URL for the page if preview_image_url does not exist
  let thumbnail = previewImageURL;
  if (!thumbnail) {
    try {
      if (await lazy.PageThumbsStorage.fileExistsForURL(url)) {
        thumbnail = lazy.PageThumbs.getThumbnailURL(url);
      }
    } catch (e) {
      // Thumbnails are best-effort: if the lookup fails, return the entry
      // without one.
    }
  }

  // Get favicon URL for the page
  let faviconUrl = null;
  try {
    const faviconURI = Services.io.newURI(url);
    faviconUrl = `page-icon:${faviconURI.spec}`;
  } catch (e) {
    // Favicons are best-effort: an unparsable URL simply gets no favicon.
  }

  return {
    title: title || url,
    url,
    visitDate, // ISO timestamp format
    visitCount: visitCount || 0,
    relevanceScore: relevanceScore || 0,
    ...(faviconUrl && { favicon: faviconUrl }), // Only include favicon if available
    ...(thumbnail && { thumbnail }), // Only include thumbnail if available
  };
}

/**
 * Normalize a collection of rows, preserving their order.
 *
 * Each row is handed to `buildHistoryRow` immediately, so the per-row
 * thumbnail lookups overlap instead of running one after another.
 *
 * @param {Iterable<object>} rows
 * @param {boolean} [fromNode=false] // true if rows are Places nodes
 * @returns {Promise<object[]>}
 */
function buildHistoryRows(rows, fromNode = false) {
  return Promise.all(Array.from(rows, row => buildHistoryRow(row, fromNode)));
}

/**
 * Plain time-range browsing history search without search term (no semantic search).
 *
 * @param {object} params
 * @param {number|null} params.startTs
 *   Lower bound in microseconds, or null for no lower bound.
 * @param {number|null} params.endTs
 *   Upper bound in microseconds, or null for no upper bound.
 * @param {number} params.historyLimit
 * @returns {Promise<object[]>}
 */
async function searchBrowsingHistoryTimeRange({
  startTs,
  endTs,
  historyLimit,
}) {
  const semanticManager = lazy.getPlacesSemanticHistoryManager();
  const conn = await semanticManager.getConnection();

  const results = await conn.executeCached(
    `
    SELECT id,
           title,
           url,
           NULL AS distance,
           visit_count,
           frecency,
           last_visit_date,
           preview_image_url
    FROM moz_places
    WHERE frecency <> 0
      AND (:startTs IS NULL OR last_visit_date >= :startTs)
      AND (:endTs IS NULL OR last_visit_date <= :endTs)
    ORDER BY last_visit_date DESC, frecency DESC
    LIMIT :limit
    `,
    {
      startTs,
      endTs,
      limit: historyLimit,
    }
  );

  return buildHistoryRows(results);
}

/**
 * Normalize tensor/output format from the embedder into a single vector.
 *
 * @param {Array|object} tensor
 * @returns {Array|Float32Array}
 * @throws {Error} when `tensor` is falsy
 */
function extractVectorFromTensor(tensor) {
  if (!tensor) {
    throw new Error("Unexpected empty tensor");
  }

  // Case 1: { output: ... } or { metrics, output }
  if (tensor.output) {
    if (
      Array.isArray(tensor.output) &&
      (Array.isArray(tensor.output[0]) || ArrayBuffer.isView(tensor.output[0]))
    ) {
      // output is an array of vectors, return the first
      return tensor.output[0];
    }
    // output is already a single vector
    return tensor.output;
  }

  // Case 2: tensor is nested like [[...]]
  if (
    Array.isArray(tensor) &&
    tensor.length === 1 &&
    Array.isArray(tensor[0])
  ) {
    tensor = tensor[0];
  }

  // Then we check if it's an array of arrays or just a single value.
  if (
    Array.isArray(tensor) &&
    (Array.isArray(tensor[0]) || ArrayBuffer.isView(tensor[0]))
  ) {
    return tensor[0];
  }

  return tensor;
}

/**
 * Semantic browsing history search using embeddings.
 *
 * This performs a two-stage retrieval for performance:
 * 1. Coarse search: over the quantized embeddings (`embedding_coarse`) to
 *    quickly select up to 100 candidate rows. This hard limit keeps the
 *    expensive cosine-distance computation bounded.
 * 2. Refined search: computes the exact cosine distance for those candidates
 *    and applies the caller-provided `historyLimit` and `distanceThreshold`
 *    filters.
 *
 * When fewer than `historyLimit` rows come back and the search term matches
 * known domains, the result is topped up from the domain-boost module.
 *
 * @param {object} params
 * @param {string} params.searchTerm
 * @param {number|null} params.startTs
 * @param {number|null} params.endTs
 * @param {number} params.historyLimit
 * @param {number} params.distanceThreshold
 * @returns {Promise<object[]>}
 */
async function searchBrowsingHistorySemantic({
  searchTerm,
  startTs,
  endTs,
  historyLimit,
  distanceThreshold,
}) {
  const semanticManager = lazy.getPlacesSemanticHistoryManager();
  await semanticManager.embedder.ensureEngine();

  // Embed search term
  const tensor = await semanticManager.embedder.embed(searchTerm);
  const vec = extractVectorFromTensor(tensor);
  const vector = lazy.PlacesUtils.tensorToSQLBindable(vec);

  const conn = await semanticManager.getConnection();
  // NOTE(review): `:limit` is applied inside the `matches` CTE, i.e. before
  // the frecency / time-range filters of the outer SELECT, so fewer than
  // `historyLimit` rows can be returned even if more candidates would
  // qualify. Confirm this is intended.
  const results = await conn.executeCached(
    `
    WITH coarse_matches AS (
      SELECT rowid,
             embedding
      FROM vec_history
      WHERE embedding_coarse match vec_quantize_binary(:vector)
      ORDER BY distance
      LIMIT 100
    ),
    matches AS (
      SELECT url_hash, vec_distance_cosine(embedding, :vector) AS distance
      FROM vec_history_mapping
      JOIN coarse_matches USING (rowid)
      WHERE distance <= :distanceThreshold
      ORDER BY distance
      LIMIT :limit
    )
    SELECT id,
           title,
           url,
           distance,
           visit_count,
           frecency,
           last_visit_date,
           preview_image_url
    FROM moz_places
    JOIN matches USING (url_hash)
    WHERE frecency <> 0
      AND (:startTs IS NULL OR last_visit_date >= :startTs)
      AND (:endTs IS NULL OR last_visit_date <= :endTs)
    ORDER BY distance
    `,
    {
      vector,
      distanceThreshold,
      limit: historyLimit,
      startTs,
      endTs,
    }
  );

  const rows = await buildHistoryRows(results);

  // Domain fallback for general-category queries (games, movies, news, etc.)
  // Keep semantic ranking primary, only top-up if we have room.
  if (rows.length < historyLimit) {
    const domains =
      lazy.SearchBrowsingHistoryDomainBoost.matchDomains(searchTerm);
    if (domains?.length) {
      const domainRows =
        await lazy.SearchBrowsingHistoryDomainBoost.searchByDomains({
          conn,
          domains,
          startTs,
          endTs,
          historyLimit: Math.max(historyLimit * 2, 200), // extra for dedupe
          buildHistoryRow,
        });

      return lazy.SearchBrowsingHistoryDomainBoost.mergeDedupe(
        rows,
        domainRows,
        historyLimit
      );
    }
  }

  return rows;
}

/**
 * Browsing history search using the default history search.
 *
 * Errors are logged and reported to the caller as an empty result list.
 *
 * @param {object} params
 * @param {string} params.searchTerm
 * @param {number} params.historyLimit
 * @returns {Promise<object[]>}
 */
async function searchBrowsingHistoryBasic({ searchTerm, historyLimit }) {
  let root;
  let openedRoot = false;

  try {
    const currentHistory = lazy.PlacesUtils.history;
    const query = currentHistory.getNewQuery();
    const opts = currentHistory.getNewQueryOptions();

    // Use Places' built-in text filtering
    query.searchTerms = searchTerm;

    // Simple URI results, ranked by frecency
    opts.resultType = Ci.nsINavHistoryQueryOptions.RESULTS_AS_URI;
    opts.sortingMode = Ci.nsINavHistoryQueryOptions.SORT_BY_FRECENCY_DESCENDING;
    opts.maxResults = historyLimit;
    opts.excludeQueries = false;
    opts.queryType = Ci.nsINavHistoryQueryOptions.QUERY_TYPE_HISTORY;

    const result = currentHistory.executeQuery(query, opts);
    root = result.root;

    if (!root.containerOpen) {
      root.containerOpen = true;
      openedRoot = true;
    }

    // Collect the child nodes first; buildHistoryRow reads every node field
    // synchronously when it is called, so all node access happens while the
    // container is open and before any asynchronous work starts.
    const nodes = [];
    for (let i = 0; i < root.childCount && nodes.length < historyLimit; i++) {
      nodes.push(root.getChild(i));
    }
    // Await inside the try block so failures are caught here and the
    // container is only closed once normalization has finished.
    return await buildHistoryRows(nodes, true);
  } catch (error) {
    console.error("Error searching browser history:", error);
    return [];
  } finally {
    // Only close the container if this function opened it.
    if (root && openedRoot) {
      root.containerOpen = false;
    }
  }
}

/**
 * Searches browser history using semantic search when possible, otherwise basic
 * text search or time-range filtering.
 *
 * Rules:
 * - Empty searchTerm: time-range search (if start/end given) or recent history.
 * - Non-empty searchTerm: semantic search when available, otherwise basic text
 *   search (ignore time filtering).
 *
 * @param {object} [params]
 *  The search parameters.
 * @param {string} [params.searchTerm]
 *  The search string. If null, empty or whitespace-only, semantic search is
 *  skipped and results are filtered by time range and sorted by
 *  last_visit_date and frecency.
 * @param {string|null} [params.startTs]
 *  Optional local ISO-8601 start timestamp (e.g. "2025-11-07T09:00:00").
 * @param {string|null} [params.endTs]
 *  Optional local ISO-8601 end timestamp (e.g. "2025-11-07T09:00:00").
 * @param {number} [params.historyLimit]
 *  Maximum number of history results to return. Missing, non-numeric or
 *  negative values fall back to 15; fractional values are truncated.
 * @returns {Promise<string>}
 *  A promise resolving to a JSON string encoding an object with the search
 *  term and history results. The object includes `count` when matches exist,
 *  a `message` when none are found, or an `error` string on failure.
 */
export async function searchBrowsingHistory({
  searchTerm = "",
  startTs = null,
  endTs = null,
  historyLimit = DEFAULT_HISTORY_LIMIT,
} = {}) {
  try {
    const limit = normalizeHistoryLimit(historyLimit);
    const hasSearchTerm = Boolean(searchTerm?.trim());

    // Convert ISO timestamp strings to microseconds to match the format used in moz_places
    const startUs = isoToMicroseconds(startTs);
    const endUs = isoToMicroseconds(endTs);

    let rows;
    if (!hasSearchTerm) {
      // Plain time-range search (no searchTerm)
      rows = await searchBrowsingHistoryTimeRange({
        startTs: startUs,
        endTs: endUs,
        historyLimit: limit,
      });
    } else {
      // Semantic availability only matters once there is a search term, so it
      // is checked here rather than up front.
      const semanticManager = lazy.getPlacesSemanticHistoryManager();
      const canUseSemantic =
        semanticManager.canUseSemanticSearch &&
        (await semanticManager.hasSufficientEntriesForSearching());

      if (canUseSemantic) {
        // Semantic search
        const distanceThreshold = Services.prefs.getFloatPref(
          "places.semanticHistory.distanceThreshold",
          0.6
        );
        rows = await searchBrowsingHistorySemantic({
          searchTerm,
          startTs: startUs,
          endTs: endUs,
          historyLimit: limit,
          distanceThreshold,
        });
      } else {
        // Fall back to basic text search, without a time window, when
        // semantic search is not enabled or there are insufficient records.
        rows = await searchBrowsingHistoryBasic({
          searchTerm,
          historyLimit: limit,
        });
      }
    }

    if (rows.length === 0) {
      return JSON.stringify({
        searchTerm,
        results: [],
        // Mirror the dispatch above: a blank term ran the time-range search.
        message: hasSearchTerm
          ? `No browser history found for "${searchTerm}".`
          : "No browser history found in the requested time range.",
      });
    }

    // Return as JSON string with metadata
    return JSON.stringify({
      searchTerm,
      count: rows.length,
      results: rows,
    });
  } catch (error) {
    console.error("Error searching browser history:", error);
    return JSON.stringify({
      searchTerm,
      error: `Error searching browser history: ${error?.message ?? error}`,
      results: [],
    });
  }
}