textrecognition.mjs (12720B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 window.docShell.chromeEventHandler.classList.add("textRecognitionDialogFrame"); 6 7 window.addEventListener("DOMContentLoaded", () => { 8 // The arguments are passed in as the final parameters to TabDialogBox.prototype.open. 9 new TextRecognitionModal(...window.arguments); 10 }); 11 12 /** 13 * @typedef {object} TextRecognitionResult 14 * @property {number} confidence 15 * @property {string} string 16 * @property {DOMQuad} quad 17 */ 18 19 class TextRecognitionModal { 20 /** 21 * @param {Promise<TextRecognitionResult[]>} resultsPromise 22 * @param {Function} resizeVertically 23 * @param {object} [openLinkIn] 24 * @param {string} openLinkIn.url 25 * @param {string} openLinkIn.where 26 * @param {object} openLinkIn.params 27 * @param {TimerId} timerId 28 */ 29 constructor(resultsPromise, resizeVertically, openLinkIn, timerId) { 30 /** @type {HTMLElement} */ 31 this.textEl = document.querySelector(".textRecognitionText"); 32 33 /** @type {NodeListOf<HTMLElement>} */ 34 this.headerEls = document.querySelectorAll(".textRecognitionHeader"); 35 36 /** @type {HTMLAnchorElement} */ 37 this.linkEl = document.querySelector( 38 "#text-recognition-header-no-results a" 39 ); 40 41 this.resizeVertically = resizeVertically; 42 this.openLinkIn = openLinkIn; 43 this.setupLink(); 44 this.setupCloseHandler(); 45 46 this.showHeaderByID("text-recognition-header-loading"); 47 48 resultsPromise.then( 49 ({ results, direction }) => { 50 if (results.length === 0) { 51 // Update the UI to indicate that there were no results. 52 this.showHeaderByID("text-recognition-header-no-results"); 53 // It's still worth recording telemetry times, as the API was still invoked. 54 Glean.textRecognition.apiPerformance.stopAndAccumulate(timerId); 55 return; 56 } 57 58 // There were results, cluster them into a nice presentation, and present 59 // the results to the UI. 60 this.runClusteringAndUpdateUI(results, direction); 61 this.showHeaderByID("text-recognition-header-results"); 62 Glean.textRecognition.apiPerformance.stopAndAccumulate(timerId); 63 64 TextRecognitionModal.recordInteractionTime(); 65 }, 66 error => { 67 // There was an error in the text recognition call. Treat this as the same 68 // as if there were no results, but report the error to the console and telemetry. 69 this.showHeaderByID("text-recognition-header-no-results"); 70 71 console.error( 72 "There was an error recognizing the text from an image.", 73 error 74 ); 75 Glean.browserUiInteraction.textrecognitionError.add(1); 76 Glean.textRecognition.apiPerformance.cancel(timerId); 77 } 78 ); 79 } 80 81 /** 82 * After the results are shown, measure how long a user interacts with the modal. 83 */ 84 static recordInteractionTime() { 85 let timerId = Glean.textRecognition.interactionTiming.start(); 86 87 const finish = () => { 88 Glean.textRecognition.interactionTiming.stopAndAccumulate(timerId); 89 window.removeEventListener("blur", finish); 90 window.removeEventListener("unload", finish); 91 }; 92 93 // The user's focus went away, record this as the total interaction, even if they 94 // go back and interact with it more. This can be triggered by doing actions like 95 // clicking the URL bar, or by switching tabs. 96 window.addEventListener("blur", finish); 97 98 // The modal is closed. 99 window.addEventListener("unload", finish); 100 } 101 102 /** 103 * After the results are shown, measure how long a user interacts with the modal. 104 * 105 * @param {number} textLength 106 */ 107 static recordTextLengthTelemetry(textLength) { 108 Glean.textRecognition.textLength.accumulateSingleSample(textLength); 109 } 110 111 setupCloseHandler() { 112 document 113 .querySelector("#text-recognition-close") 114 .addEventListener("click", () => { 115 window.close(); 116 }); 117 } 118 119 /** 120 * Apply the variables for the support.mozilla.org URL. 121 */ 122 setupLink() { 123 this.linkEl.href = Services.urlFormatter.formatURL(this.linkEl.href); 124 this.linkEl.addEventListener("click", event => { 125 event.preventDefault(); 126 this.openLinkIn(this.linkEl.href, "tab", { 127 forceForeground: true, 128 triggeringPrincipal: 129 Services.scriptSecurityManager.getSystemPrincipal(), 130 }); 131 }); 132 } 133 134 /** 135 * A helper to only show the appropriate header. 136 * 137 * @param {string} id 138 */ 139 showHeaderByID(id) { 140 for (const header of this.headerEls) { 141 header.style.display = "none"; 142 } 143 144 document.getElementById(id).style.display = ""; 145 this.resizeVertically(); 146 } 147 148 /** 149 * @param {string} text 150 */ 151 static copy(text) { 152 const clipboard = Cc["@mozilla.org/widget/clipboardhelper;1"].getService( 153 Ci.nsIClipboardHelper 154 ); 155 clipboard.copyString(text); 156 } 157 158 /** 159 * Cluster the text based on its visual position. 160 * 161 * @param {TextRecognitionResult[]} results 162 * @param {"ltr" | "rtl"} direction 163 */ 164 runClusteringAndUpdateUI(results, direction) { 165 /** @type {Vec2[]} */ 166 const centers = []; 167 168 for (const result of results) { 169 const p = result.quad; 170 171 // Pick either the left-most or right-most edge. This optimizes for 172 // aligned text over centered text. 173 const minOrMax = direction === "ltr" ? Math.min : Math.max; 174 175 centers.push([ 176 minOrMax(p.p1.x, p.p2.x, p.p3.x, p.p4.x), 177 (p.p1.y, p.p2.y, p.p3.y, p.p4.y) / 4, 178 ]); 179 } 180 181 const distSq = new DistanceSquared(centers); 182 183 // The values are ranged 0 - 1. This value might be able to be determined 184 // algorithmically. 185 const averageDistance = Math.sqrt(distSq.quantile(0.2)); 186 const clusters = densityCluster( 187 centers, 188 // Neighborhood radius: 189 averageDistance, 190 // Minimum points to form a cluster: 191 2 192 ); 193 194 let text = ""; 195 for (const cluster of clusters) { 196 const pCluster = document.createElement("p"); 197 pCluster.className = "textRecognitionTextCluster"; 198 199 for (let i = 0; i < cluster.length; i++) { 200 const index = cluster[i]; 201 const { string } = results[index]; 202 if (i + 1 === cluster.length) { 203 // Each cluster could be a paragraph, so add newlines to the end 204 // for better copying. 205 text += string + "\n\n"; 206 // The paragraph tag automatically uses two newlines. 207 pCluster.innerText += string; 208 } else { 209 // This text is assumed to be a newlines in a paragraph, so only needs 210 // to be separated by a space. 211 text += string + " "; 212 pCluster.innerText += string + " "; 213 } 214 } 215 this.textEl.appendChild(pCluster); 216 } 217 218 this.textEl.style.display = "block"; 219 220 text = text.trim(); 221 TextRecognitionModal.copy(text); 222 TextRecognitionModal.recordTextLengthTelemetry(text.length); 223 } 224 } 225 226 /** 227 * A two dimensional vector. 228 * 229 * @typedef {number[]} Vec2 230 */ 231 232 /** 233 * @typedef {number} PointIndex 234 */ 235 236 /** 237 * An implementation of the DBSCAN clustering algorithm. 238 * 239 * https://en.wikipedia.org/wiki/DBSCAN#Algorithm 240 * 241 * @param {Vec2[]} points 242 * @param {number} distance 243 * @param {number} minPoints 244 * @returns {Array<PointIndex[]>} 245 */ 246 function densityCluster(points, distance, minPoints) { 247 /** 248 * A flat of array of labels that match the index of the points array. The values have 249 * the following meaning: 250 * 251 * undefined := No label has been assigned 252 * "noise" := Noise is a point that hasn't been clustered. 253 * number := Cluster index 254 * 255 * @type {undefined | "noise" | Index} 256 */ 257 const labels = Array(points.length); 258 const noiseLabel = "noise"; 259 260 let nextClusterIndex = 0; 261 262 // Every point must be visited at least once. Often they will be visited earlier 263 // in the interior of the loop. 264 for (let pointIndex = 0; pointIndex < points.length; pointIndex++) { 265 if (labels[pointIndex] !== undefined) { 266 // This point is already labeled from the interior logic. 267 continue; 268 } 269 270 // Get the neighbors that are within the range of the epsilon value, includes 271 // the current point. 272 const neighbors = getNeighborsWithinDistance(points, distance, pointIndex); 273 274 if (neighbors.length < minPoints) { 275 labels[pointIndex] = noiseLabel; 276 continue; 277 } 278 279 // Start a new cluster. 280 const clusterIndex = nextClusterIndex++; 281 labels[pointIndex] = clusterIndex; 282 283 // Fill the cluster. The neighbors array grows. 284 for (let i = 0; i < neighbors.length; i++) { 285 const nextPointIndex = neighbors[i]; 286 if (typeof labels[nextPointIndex] === "number") { 287 // This point was already claimed, ignore it. 288 continue; 289 } 290 291 if (labels[nextPointIndex] === noiseLabel) { 292 // Claim this point and move on since noise has no neighbors. 293 labels[nextPointIndex] = clusterIndex; 294 continue; 295 } 296 297 // Claim this point as part of this cluster. 298 labels[nextPointIndex] = clusterIndex; 299 300 const newNeighbors = getNeighborsWithinDistance( 301 points, 302 distance, 303 nextPointIndex 304 ); 305 306 if (newNeighbors.length >= minPoints) { 307 // Add on to the neighbors if more are found. 308 for (const newNeighbor of newNeighbors) { 309 if (!neighbors.includes(newNeighbor)) { 310 neighbors.push(newNeighbor); 311 } 312 } 313 } 314 } 315 } 316 317 const clusters = []; 318 319 // Pre-populate the clusters. 320 for (let i = 0; i < nextClusterIndex; i++) { 321 clusters[i] = []; 322 } 323 324 // Turn the labels into clusters, adding the noise to the end. 325 for (let pointIndex = 0; pointIndex < labels.length; pointIndex++) { 326 const label = labels[pointIndex]; 327 if (typeof label === "number") { 328 clusters[label].push(pointIndex); 329 } else if (label === noiseLabel) { 330 // Add a single cluster. 331 clusters.push([pointIndex]); 332 } else { 333 throw new Error("Logic error. Expected every point to have a label."); 334 } 335 } 336 337 clusters.sort((a, b) => points[b[0]][1] - points[a[0]][1]); 338 339 return clusters; 340 } 341 342 /** 343 * @param {Vec2[]} points 344 * @param {number} distance 345 * @param {number} index 346 * @returns {Index[]} 347 */ 348 function getNeighborsWithinDistance(points, distance, index) { 349 let neighbors = [index]; 350 // There is no reason to compute the square root here if we square the 351 // original distance. 352 const distanceSquared = distance * distance; 353 354 for (let otherIndex = 0; otherIndex < points.length; otherIndex++) { 355 if (otherIndex === index) { 356 continue; 357 } 358 const a = points[index]; 359 const b = points[otherIndex]; 360 const dx = a[0] - b[0]; 361 const dy = a[1] - b[1]; 362 363 if (dx * dx + dy * dy < distanceSquared) { 364 neighbors.push(otherIndex); 365 } 366 } 367 368 return neighbors; 369 } 370 371 /** 372 * This class pre-computes the squared distances to allow for efficient distance lookups, 373 * and it provides a way to look up a distance quantile. 374 */ 375 class DistanceSquared { 376 /** @type {Map<number>} */ 377 #distances = new Map(); 378 #list; 379 #distancesSorted; 380 381 /** 382 * @param {Vec2[]} list 383 */ 384 constructor(list) { 385 this.#list = list; 386 for (let aIndex = 0; aIndex < list.length; aIndex++) { 387 for (let bIndex = aIndex + 1; bIndex < list.length; bIndex++) { 388 const id = this.#getTupleID(aIndex, bIndex); 389 const a = this.#list[aIndex]; 390 const b = this.#list[bIndex]; 391 const dx = a[0] - b[0]; 392 const dy = a[1] - b[1]; 393 this.#distances.set(id, dx * dx + dy * dy); 394 } 395 } 396 } 397 398 /** 399 * Returns a unique tuple ID to identify the relationship between two vectors. 400 */ 401 #getTupleID(aIndex, bIndex) { 402 return aIndex < bIndex 403 ? aIndex * this.#list.length + bIndex 404 : bIndex * this.#list.length + aIndex; 405 } 406 407 /** 408 * Returns the distance squared between two vectors. 409 * 410 * @param {Index} aIndex 411 * @param {Index} bIndex 412 * @returns {number} The distance squared 413 */ 414 get(aIndex, bIndex) { 415 return this.#distances.get(this.#getTupleID(aIndex, bIndex)); 416 } 417 418 /** 419 * Returns the quantile squared. 420 * 421 * @param {number} percentile - Ranged between 0 - 1 422 * @returns {number} 423 */ 424 quantile(percentile) { 425 if (!this.#distancesSorted) { 426 this.#distancesSorted = [...this.#distances.values()].sort( 427 (a, b) => a - b 428 ); 429 } 430 const index = Math.max( 431 0, 432 Math.min( 433 this.#distancesSorted.length - 1, 434 Math.round(this.#distancesSorted.length * percentile) 435 ) 436 ); 437 return this.#distancesSorted[index]; 438 } 439 }