tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

textrecognition.mjs (12720B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 window.docShell.chromeEventHandler.classList.add("textRecognitionDialogFrame");
      6 
      7 window.addEventListener("DOMContentLoaded", () => {
      8  // The arguments are passed in as the final parameters to TabDialogBox.prototype.open.
      9  new TextRecognitionModal(...window.arguments);
     10 });
     11 
     12 /**
     13 * @typedef {object} TextRecognitionResult
     14 * @property {number} confidence
     15 * @property {string} string
     16 * @property {DOMQuad} quad
     17 */
     18 
     19 class TextRecognitionModal {
     20  /**
     21   * @param {Promise<TextRecognitionResult[]>} resultsPromise
     22   * @param {Function} resizeVertically
     23   * @param {object} [openLinkIn]
     24   * @param {string} openLinkIn.url
     25   * @param {string} openLinkIn.where
     26   * @param {object} openLinkIn.params
     27   * @param {TimerId} timerId
     28   */
     29  constructor(resultsPromise, resizeVertically, openLinkIn, timerId) {
     30    /** @type {HTMLElement} */
     31    this.textEl = document.querySelector(".textRecognitionText");
     32 
     33    /** @type {NodeListOf<HTMLElement>} */
     34    this.headerEls = document.querySelectorAll(".textRecognitionHeader");
     35 
     36    /** @type {HTMLAnchorElement} */
     37    this.linkEl = document.querySelector(
     38      "#text-recognition-header-no-results a"
     39    );
     40 
     41    this.resizeVertically = resizeVertically;
     42    this.openLinkIn = openLinkIn;
     43    this.setupLink();
     44    this.setupCloseHandler();
     45 
     46    this.showHeaderByID("text-recognition-header-loading");
     47 
     48    resultsPromise.then(
     49      ({ results, direction }) => {
     50        if (results.length === 0) {
     51          // Update the UI to indicate that there were no results.
     52          this.showHeaderByID("text-recognition-header-no-results");
     53          // It's still worth recording telemetry times, as the API was still invoked.
     54          Glean.textRecognition.apiPerformance.stopAndAccumulate(timerId);
     55          return;
     56        }
     57 
     58        // There were results, cluster them into a nice presentation, and present
     59        // the results to the UI.
     60        this.runClusteringAndUpdateUI(results, direction);
     61        this.showHeaderByID("text-recognition-header-results");
     62        Glean.textRecognition.apiPerformance.stopAndAccumulate(timerId);
     63 
     64        TextRecognitionModal.recordInteractionTime();
     65      },
     66      error => {
     67        // There was an error in the text recognition call. Treat this as the same
     68        // as if there were no results, but report the error to the console and telemetry.
     69        this.showHeaderByID("text-recognition-header-no-results");
     70 
     71        console.error(
     72          "There was an error recognizing the text from an image.",
     73          error
     74        );
     75        Glean.browserUiInteraction.textrecognitionError.add(1);
     76        Glean.textRecognition.apiPerformance.cancel(timerId);
     77      }
     78    );
     79  }
     80 
     81  /**
     82   * After the results are shown, measure how long a user interacts with the modal.
     83   */
     84  static recordInteractionTime() {
     85    let timerId = Glean.textRecognition.interactionTiming.start();
     86 
     87    const finish = () => {
     88      Glean.textRecognition.interactionTiming.stopAndAccumulate(timerId);
     89      window.removeEventListener("blur", finish);
     90      window.removeEventListener("unload", finish);
     91    };
     92 
     93    // The user's focus went away, record this as the total interaction, even if they
     94    // go back and interact with it more. This can be triggered by doing actions like
     95    // clicking the URL bar, or by switching tabs.
     96    window.addEventListener("blur", finish);
     97 
     98    // The modal is closed.
     99    window.addEventListener("unload", finish);
    100  }
    101 
    102  /**
    103   * After the results are shown, measure how long a user interacts with the modal.
    104   *
    105   * @param {number} textLength
    106   */
    107  static recordTextLengthTelemetry(textLength) {
    108    Glean.textRecognition.textLength.accumulateSingleSample(textLength);
    109  }
    110 
    111  setupCloseHandler() {
    112    document
    113      .querySelector("#text-recognition-close")
    114      .addEventListener("click", () => {
    115        window.close();
    116      });
    117  }
    118 
    119  /**
    120   * Apply the variables for the support.mozilla.org URL.
    121   */
    122  setupLink() {
    123    this.linkEl.href = Services.urlFormatter.formatURL(this.linkEl.href);
    124    this.linkEl.addEventListener("click", event => {
    125      event.preventDefault();
    126      this.openLinkIn(this.linkEl.href, "tab", {
    127        forceForeground: true,
    128        triggeringPrincipal:
    129          Services.scriptSecurityManager.getSystemPrincipal(),
    130      });
    131    });
    132  }
    133 
    134  /**
    135   * A helper to only show the appropriate header.
    136   *
    137   * @param {string} id
    138   */
    139  showHeaderByID(id) {
    140    for (const header of this.headerEls) {
    141      header.style.display = "none";
    142    }
    143 
    144    document.getElementById(id).style.display = "";
    145    this.resizeVertically();
    146  }
    147 
    148  /**
    149   * @param {string} text
    150   */
    151  static copy(text) {
    152    const clipboard = Cc["@mozilla.org/widget/clipboardhelper;1"].getService(
    153      Ci.nsIClipboardHelper
    154    );
    155    clipboard.copyString(text);
    156  }
    157 
    158  /**
    159   * Cluster the text based on its visual position.
    160   *
    161   * @param {TextRecognitionResult[]} results
    162   * @param {"ltr" | "rtl"} direction
    163   */
    164  runClusteringAndUpdateUI(results, direction) {
    165    /** @type {Vec2[]} */
    166    const centers = [];
    167 
    168    for (const result of results) {
    169      const p = result.quad;
    170 
    171      // Pick either the left-most or right-most edge. This optimizes for
    172      // aligned text over centered text.
    173      const minOrMax = direction === "ltr" ? Math.min : Math.max;
    174 
    175      centers.push([
    176        minOrMax(p.p1.x, p.p2.x, p.p3.x, p.p4.x),
    177        (p.p1.y, p.p2.y, p.p3.y, p.p4.y) / 4,
    178      ]);
    179    }
    180 
    181    const distSq = new DistanceSquared(centers);
    182 
    183    // The values are ranged 0 - 1. This value might be able to be determined
    184    // algorithmically.
    185    const averageDistance = Math.sqrt(distSq.quantile(0.2));
    186    const clusters = densityCluster(
    187      centers,
    188      // Neighborhood radius:
    189      averageDistance,
    190      // Minimum points to form a cluster:
    191      2
    192    );
    193 
    194    let text = "";
    195    for (const cluster of clusters) {
    196      const pCluster = document.createElement("p");
    197      pCluster.className = "textRecognitionTextCluster";
    198 
    199      for (let i = 0; i < cluster.length; i++) {
    200        const index = cluster[i];
    201        const { string } = results[index];
    202        if (i + 1 === cluster.length) {
    203          // Each cluster could be a paragraph, so add newlines to the end
    204          // for better copying.
    205          text += string + "\n\n";
    206          // The paragraph tag automatically uses two newlines.
    207          pCluster.innerText += string;
    208        } else {
    209          // This text is assumed to be a newlines in a paragraph, so only needs
    210          // to be separated by a space.
    211          text += string + " ";
    212          pCluster.innerText += string + " ";
    213        }
    214      }
    215      this.textEl.appendChild(pCluster);
    216    }
    217 
    218    this.textEl.style.display = "block";
    219 
    220    text = text.trim();
    221    TextRecognitionModal.copy(text);
    222    TextRecognitionModal.recordTextLengthTelemetry(text.length);
    223  }
    224 }
    225 
    226 /**
    227 * A two dimensional vector.
    228 *
    229 * @typedef {number[]} Vec2
    230 */
    231 
    232 /**
    233 * @typedef {number} PointIndex
    234 */
    235 
    236 /**
    237 * An implementation of the DBSCAN clustering algorithm.
    238 *
    239 * https://en.wikipedia.org/wiki/DBSCAN#Algorithm
    240 *
    241 * @param {Vec2[]} points
    242 * @param {number} distance
    243 * @param {number} minPoints
    244 * @returns {Array<PointIndex[]>}
    245 */
    246 function densityCluster(points, distance, minPoints) {
    247  /**
    248   * A flat of array of labels that match the index of the points array. The values have
    249   * the following meaning:
    250   *
    251   *   undefined := No label has been assigned
    252   *   "noise"   := Noise is a point that hasn't been clustered.
    253   *   number    := Cluster index
    254   *
    255   * @type {undefined | "noise" | Index}
    256   */
    257  const labels = Array(points.length);
    258  const noiseLabel = "noise";
    259 
    260  let nextClusterIndex = 0;
    261 
    262  // Every point must be visited at least once. Often they will be visited earlier
    263  // in the interior of the loop.
    264  for (let pointIndex = 0; pointIndex < points.length; pointIndex++) {
    265    if (labels[pointIndex] !== undefined) {
    266      // This point is already labeled from the interior logic.
    267      continue;
    268    }
    269 
    270    // Get the neighbors that are within the range of the epsilon value, includes
    271    // the current point.
    272    const neighbors = getNeighborsWithinDistance(points, distance, pointIndex);
    273 
    274    if (neighbors.length < minPoints) {
    275      labels[pointIndex] = noiseLabel;
    276      continue;
    277    }
    278 
    279    // Start a new cluster.
    280    const clusterIndex = nextClusterIndex++;
    281    labels[pointIndex] = clusterIndex;
    282 
    283    // Fill the cluster. The neighbors array grows.
    284    for (let i = 0; i < neighbors.length; i++) {
    285      const nextPointIndex = neighbors[i];
    286      if (typeof labels[nextPointIndex] === "number") {
    287        // This point was already claimed, ignore it.
    288        continue;
    289      }
    290 
    291      if (labels[nextPointIndex] === noiseLabel) {
    292        // Claim this point and move on since noise has no neighbors.
    293        labels[nextPointIndex] = clusterIndex;
    294        continue;
    295      }
    296 
    297      // Claim this point as part of this cluster.
    298      labels[nextPointIndex] = clusterIndex;
    299 
    300      const newNeighbors = getNeighborsWithinDistance(
    301        points,
    302        distance,
    303        nextPointIndex
    304      );
    305 
    306      if (newNeighbors.length >= minPoints) {
    307        // Add on to the neighbors if more are found.
    308        for (const newNeighbor of newNeighbors) {
    309          if (!neighbors.includes(newNeighbor)) {
    310            neighbors.push(newNeighbor);
    311          }
    312        }
    313      }
    314    }
    315  }
    316 
    317  const clusters = [];
    318 
    319  // Pre-populate the clusters.
    320  for (let i = 0; i < nextClusterIndex; i++) {
    321    clusters[i] = [];
    322  }
    323 
    324  // Turn the labels into clusters, adding the noise to the end.
    325  for (let pointIndex = 0; pointIndex < labels.length; pointIndex++) {
    326    const label = labels[pointIndex];
    327    if (typeof label === "number") {
    328      clusters[label].push(pointIndex);
    329    } else if (label === noiseLabel) {
    330      // Add a single cluster.
    331      clusters.push([pointIndex]);
    332    } else {
    333      throw new Error("Logic error. Expected every point to have a label.");
    334    }
    335  }
    336 
    337  clusters.sort((a, b) => points[b[0]][1] - points[a[0]][1]);
    338 
    339  return clusters;
    340 }
    341 
    342 /**
    343 * @param {Vec2[]} points
    344 * @param {number} distance
    345 * @param {number} index
    346 * @returns {Index[]}
    347 */
    348 function getNeighborsWithinDistance(points, distance, index) {
    349  let neighbors = [index];
    350  // There is no reason to compute the square root here if we square the
    351  // original distance.
    352  const distanceSquared = distance * distance;
    353 
    354  for (let otherIndex = 0; otherIndex < points.length; otherIndex++) {
    355    if (otherIndex === index) {
    356      continue;
    357    }
    358    const a = points[index];
    359    const b = points[otherIndex];
    360    const dx = a[0] - b[0];
    361    const dy = a[1] - b[1];
    362 
    363    if (dx * dx + dy * dy < distanceSquared) {
    364      neighbors.push(otherIndex);
    365    }
    366  }
    367 
    368  return neighbors;
    369 }
    370 
    371 /**
    372 * This class pre-computes the squared distances to allow for efficient distance lookups,
    373 * and it provides a way to look up a distance quantile.
    374 */
    375 class DistanceSquared {
    376  /** @type {Map<number>} */
    377  #distances = new Map();
    378  #list;
    379  #distancesSorted;
    380 
    381  /**
    382   * @param {Vec2[]} list
    383   */
    384  constructor(list) {
    385    this.#list = list;
    386    for (let aIndex = 0; aIndex < list.length; aIndex++) {
    387      for (let bIndex = aIndex + 1; bIndex < list.length; bIndex++) {
    388        const id = this.#getTupleID(aIndex, bIndex);
    389        const a = this.#list[aIndex];
    390        const b = this.#list[bIndex];
    391        const dx = a[0] - b[0];
    392        const dy = a[1] - b[1];
    393        this.#distances.set(id, dx * dx + dy * dy);
    394      }
    395    }
    396  }
    397 
    398  /**
    399   * Returns a unique tuple ID to identify the relationship between two vectors.
    400   */
    401  #getTupleID(aIndex, bIndex) {
    402    return aIndex < bIndex
    403      ? aIndex * this.#list.length + bIndex
    404      : bIndex * this.#list.length + aIndex;
    405  }
    406 
    407  /**
    408   * Returns the distance squared between two vectors.
    409   *
    410   * @param {Index} aIndex
    411   * @param {Index} bIndex
    412   * @returns {number} The distance squared
    413   */
    414  get(aIndex, bIndex) {
    415    return this.#distances.get(this.#getTupleID(aIndex, bIndex));
    416  }
    417 
    418  /**
    419   * Returns the quantile squared.
    420   *
    421   * @param {number} percentile - Ranged between 0 - 1
    422   * @returns {number}
    423   */
    424  quantile(percentile) {
    425    if (!this.#distancesSorted) {
    426      this.#distancesSorted = [...this.#distances.values()].sort(
    427        (a, b) => a - b
    428      );
    429    }
    430    const index = Math.max(
    431      0,
    432      Math.min(
    433        this.#distancesSorted.length - 1,
    434        Math.round(this.#distancesSorted.length * percentile)
    435      )
    436    );
    437    return this.#distancesSorted[index];
    438  }
    439 }