tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

walker-search.js (11196B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 "use strict";
      6 
      7 loader.lazyRequireGetter(
      8  this,
      9  ["isWhitespaceTextNode", "getNodeDisplayName"],
     10  "resource://devtools/server/actors/inspector/utils.js",
     11  true
     12 );
     13 /**
     14 * The walker-search module provides a simple API to index and search strings
     15 * and elements inside a given document.
     16 * It indexes tag names, attribute names and values, and text contents.
     17 * It provides a simple search function that returns a list of nodes that
     18 * matched.
     19 */
     20 
     21 class WalkerIndex {
     22  /**
     23   * The WalkerIndex class indexes the document (and all subdocs) from
     24   * a given walker.
     25   *
     26   * It is only indexed the first time the data is accessed and will be
     27   * re-indexed if a mutation happens between requests.
     28   *
     29   * @param {Walker} walker The walker to be indexed
     30   */
     31  constructor(walker) {
     32    this.walker = walker;
     33    this.clearIndex = this.clearIndex.bind(this);
     34 
     35    // Kill the index when mutations occur, the next data get will re-index.
     36    this.walker.on("any-mutation", this.clearIndex);
     37  }
     38 
     39  /**
     40   * Destroy this instance, releasing all data and references
     41   */
     42  destroy() {
     43    this.walker.off("any-mutation", this.clearIndex);
     44  }
     45 
     46  clearIndex() {
     47    if (!this.currentlyIndexing) {
     48      this._data = null;
     49    }
     50  }
     51 
     52  get doc() {
     53    return this.walker.rootDoc;
     54  }
     55 
     56  /**
     57   * Get the indexed data
     58   * This getter also indexes if it hasn't been done yet or if the state is
     59   * dirty
     60   *
     61   * @returns Map<String, Array<{type:String, node:DOMNode}>>
     62   *          A Map keyed on the searchable value, containing an array with
     63   *          objects containing the 'type' (one of ALL_RESULTS_TYPES), and
     64   *          the DOM Node.
     65   */
     66  get data() {
     67    if (!this._data) {
     68      this._data = new Map();
     69      this.index();
     70    }
     71 
     72    return this._data;
     73  }
     74 
     75  _addToIndex(type, node, value) {
     76    // Add an entry for this value if there isn't one
     77    const entry = this._data.get(value);
     78    if (!entry) {
     79      this._data.set(value, []);
     80    }
     81 
     82    // Add the type/node to the list
     83    this._data.get(value).push({
     84      type,
     85      node,
     86    });
     87  }
     88 
     89  index() {
     90    // Handle case where iterating nextNode() with the deepTreeWalker triggers
     91    // a mutation (Bug 1222558)
     92    this.currentlyIndexing = true;
     93 
     94    const documentWalker = this.walker.getDocumentWalker(this.doc);
     95    while (documentWalker.nextNode()) {
     96      const node = documentWalker.currentNode;
     97 
     98      if (
     99        this.walker.targetActor.ignoreSubFrames &&
    100        node.ownerDocument !== this.doc
    101      ) {
    102        continue;
    103      }
    104 
    105      if (node.nodeType === 1) {
    106        if (node.implementedPseudoElement) {
    107          // For pseudo elements we get the displayName (e.g. `::view-transition-group(myGroup)`)
    108          const displayName = getNodeDisplayName(node);
    109          this._addToIndex("tag", node, displayName);
    110 
    111          // And for the pseudo elements that do have text child (via the CSS `content` property),
    112          // we also get the text.
    113          if (
    114            displayName === "::marker" ||
    115            displayName === "::before" ||
    116            displayName === "::after"
    117          ) {
    118            this._addToIndex("text", node, node.textContent.trim());
    119          }
    120        } else {
    121          // For each element node, we get the tagname …
    122          this._addToIndex("tag", node, node.localName);
    123        }
    124 
    125        // … and all attributes names and values
    126        for (const { name, value } of node.attributes) {
    127          this._addToIndex("attributeName", node, name);
    128          this._addToIndex("attributeValue", node, value);
    129        }
    130      } else if (node.textContent && node.textContent.trim().length) {
    131        // For comments and text nodes, we get the text
    132        this._addToIndex("text", node, node.textContent.trim());
    133      }
    134    }
    135 
    136    this.currentlyIndexing = false;
    137  }
    138 }
    139 
    140 exports.WalkerIndex = WalkerIndex;
    141 
    142 class WalkerSearch {
    143  /**
    144   * The WalkerSearch class provides a way to search an indexed document as well
    145   * as find elements that match a given css selector.
    146   *
    147   * Usage example:
    148   * let s = new WalkerSearch(doc);
    149   * let res = s.search("lang", index);
    150   * for (let {matched, results} of res) {
    151   *   for (let {node, type} of results) {
    152   *     console.log("The query matched a node's " + type);
    153   *     console.log("Node that matched", node);
    154   *    }
    155   * }
    156   * s.destroy();
    157   *
    158   * @param {Walker} the walker to be searched
    159   */
    160  constructor(walker) {
    161    this.walker = walker;
    162    this.index = new WalkerIndex(this.walker);
    163  }
    164 
    165  destroy() {
    166    this.index.destroy();
    167    this.walker = null;
    168  }
    169 
    170  _addResult(node, type, results) {
    171    if (!results.has(node)) {
    172      results.set(node, []);
    173    }
    174 
    175    const matches = results.get(node);
    176 
    177    // Do not add if the exact same result is already in the list
    178    let isKnown = false;
    179    for (const match of matches) {
    180      if (match.type === type) {
    181        isKnown = true;
    182        break;
    183      }
    184    }
    185 
    186    if (!isKnown) {
    187      matches.push({ type });
    188    }
    189  }
    190 
    191  _searchIndex(query, options, results) {
    192    for (const [matched, res] of this.index.data) {
    193      if (!options.searchMethod(query, matched)) {
    194        continue;
    195      }
    196 
    197      // Add any relevant results (skipping non-requested options).
    198      res
    199        .filter(entry => {
    200          return options.types.includes(entry.type);
    201        })
    202        .forEach(({ node, type }) => {
    203          this._addResult(node, type, results);
    204        });
    205    }
    206  }
    207 
    208  _attributeMatch(attributeValue, expectedAttributeValue, isExactMatch) {
    209    if (expectedAttributeValue === undefined) {
    210      return true;
    211    }
    212    if (isExactMatch) {
    213      return attributeValue === expectedAttributeValue;
    214    }
    215    return attributeValue.startsWith(expectedAttributeValue);
    216  }
    217 
    218  _searchAttribute(query, options, results) {
    219    if (
    220      !options.types.includes("attributeName") ||
    221      !options.types.includes("attributeValue")
    222    ) {
    223      return;
    224    }
    225    if (!query.includes("=") || query.trim() === "=") {
    226      return;
    227    }
    228 
    229    let [attributeName, attributeValue] = query.split("=", 2);
    230 
    231    // Remove leading and trailing quotes
    232    const isExactMatch =
    233      attributeValue.startsWith('"') && attributeValue.endsWith('"');
    234    attributeValue = attributeValue?.replace(/(^")|("$)/gi, "");
    235 
    236    if (attributeName === undefined || attributeName === "") {
    237      for (const [str, entries] of this.index.data) {
    238        if (!this._attributeMatch(str, attributeValue, isExactMatch)) {
    239          continue;
    240        }
    241 
    242        for (const entry of entries) {
    243          if (entry.type !== "attributeValue") {
    244            continue;
    245          }
    246          this._addResult(entry.node, "attributeValue", results);
    247        }
    248      }
    249    } else if (this.index.data.has(attributeName)) {
    250      for (const entry of this.index.data.get(attributeName)) {
    251        if (entry.type !== "attributeName") {
    252          continue;
    253        }
    254        if (
    255          !this._attributeMatch(
    256            entry.node.attributes[attributeName].value,
    257            attributeValue,
    258            isExactMatch
    259          )
    260        ) {
    261          continue;
    262        }
    263        this._addResult(entry.node, "attributeName", results);
    264      }
    265    }
    266  }
    267 
    268  _searchSelectors(query, options, results) {
    269    if (!options.types.includes("selector")) {
    270      return;
    271    }
    272 
    273    // If the query is just one "word", no need to search because _searchIndex
    274    // will lead the same results since it has access to tagnames anyway
    275    if (
    276      // regular tagname
    277      query.match(/^[a-z]+$/i)
    278    ) {
    279      return;
    280    }
    281 
    282    // If the query is not a valid selector, bail
    283    if (!CSS.supports(`selector(${query})`)) {
    284      return;
    285    }
    286 
    287    const nodes = this.walker._multiFrameQuerySelectorAll(query);
    288    for (const node of nodes) {
    289      this._addResult(node, "selector", results);
    290    }
    291  }
    292 
    293  _searchXPath(query, options, results) {
    294    if (!options.types.includes("xpath")) {
    295      return;
    296    }
    297 
    298    const nodes = this.walker._multiFrameXPath(query);
    299    for (const node of nodes) {
    300      // Exclude text nodes that only contain whitespace
    301      // because they are not displayed in the Inspector.
    302      if (!isWhitespaceTextNode(node)) {
    303        this._addResult(node, "xpath", results);
    304      }
    305    }
    306  }
    307 
    308  /**
    309   * Search the document
    310   *
    311   * @param {string} query What to search for
    312   * @param {object} options The following options are accepted:
    313   * - searchMethod {String} one of WalkerSearch.SEARCH_METHOD_*
    314   *   defaults to WalkerSearch.SEARCH_METHOD_CONTAINS (does not apply to
    315   *   selector and XPath search types)
    316   * - types {Array} a list of things to search for (tag, text, attributes, etc)
    317   *   defaults to WalkerSearch.ALL_RESULTS_TYPES
    318   * @return {Array} An array is returned with each item being an object like:
    319   * {
    320   *   node: <the dom node that matched>,
    321   *   type: <the type of match: one of WalkerSearch.ALL_RESULTS_TYPES>
    322   * }
    323   */
    324  search(query, options = {}) {
    325    options.searchMethod =
    326      options.searchMethod || WalkerSearch.SEARCH_METHOD_CONTAINS;
    327    options.types = options.types || WalkerSearch.ALL_RESULTS_TYPES;
    328 
    329    // Empty strings will return no results, as will non-string input
    330    if (typeof query !== "string") {
    331      query = "";
    332    }
    333 
    334    // Store results in a map indexed by nodes to avoid duplicate results
    335    const results = new Map();
    336 
    337    // Search through the indexed data
    338    this._searchIndex(query, options, results);
    339 
    340    // Search with querySelectorAll
    341    this._searchSelectors(query, options, results);
    342 
    343    // Search for attributeName=attributeValue pairs
    344    this._searchAttribute(query, options, results);
    345 
    346    // Search with XPath
    347    this._searchXPath(query, options, results);
    348 
    349    // Concatenate all results into an Array to return
    350    const resultList = [];
    351    for (const [node, matches] of results) {
    352      for (const { type } of matches) {
    353        resultList.push({
    354          node,
    355          type,
    356        });
    357      }
    358    }
    359 
    360    const documents = this.walker.targetActor.windows.map(win => win.document);
    361 
    362    // Sort the resulting nodes by order of appearance in the DOM
    363    resultList.sort((a, b) => {
    364      // Disconnected nodes won't get good results from compareDocumentPosition
    365      // so check the order of their document instead.
    366      if (a.node.ownerDocument != b.node.ownerDocument) {
    367        const indA = documents.indexOf(a.node.ownerDocument);
    368        const indB = documents.indexOf(b.node.ownerDocument);
    369        return indA - indB;
    370      }
    371      // If the same document, then sort on DOCUMENT_POSITION_FOLLOWING (4)
    372      // which means B is after A.
    373      return a.node.compareDocumentPosition(b.node) & 4 ? -1 : 1;
    374    });
    375 
    376    return resultList;
    377  }
    378 }
    379 
    380 WalkerSearch.SEARCH_METHOD_CONTAINS = (query, candidate) => {
    381  return query && candidate.toLowerCase().includes(query.toLowerCase());
    382 };
    383 
    384 WalkerSearch.ALL_RESULTS_TYPES = [
    385  "tag",
    386  "text",
    387  "attributeName",
    388  "attributeValue",
    389  "selector",
    390  "xpath",
    391 ];
    392 
    393 exports.WalkerSearch = WalkerSearch;