tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

RecipeExecutor.mjs (32853B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /**
      6 * RecipeExecutor is the core feature engineering pipeline for the in-browser
      7 * personalization work. These pipelines are called "recipes". A recipe is an
      8 * array of objects that define a "step" in the recipe. A step is simply an
      9 * object with a field "function" that specifies what is being done in the step
     10 * along with other fields that are semantically defined for that step.
     11 *
     12 * There are two types of recipes: "builder" recipes and "combiner" recipes. Builder
     13 * recipes mutate an object until it matches some set of criteria. Combiner
     14 * recipes take two objects, (a "left" and a "right"), and specify the steps
     15 * to merge the right object into the left object.
     16 *
     17 * A short nonsense example recipe is:
     18 * [ {"function": "get_url_domain", "path_length": 1, "field": "url", "dest": "url_domain"},
     19 *   {"function": "nb_tag", "fields": ["title", "description"]},
     20 *   {"function": "conditionally_nmf_tag", "fields": ["title", "description"]} ]
     21 *
     22 * Recipes are sandboxed by the fact that the step functions must be explicitly
     23 * allowed. Functions allowed for builder recipes are specified in the
     24 * RecipeExecutor.ITEM_BUILDER_REGISTRY, while combiner functions are allowed
     25 * in RecipeExecutor.ITEM_COMBINER_REGISTRY.
     26 */
     27 export class RecipeExecutor {
     28  constructor(nbTaggers, nmfTaggers, tokenize) {
     29    this.ITEM_BUILDER_REGISTRY = {
     30      nb_tag: this.naiveBayesTag,
     31      conditionally_nmf_tag: this.conditionallyNmfTag,
     32      accept_item_by_field_value: this.acceptItemByFieldValue,
     33      tokenize_url: this.tokenizeUrl,
     34      get_url_domain: this.getUrlDomain,
     35      tokenize_field: this.tokenizeField,
     36      copy_value: this.copyValue,
     37      keep_top_k: this.keepTopK,
     38      scalar_multiply: this.scalarMultiply,
     39      elementwise_multiply: this.elementwiseMultiply,
     40      vector_multiply: this.vectorMultiply,
     41      scalar_add: this.scalarAdd,
     42      vector_add: this.vectorAdd,
     43      make_boolean: this.makeBoolean,
     44      allow_fields: this.allowFields,
     45      filter_by_value: this.filterByValue,
     46      l2_normalize: this.l2Normalize,
     47      prob_normalize: this.probNormalize,
     48      set_default: this.setDefault,
     49      lookup_value: this.lookupValue,
     50      copy_to_map: this.copyToMap,
     51      scalar_multiply_tag: this.scalarMultiplyTag,
     52      apply_softmax_tags: this.applySoftmaxTags,
     53    };
     54    this.ITEM_COMBINER_REGISTRY = {
     55      combiner_add: this.combinerAdd,
     56      combiner_max: this.combinerMax,
     57      combiner_collect_values: this.combinerCollectValues,
     58    };
     59    this.nbTaggers = nbTaggers;
     60    this.nmfTaggers = nmfTaggers;
     61    this.tokenize = tokenize;
     62  }
     63 
     64  /**
     65   * Determines the type of a field. Valid types are:
     66   *   string
     67   *   number
     68   *   array
     69   *   map (strings to anything)
     70   */
     71  _typeOf(data) {
     72    let t = typeof data;
     73    if (t === "object") {
     74      if (data === null) {
     75        return "null";
     76      }
     77      if (Array.isArray(data)) {
     78        return "array";
     79      }
     80      return "map";
     81    }
     82    return t;
     83  }
     84 
     85  /**
     86   * Returns a scalar, either because it was a constant, or by
     87   * looking it up from the item. Allows for a default value if the lookup
     88   * fails.
     89   */
     90  _lookupScalar(item, k, dfault) {
     91    if (this._typeOf(k) === "number") {
     92      return k;
     93    } else if (
     94      this._typeOf(k) === "string" &&
     95      k in item &&
     96      this._typeOf(item[k]) === "number"
     97    ) {
     98      return item[k];
     99    }
    100    return dfault;
    101  }
    102 
    103  /**
    104   * Simply appends all the strings from a set fields together. If the field
    105   * is a list, then the cells of the list are append.
    106   */
    107  _assembleText(item, fields) {
    108    let textArr = [];
    109    for (let field of fields) {
    110      if (field in item) {
    111        let type = this._typeOf(item[field]);
    112        if (type === "string") {
    113          textArr.push(item[field]);
    114        } else if (type === "array") {
    115          for (let ele of item[field]) {
    116            textArr.push(String(ele));
    117          }
    118        } else {
    119          textArr.push(String(item[field]));
    120        }
    121      }
    122    }
    123    return textArr.join(" ");
    124  }
    125 
    126  /**
    127   * Runs the naive bayes text taggers over a set of text fields. Stores the
    128   * results in new fields:
    129   *  nb_tags:         a map of text strings to probabilites
    130   *  nb_tokens:       the tokenized text that was tagged
    131   *
    132   * Config:
    133   *  fields:          an array containing a list of fields to concatenate and tag
    134   */
    135  naiveBayesTag(item, config) {
    136    let text = this._assembleText(item, config.fields);
    137    let tokens = this.tokenize(text);
    138    let tags = {};
    139    let extended_tags = {};
    140 
    141    for (let nbTagger of this.nbTaggers) {
    142      let result = nbTagger.tagTokens(tokens);
    143      if (result.label !== null && result.confident) {
    144        extended_tags[result.label] = result;
    145        tags[result.label] = Math.exp(result.logProb);
    146      }
    147    }
    148    item.nb_tags = tags;
    149    item.nb_tags_extended = extended_tags;
    150    item.nb_tokens = tokens;
    151    return item;
    152  }
    153 
    154  /**
    155   * Selectively runs NMF text taggers depending on which tags were found
    156   * by the naive bayes taggers. Writes the results in into new fields:
    157   *  nmf_tags_parent_weights:  map of pareent tags to probabilites of those parent tags
    158   *  nmf_tags:                 map of strings to maps of strings to probabilities
    159   *  nmf_tags_parent           map of child tags to parent tags
    160   *
    161   * Config:
    162   *  Not configurable
    163   */
    164  conditionallyNmfTag(item) {
    165    let nestedNmfTags = {};
    166    let parentTags = {};
    167    let parentWeights = {};
    168 
    169    if (!("nb_tags" in item) || !("nb_tokens" in item)) {
    170      return null;
    171    }
    172 
    173    Object.keys(item.nb_tags).forEach(parentTag => {
    174      let nmfTagger = this.nmfTaggers[parentTag];
    175      if (nmfTagger !== undefined) {
    176        nestedNmfTags[parentTag] = {};
    177        parentWeights[parentTag] = item.nb_tags[parentTag];
    178        let nmfTags = nmfTagger.tagTokens(item.nb_tokens);
    179        Object.keys(nmfTags).forEach(nmfTag => {
    180          nestedNmfTags[parentTag][nmfTag] = nmfTags[nmfTag];
    181          parentTags[nmfTag] = parentTag;
    182        });
    183      }
    184    });
    185 
    186    item.nmf_tags = nestedNmfTags;
    187    item.nmf_tags_parent = parentTags;
    188    item.nmf_tags_parent_weights = parentWeights;
    189 
    190    return item;
    191  }
    192 
    193  /**
    194   * Checks a field's value against another value (either from another field
    195   * or a constant). If the test passes, then the item is emitted, otherwise
    196   * the pipeline is aborted.
    197   *
    198   * Config:
    199   *  field      Field to read the value to test. Left side of operator.
    200   *  op         one of ==, !=, <, <=, >, >=
    201   *  rhsValue   Constant value to compare against. Right side of operator.
    202   *  rhsField   Field to read value to compare against. Right side of operator.
    203   *
    204   * NOTE: rhsValue takes precidence over rhsField.
    205   */
    206  acceptItemByFieldValue(item, config) {
    207    if (!(config.field in item)) {
    208      return null;
    209    }
    210    let rhs = null;
    211    if ("rhsValue" in config) {
    212      rhs = config.rhsValue;
    213    } else if ("rhsField" in config && config.rhsField in item) {
    214      rhs = item[config.rhsField];
    215    }
    216    if (rhs === null) {
    217      return null;
    218    }
    219 
    220    if (
    221      // eslint-disable-next-line eqeqeq
    222      (config.op === "==" && item[config.field] == rhs) ||
    223      // eslint-disable-next-line eqeqeq
    224      (config.op === "!=" && item[config.field] != rhs) ||
    225      (config.op === "<" && item[config.field] < rhs) ||
    226      (config.op === "<=" && item[config.field] <= rhs) ||
    227      (config.op === ">" && item[config.field] > rhs) ||
    228      (config.op === ">=" && item[config.field] >= rhs)
    229    ) {
    230      return item;
    231    }
    232 
    233    return null;
    234  }
    235 
    236  /**
    237   * Splits a URL into text-like tokens.
    238   *
    239   * Config:
    240   *  field   Field containing a URL
    241   *  dest    Field to write the tokens to as an array of strings
    242   *
    243   * NOTE: Any initial 'www' on the hostname is removed.
    244   */
    245  tokenizeUrl(item, config) {
    246    if (!(config.field in item)) {
    247      return null;
    248    }
    249 
    250    let url = new URL(item[config.field]);
    251    let domain = url.hostname;
    252    if (domain.startsWith("www.")) {
    253      domain = domain.substring(4);
    254    }
    255    let toks = this.tokenize(domain);
    256    let pathToks = this.tokenize(
    257      decodeURIComponent(url.pathname.replace(/\+/g, " "))
    258    );
    259    for (let tok of pathToks) {
    260      toks.push(tok);
    261    }
    262    for (let pair of url.searchParams.entries()) {
    263      let k = this.tokenize(decodeURIComponent(pair[0].replace(/\+/g, " ")));
    264      for (let tok of k) {
    265        toks.push(tok);
    266      }
    267      if (pair[1] !== null && pair[1] !== "") {
    268        let v = this.tokenize(decodeURIComponent(pair[1].replace(/\+/g, " ")));
    269        for (let tok of v) {
    270          toks.push(tok);
    271        }
    272      }
    273    }
    274    item[config.dest] = toks;
    275 
    276    return item;
    277  }
    278 
    279  /**
    280   * Gets the hostname (minus any initial "www." along with the left most
    281   * directories on the path.
    282   *
    283   * Config:
    284   *  field          Field containing the URL
    285   *  dest           Field to write the array of strings to
    286   *  path_length    OPTIONAL (DEFAULT: 0) Number of leftmost subdirectories to include
    287   */
    288  getUrlDomain(item, config) {
    289    if (!(config.field in item)) {
    290      return null;
    291    }
    292 
    293    let url = new URL(item[config.field]);
    294    let domain = url.hostname.toLocaleLowerCase();
    295    if (domain.startsWith("www.")) {
    296      domain = domain.substring(4);
    297    }
    298    item[config.dest] = domain;
    299    let pathLength = 0;
    300    if ("path_length" in config) {
    301      pathLength = config.path_length;
    302    }
    303    if (pathLength > 0) {
    304      item[config.dest] += url.pathname
    305        .toLocaleLowerCase()
    306        .split("/")
    307        .slice(0, pathLength + 1)
    308        .join("/");
    309    }
    310 
    311    return item;
    312  }
    313 
    314  /**
    315   * Splits a field into tokens.
    316   * Config:
    317   *  field         Field containing a string to tokenize
    318   *  dest          Field to write the array of strings to
    319   */
    320  tokenizeField(item, config) {
    321    if (!(config.field in item)) {
    322      return null;
    323    }
    324 
    325    item[config.dest] = this.tokenize(item[config.field]);
    326 
    327    return item;
    328  }
    329 
    330  /**
    331   * Deep copy from one field to another.
    332   * Config:
    333   *  src           Field to read from
    334   *  dest          Field to write to
    335   */
    336  copyValue(item, config) {
    337    if (!(config.src in item)) {
    338      return null;
    339    }
    340 
    341    item[config.dest] = JSON.parse(JSON.stringify(item[config.src]));
    342 
    343    return item;
    344  }
    345 
    346  /**
    347   * Converts a field containing a map of strings to a map of strings
    348   * to numbers, to a map of strings to numbers containing at most k elements.
    349   * This operation is performed by first, promoting all the subkeys up one
    350   * level, and then taking the top (or bottom) k values.
    351   *
    352   * Config:
    353   *  field         Points to a map of strings to a map of strings to numbers
    354   *  k             Maximum number of items to keep
    355   *  descending    OPTIONAL (DEFAULT: True) Sorts score in descending  order
    356   *                  (i.e. keeps maximum)
    357   */
    358  keepTopK(item, config) {
    359    if (!(config.field in item)) {
    360      return null;
    361    }
    362    let k = this._lookupScalar(item, config.k, 1048576);
    363    let descending = !("descending" in config) || config.descending !== false;
    364 
    365    // we can't sort by the values in the map, so we have to convert this
    366    // to an array, and then sort.
    367    let sortable = [];
    368    Object.keys(item[config.field]).forEach(outerKey => {
    369      let innerType = this._typeOf(item[config.field][outerKey]);
    370      if (innerType === "map") {
    371        Object.keys(item[config.field][outerKey]).forEach(innerKey => {
    372          sortable.push({
    373            key: innerKey,
    374            value: item[config.field][outerKey][innerKey],
    375          });
    376        });
    377      } else {
    378        sortable.push({ key: outerKey, value: item[config.field][outerKey] });
    379      }
    380    });
    381 
    382    sortable.sort((a, b) => {
    383      if (descending) {
    384        return b.value - a.value;
    385      }
    386      return a.value - b.value;
    387    });
    388 
    389    // now take the top k
    390    let newMap = {};
    391    let i = 0;
    392    for (let pair of sortable) {
    393      if (i >= k) {
    394        break;
    395      }
    396      newMap[pair.key] = pair.value;
    397      i++;
    398    }
    399    item[config.field] = newMap;
    400 
    401    return item;
    402  }
    403 
    404  /**
    405   * Scalar multiplies a vector by some constant
    406   *
    407   * Config:
    408   *  field         Points to:
    409   *                   a map of strings to numbers
    410   *                   an array of numbers
    411   *                   a number
    412   *  k             Either a number, or a string. If it's a number then This
    413   *                  is the scalar value to multiply by. If it's a string,
    414   *                  the value in the pointed to field is used.
    415   *  default       OPTIONAL (DEFAULT: 0), If k is a string, and no numeric
    416   *                  value is found, then use this value.
    417   */
    418  scalarMultiply(item, config) {
    419    if (!(config.field in item)) {
    420      return null;
    421    }
    422    let k = this._lookupScalar(item, config.k, config.dfault);
    423 
    424    let fieldType = this._typeOf(item[config.field]);
    425    if (fieldType === "number") {
    426      item[config.field] *= k;
    427    } else if (fieldType === "array") {
    428      for (let i = 0; i < item[config.field].length; i++) {
    429        item[config.field][i] *= k;
    430      }
    431    } else if (fieldType === "map") {
    432      Object.keys(item[config.field]).forEach(key => {
    433        item[config.field][key] *= k;
    434      });
    435    } else {
    436      return null;
    437    }
    438 
    439    return item;
    440  }
    441 
    442  /**
    443   * Elementwise multiplies either two maps or two arrays together, storing
    444   * the result in left. If left and right are of the same type, results in an
    445   * error.
    446   *
    447   * Maps are special case. For maps the left must be a nested map such as:
    448   * { k1: { k11: 1, k12: 2}, k2: { k21: 3, k22: 4 } } and right needs to be
    449   * simple map such as: { k1: 5, k2: 6} .  The operation is then to mulitply
    450   * every value of every right key, to every value every subkey where the
    451   * parent keys match. Using the previous examples, the result would be:
    452   * { k1: { k11: 5, k12: 10 }, k2: { k21: 18, k22: 24 } } .
    453   *
    454   * Config:
    455   *  left
    456   *  right
    457   */
    458  elementwiseMultiply(item, config) {
    459    if (!(config.left in item) || !(config.right in item)) {
    460      return null;
    461    }
    462    let leftType = this._typeOf(item[config.left]);
    463    if (leftType !== this._typeOf(item[config.right])) {
    464      return null;
    465    }
    466    if (leftType === "array") {
    467      if (item[config.left].length !== item[config.right].length) {
    468        return null;
    469      }
    470      for (let i = 0; i < item[config.left].length; i++) {
    471        item[config.left][i] *= item[config.right][i];
    472      }
    473    } else if (leftType === "map") {
    474      Object.keys(item[config.left]).forEach(outerKey => {
    475        let r = 0.0;
    476        if (outerKey in item[config.right]) {
    477          r = item[config.right][outerKey];
    478        }
    479        Object.keys(item[config.left][outerKey]).forEach(innerKey => {
    480          item[config.left][outerKey][innerKey] *= r;
    481        });
    482      });
    483    } else if (leftType === "number") {
    484      item[config.left] *= item[config.right];
    485    } else {
    486      return null;
    487    }
    488 
    489    return item;
    490  }
    491 
    492  /**
    493   * Vector multiplies (i.e. dot products) two vectors and stores the result in
    494   * third field. Both vectors must either by maps, or arrays of numbers with
    495   * the same length.
    496   *
    497   * Config:
    498   *   left       A field pointing to either a map of strings to numbers,
    499   *                or an array of numbers
    500   *   right      A field pointing to either a map of strings to numbers,
    501   *                or an array of numbers
    502   *   dest       The field to store the dot product.
    503   */
    504  vectorMultiply(item, config) {
    505    if (!(config.left in item) || !(config.right in item)) {
    506      return null;
    507    }
    508 
    509    let leftType = this._typeOf(item[config.left]);
    510    if (leftType !== this._typeOf(item[config.right])) {
    511      return null;
    512    }
    513 
    514    let destVal = 0.0;
    515    if (leftType === "array") {
    516      if (item[config.left].length !== item[config.right].length) {
    517        return null;
    518      }
    519      for (let i = 0; i < item[config.left].length; i++) {
    520        destVal += item[config.left][i] * item[config.right][i];
    521      }
    522    } else if (leftType === "map") {
    523      Object.keys(item[config.left]).forEach(key => {
    524        if (key in item[config.right]) {
    525          destVal += item[config.left][key] * item[config.right][key];
    526        }
    527      });
    528    } else {
    529      return null;
    530    }
    531 
    532    item[config.dest] = destVal;
    533    return item;
    534  }
    535 
    536  /**
    537   * Adds a constant value to all elements in the field. Mathematically,
    538   * this is the same as taking a 1-vector, scalar multiplying it by k,
    539   * and then vector adding it to a field.
    540   *
    541   * Config:
    542   *  field     A field pointing to either a map of strings to numbers,
    543   *                  or an array of numbers
    544   *  k             Either a number, or a string. If it's a number then This
    545   *                  is the scalar value to multiply by. If it's a string,
    546   *                  the value in the pointed to field is used.
    547   *  default       OPTIONAL (DEFAULT: 0), If k is a string, and no numeric
    548   *                  value is found, then use this value.
    549   */
    550  scalarAdd(item, config) {
    551    let k = this._lookupScalar(item, config.k, config.dfault);
    552    if (!(config.field in item)) {
    553      return null;
    554    }
    555 
    556    let fieldType = this._typeOf(item[config.field]);
    557    if (fieldType === "array") {
    558      for (let i = 0; i < item[config.field].length; i++) {
    559        item[config.field][i] += k;
    560      }
    561    } else if (fieldType === "map") {
    562      Object.keys(item[config.field]).forEach(key => {
    563        item[config.field][key] += k;
    564      });
    565    } else if (fieldType === "number") {
    566      item[config.field] += k;
    567    } else {
    568      return null;
    569    }
    570 
    571    return item;
    572  }
    573 
    574  /**
    575   * Adds two vectors together and stores the result in left.
    576   *
    577   * Config:
    578   *  left      A field pointing to either a map of strings to numbers,
    579   *                  or an array of numbers
    580   *  right     A field pointing to either a map of strings to numbers,
    581   *                  or an array of numbers
    582   */
    583  vectorAdd(item, config) {
    584    if (!(config.left in item)) {
    585      return this.copyValue(item, { src: config.right, dest: config.left });
    586    }
    587    if (!(config.right in item)) {
    588      return null;
    589    }
    590 
    591    let leftType = this._typeOf(item[config.left]);
    592    if (leftType !== this._typeOf(item[config.right])) {
    593      return null;
    594    }
    595    if (leftType === "array") {
    596      if (item[config.left].length !== item[config.right].length) {
    597        return null;
    598      }
    599      for (let i = 0; i < item[config.left].length; i++) {
    600        item[config.left][i] += item[config.right][i];
    601      }
    602      return item;
    603    } else if (leftType === "map") {
    604      Object.keys(item[config.right]).forEach(key => {
    605        let v = 0;
    606        if (key in item[config.left]) {
    607          v = item[config.left][key];
    608        }
    609        item[config.left][key] = v + item[config.right][key];
    610      });
    611      return item;
    612    }
    613 
    614    return null;
    615  }
    616 
    617  /**
    618   * Converts a vector from real values to boolean integers. (i.e. either 1/0
    619   * or 1/-1).
    620   *
    621   * Config:
    622   *   field            Field containing either a map of strings to numbers or
    623   *                      an array of numbers to  convert.
    624   *   threshold        OPTIONAL (DEFAULT: 0) Values above this will be replaced
    625   *                      with 1.0. Those below will be converted to 0.
    626   *   keep_negative    OPTIONAL (DEFAULT: False) If true, values below the
    627   *                      threshold will be converted to -1 instead of 0.
    628   */
    629  makeBoolean(item, config) {
    630    if (!(config.field in item)) {
    631      return null;
    632    }
    633    let threshold = this._lookupScalar(item, config.threshold, 0.0);
    634    let type = this._typeOf(item[config.field]);
    635    if (type === "array") {
    636      for (let i = 0; i < item[config.field].length; i++) {
    637        if (item[config.field][i] > threshold) {
    638          item[config.field][i] = 1.0;
    639        } else if (config.keep_negative) {
    640          item[config.field][i] = -1.0;
    641        } else {
    642          item[config.field][i] = 0.0;
    643        }
    644      }
    645    } else if (type === "map") {
    646      Object.keys(item[config.field]).forEach(key => {
    647        let value = item[config.field][key];
    648        if (value > threshold) {
    649          item[config.field][key] = 1.0;
    650        } else if (config.keep_negative) {
    651          item[config.field][key] = -1.0;
    652        } else {
    653          item[config.field][key] = 0.0;
    654        }
    655      });
    656    } else if (type === "number") {
    657      let value = item[config.field];
    658      if (value > threshold) {
    659        item[config.field] = 1.0;
    660      } else if (config.keep_negative) {
    661        item[config.field] = -1.0;
    662      } else {
    663        item[config.field] = 0.0;
    664      }
    665    } else {
    666      return null;
    667    }
    668 
    669    return item;
    670  }
    671 
    672  /**
    673   * Removes all keys from the item except for the ones specified.
    674   *
    675   * fields           An array of strings indicating the fields to keep
    676   */
    677  allowFields(item, config) {
    678    let newItem = {};
    679    for (let ele of config.fields) {
    680      if (ele in item) {
    681        newItem[ele] = item[ele];
    682      }
    683    }
    684    return newItem;
    685  }
    686 
    687  /**
    688   * Removes all keys whose value does not exceed some threshold.
    689   *
    690   * Config:
    691   *   field         Points to a map of strings to numbers
    692   *   threshold     Values must exceed this value, otherwise they are removed.
    693   */
    694  filterByValue(item, config) {
    695    if (!(config.field in item)) {
    696      return null;
    697    }
    698    let threshold = this._lookupScalar(item, config.threshold, 0.0);
    699    let filtered = {};
    700    Object.keys(item[config.field]).forEach(key => {
    701      let value = item[config.field][key];
    702      if (value > threshold) {
    703        filtered[key] = value;
    704      }
    705    });
    706    item[config.field] = filtered;
    707 
    708    return item;
    709  }
    710 
    711  /**
    712   * Rewrites a field so that its values are now L2 normed.
    713   *
    714   * Config:
    715   *  field         Points to a map of strings to numbers, or an array of numbers
    716   */
    717  l2Normalize(item, config) {
    718    if (!(config.field in item)) {
    719      return null;
    720    }
    721    let data = item[config.field];
    722    let type = this._typeOf(data);
    723    if (type === "array") {
    724      let norm = 0.0;
    725      for (let datum of data) {
    726        norm += datum * datum;
    727      }
    728      norm = Math.sqrt(norm);
    729      if (norm !== 0) {
    730        for (let i = 0; i < data.length; i++) {
    731          data[i] /= norm;
    732        }
    733      }
    734    } else if (type === "map") {
    735      let norm = 0.0;
    736      Object.keys(data).forEach(key => {
    737        norm += data[key] * data[key];
    738      });
    739      norm = Math.sqrt(norm);
    740      if (norm !== 0) {
    741        Object.keys(data).forEach(key => {
    742          data[key] /= norm;
    743        });
    744      }
    745    } else {
    746      return null;
    747    }
    748 
    749    item[config.field] = data;
    750 
    751    return item;
    752  }
    753 
    754  /**
    755   * Rewrites a field so that all of its values sum to 1.0
    756   *
    757   * Config:
    758   *  field         Points to a map of strings to numbers, or an array of numbers
    759   */
    760  probNormalize(item, config) {
    761    if (!(config.field in item)) {
    762      return null;
    763    }
    764    let data = item[config.field];
    765    let type = this._typeOf(data);
    766    if (type === "array") {
    767      let norm = 0.0;
    768      for (let datum of data) {
    769        norm += datum;
    770      }
    771      if (norm !== 0) {
    772        for (let i = 0; i < data.length; i++) {
    773          data[i] /= norm;
    774        }
    775      }
    776    } else if (type === "map") {
    777      let norm = 0.0;
    778      Object.keys(item[config.field]).forEach(key => {
    779        norm += item[config.field][key];
    780      });
    781      if (norm !== 0) {
    782        Object.keys(item[config.field]).forEach(key => {
    783          item[config.field][key] /= norm;
    784        });
    785      }
    786    } else {
    787      return null;
    788    }
    789 
    790    return item;
    791  }
    792 
    793  /**
    794   * Stores a value, if it is not already present
    795   *
    796   * Config:
    797   *  field             field to write to if it is missing
    798   *  value             value to store in that field
    799   */
    800  setDefault(item, config) {
    801    let val = this._lookupScalar(item, config.value, config.value);
    802    if (!(config.field in item)) {
    803      item[config.field] = val;
    804    }
    805 
    806    return item;
    807  }
    808 
    809  /**
    810   * Selctively promotes an value from an inner map up to the outer map
    811   *
    812   * Config:
    813   *  haystack            Points to a map of strings to values
    814   *  needle              Key inside the map we should promote up
    815   *  dest                Where we should write the value of haystack[needle]
    816   */
    817  lookupValue(item, config) {
    818    if (config.haystack in item && config.needle in item[config.haystack]) {
    819      item[config.dest] = item[config.haystack][config.needle];
    820    }
    821 
    822    return item;
    823  }
    824 
    825  /**
    826   * Demotes a field into a map
    827   *
    828   * Config:
    829   *  src               Field to copy
    830   *  dest_map          Points to a map
    831   *  dest_key          Key inside dest_map to copy src to
    832   */
    833  copyToMap(item, config) {
    834    if (config.src in item) {
    835      if (!(config.dest_map in item)) {
    836        item[config.dest_map] = {};
    837      }
    838      item[config.dest_map][config.dest_key] = item[config.src];
    839    }
    840 
    841    return item;
    842  }
    843 
    844  /**
    845   * Config:
    846   *  field             Points to a string to number map
    847   *  k                 Scalar to multiply the values by
    848   *  log_scale         Boolean, if true, then the values will be transformed
    849   *                      by a logrithm prior to multiplications
    850   */
    851  scalarMultiplyTag(item, config) {
    852    let EPSILON = 0.000001;
    853    if (!(config.field in item)) {
    854      return null;
    855    }
    856    let k = this._lookupScalar(item, config.k, 1);
    857    let type = this._typeOf(item[config.field]);
    858    if (type === "map") {
    859      Object.keys(item[config.field]).forEach(parentKey => {
    860        Object.keys(item[config.field][parentKey]).forEach(key => {
    861          let v = item[config.field][parentKey][key];
    862          if (config.log_scale) {
    863            v = Math.log(v + EPSILON);
    864          }
    865          item[config.field][parentKey][key] = v * k;
    866        });
    867      });
    868    } else {
    869      return null;
    870    }
    871 
    872    return item;
    873  }
    874 
    875  /**
    876   * Independently applies softmax across all subtags.
    877   *
    878   * Config:
    879   *   field        Points to a map of strings with values being another map of strings
    880   */
    881  applySoftmaxTags(item, config) {
    882    let type = this._typeOf(item[config.field]);
    883    if (type !== "map") {
    884      return null;
    885    }
    886 
    887    let abort = false;
    888    let softmaxSum = {};
    889    Object.keys(item[config.field]).forEach(tag => {
    890      if (this._typeOf(item[config.field][tag]) !== "map") {
    891        abort = true;
    892        return;
    893      }
    894      if (abort) {
    895        return;
    896      }
    897      softmaxSum[tag] = 0;
    898      Object.keys(item[config.field][tag]).forEach(subtag => {
    899        if (this._typeOf(item[config.field][tag][subtag]) !== "number") {
    900          abort = true;
    901          return;
    902        }
    903        let score = item[config.field][tag][subtag];
    904        softmaxSum[tag] += Math.exp(score);
    905      });
    906    });
    907    if (abort) {
    908      return null;
    909    }
    910 
    911    Object.keys(item[config.field]).forEach(tag => {
    912      Object.keys(item[config.field][tag]).forEach(subtag => {
    913        item[config.field][tag][subtag] =
    914          Math.exp(item[config.field][tag][subtag]) / softmaxSum[tag];
    915      });
    916    });
    917 
    918    return item;
    919  }
    920 
    921  /**
    922   * Vector adds a field and stores the result in left.
    923   *
    924   * Config:
    925   *   field              The field to vector add
    926   */
    927  combinerAdd(left, right, config) {
    928    if (!(config.field in right)) {
    929      return left;
    930    }
    931    let type = this._typeOf(right[config.field]);
    932    if (!(config.field in left)) {
    933      if (type === "map") {
    934        left[config.field] = {};
    935      } else if (type === "array") {
    936        left[config.field] = [];
    937      } else if (type === "number") {
    938        left[config.field] = 0;
    939      } else {
    940        return null;
    941      }
    942    }
    943    if (type !== this._typeOf(left[config.field])) {
    944      return null;
    945    }
    946    if (type === "map") {
    947      Object.keys(right[config.field]).forEach(key => {
    948        if (!(key in left[config.field])) {
    949          left[config.field][key] = 0;
    950        }
    951        left[config.field][key] += right[config.field][key];
    952      });
    953    } else if (type === "array") {
    954      for (let i = 0; i < right[config.field].length; i++) {
    955        if (i < left[config.field].length) {
    956          left[config.field][i] += right[config.field][i];
    957        } else {
    958          left[config.field].push(right[config.field][i]);
    959        }
    960      }
    961    } else if (type === "number") {
    962      left[config.field] += right[config.field];
    963    } else {
    964      return null;
    965    }
    966 
    967    return left;
    968  }
    969 
    970  /**
    971   * Stores the maximum value of the field in left.
    972   *
    973   * Config:
     974   *   field              The field to take the maximum of
    975   */
    976  combinerMax(left, right, config) {
    977    if (!(config.field in right)) {
    978      return left;
    979    }
    980    let type = this._typeOf(right[config.field]);
    981    if (!(config.field in left)) {
    982      if (type === "map") {
    983        left[config.field] = {};
    984      } else if (type === "array") {
    985        left[config.field] = [];
    986      } else if (type === "number") {
    987        left[config.field] = 0;
    988      } else {
    989        return null;
    990      }
    991    }
    992    if (type !== this._typeOf(left[config.field])) {
    993      return null;
    994    }
    995    if (type === "map") {
    996      Object.keys(right[config.field]).forEach(key => {
    997        if (
    998          !(key in left[config.field]) ||
    999          right[config.field][key] > left[config.field][key]
   1000        ) {
   1001          left[config.field][key] = right[config.field][key];
   1002        }
   1003      });
   1004    } else if (type === "array") {
   1005      for (let i = 0; i < right[config.field].length; i++) {
   1006        if (i < left[config.field].length) {
   1007          if (left[config.field][i] < right[config.field][i]) {
   1008            left[config.field][i] = right[config.field][i];
   1009          }
   1010        } else {
   1011          left[config.field].push(right[config.field][i]);
   1012        }
   1013      }
   1014    } else if (type === "number") {
   1015      if (left[config.field] < right[config.field]) {
   1016        left[config.field] = right[config.field];
   1017      }
   1018    } else {
   1019      return null;
   1020    }
   1021 
   1022    return left;
   1023  }
   1024 
   1025  /**
   1026   * Associates a value in right with another value in right. This association
   1027   * is then stored in a map in left.
   1028   *
   1029   *     For example: If a sequence of rights is:
   1030   *     { 'tags': {}, 'url_domain': 'maseratiusa.com/maserati', 'time': 41 }
   1031   *     { 'tags': {}, 'url_domain': 'mbusa.com/mercedes',       'time': 21 }
   1032   *     { 'tags': {}, 'url_domain': 'maseratiusa.com/maserati', 'time': 34 }
   1033   *
   1034   *     Then assuming a 'sum' operation, left can build a map that would look like:
   1035   *     {
   1036   *         'maseratiusa.com/maserati': 75,
   1037   *         'mbusa.com/mercedes': 21,
   1038   *     }
   1039   *
   1040   * Fields:
   1041   *  left_field              field in the left to store / update the map
   1042   *  right_key_field         Field in the right to use as a key
   1043   *  right_value_field       Field in the right to use as a value
   1044   *  operation               One of "sum", "max", "overwrite", "count"
   1045   */
   1046  combinerCollectValues(left, right, config) {
   1047    let op;
   1048    if (config.operation === "sum") {
   1049      op = (a, b) => a + b;
   1050    } else if (config.operation === "max") {
   1051      op = (a, b) => (a > b ? a : b);
   1052    } else if (config.operation === "overwrite") {
   1053      op = (a, b) => b;
   1054    } else if (config.operation === "count") {
   1055      op = a => a + 1;
   1056    } else {
   1057      return null;
   1058    }
   1059    if (!(config.left_field in left)) {
   1060      left[config.left_field] = {};
   1061    }
   1062    if (
   1063      !(config.right_key_field in right) ||
   1064      !(config.right_value_field in right)
   1065    ) {
   1066      return left;
   1067    }
   1068 
   1069    let key = right[config.right_key_field];
   1070    let rightValue = right[config.right_value_field];
   1071    let leftValue = 0.0;
   1072    if (key in left[config.left_field]) {
   1073      leftValue = left[config.left_field][key];
   1074    }
   1075 
   1076    left[config.left_field][key] = op(leftValue, rightValue);
   1077 
   1078    return left;
   1079  }
   1080 
   1081  /**
   1082   * Executes a recipe. Returns an object on success, or null on failure.
   1083   */
   1084  executeRecipe(item, recipe) {
   1085    let newItem = item;
   1086    if (recipe) {
   1087      for (let step of recipe) {
   1088        let op = this.ITEM_BUILDER_REGISTRY[step.function];
   1089        if (op === undefined) {
   1090          return null;
   1091        }
   1092        newItem = op.call(this, newItem, step);
   1093        if (newItem === null) {
   1094          break;
   1095        }
   1096      }
   1097    }
   1098    return newItem;
   1099  }
   1100 
   1101  /**
    1102   * Executes a combiner recipe. Returns an object on success, or null on failure.
   1103   */
   1104  executeCombinerRecipe(item1, item2, recipe) {
   1105    let newItem1 = item1;
   1106    for (let step of recipe) {
   1107      let op = this.ITEM_COMBINER_REGISTRY[step.function];
   1108      if (op === undefined) {
   1109        return null;
   1110      }
   1111      newItem1 = op.call(this, newItem1, item2, step);
   1112      if (newItem1 === null) {
   1113        break;
   1114      }
   1115    }
   1116 
   1117    return newItem1;
   1118  }
   1119 }